{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.33110641277242986, "eval_steps": 500, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_runtime": 27.428, "eval_samples_per_second": 1.167, "eval_steps_per_second": 0.146, "step": 0 }, { "epoch": 2.7592201064369157e-05, "grad_norm": 3.6326730251312256, "learning_rate": 0.001, "loss": 0.4698, "step": 1 }, { "epoch": 5.5184402128738314e-05, "grad_norm": 0.062148336321115494, "learning_rate": 0.001, "loss": 0.4517, "step": 2 }, { "epoch": 8.277660319310746e-05, "grad_norm": 0.0332246758043766, "learning_rate": 0.001, "loss": 0.4677, "step": 3 }, { "epoch": 0.00011036880425747663, "grad_norm": 0.10165657848119736, "learning_rate": 0.001, "loss": 0.4603, "step": 4 }, { "epoch": 0.00013796100532184578, "grad_norm": 0.04767799377441406, "learning_rate": 0.001, "loss": 0.4623, "step": 5 }, { "epoch": 0.00016555320638621493, "grad_norm": 0.023448018357157707, "learning_rate": 0.001, "loss": 0.4107, "step": 6 }, { "epoch": 0.0001931454074505841, "grad_norm": 0.013601069338619709, "learning_rate": 0.001, "loss": 0.4374, "step": 7 }, { "epoch": 0.00022073760851495325, "grad_norm": 0.009370611980557442, "learning_rate": 0.001, "loss": 0.403, "step": 8 }, { "epoch": 0.0002483298095793224, "grad_norm": 0.008329728618264198, "learning_rate": 0.001, "loss": 0.4436, "step": 9 }, { "epoch": 0.00027592201064369155, "grad_norm": 0.004966976586729288, "learning_rate": 0.001, "loss": 0.3936, "step": 10 }, { "epoch": 0.0003035142117080607, "grad_norm": 0.004952501505613327, "learning_rate": 0.001, "loss": 0.4144, "step": 11 }, { "epoch": 0.00033110641277242985, "grad_norm": 0.008474660106003284, "learning_rate": 0.001, "loss": 0.4487, "step": 12 }, { "epoch": 0.000358698613836799, "grad_norm": 0.004617814905941486, "learning_rate": 0.001, "loss": 0.3752, "step": 13 }, { "epoch": 0.0003862908149011682, "grad_norm": 0.0045386566780507565, "learning_rate": 0.001, "loss": 0.4456, "step": 14 }, { "epoch": 0.00041388301596553736, "grad_norm": 0.0031611656304448843, "learning_rate": 0.001, "loss": 0.3454, "step": 15 }, { "epoch": 0.0004414752170299065, "grad_norm": 0.0026714960113167763, "learning_rate": 0.001, "loss": 0.4236, "step": 16 }, { "epoch": 0.00046906741809427566, "grad_norm": 0.0028185201808810234, "learning_rate": 0.001, "loss": 0.4043, "step": 17 }, { "epoch": 0.0004966596191586448, "grad_norm": 0.002944110194221139, "learning_rate": 0.001, "loss": 0.4142, "step": 18 }, { "epoch": 0.000524251820223014, "grad_norm": 0.004909531679004431, "learning_rate": 0.001, "loss": 0.4356, "step": 19 }, { "epoch": 0.0005518440212873831, "grad_norm": 0.0034190011210739613, "learning_rate": 0.001, "loss": 0.4146, "step": 20 }, { "epoch": 0.0005794362223517523, "grad_norm": 0.003598715178668499, "learning_rate": 0.001, "loss": 0.4102, "step": 21 }, { "epoch": 0.0006070284234161214, "grad_norm": 0.004229442682117224, "learning_rate": 0.001, "loss": 0.461, "step": 22 }, { "epoch": 0.0006346206244804906, "grad_norm": 0.0024963070172816515, "learning_rate": 0.001, "loss": 0.3674, "step": 23 }, { "epoch": 0.0006622128255448597, "grad_norm": 0.002500817645341158, "learning_rate": 0.001, "loss": 0.4194, "step": 24 }, { "epoch": 0.0006898050266092289, "grad_norm": 0.002520238049328327, "learning_rate": 0.001, "loss": 0.4037, "step": 25 }, { "epoch": 0.000717397227673598, "grad_norm": 0.002308758907020092, "learning_rate": 0.001, "loss": 0.3992, "step": 26 }, { "epoch": 0.0007449894287379673, "grad_norm": 0.0020286778453737497, "learning_rate": 0.001, "loss": 0.3859, "step": 27 }, { "epoch": 0.0007725816298023364, "grad_norm": 0.0013328269124031067, "learning_rate": 0.001, "loss": 0.4132, "step": 28 }, { "epoch": 0.0008001738308667056, "grad_norm": 0.003233084687963128, "learning_rate": 0.001, "loss": 0.4118, "step": 29 }, { "epoch": 0.0008277660319310747, "grad_norm": 0.0017853466561064124, "learning_rate": 0.001, "loss": 0.4184, "step": 30 }, { "epoch": 0.0008553582329954439, "grad_norm": 0.0021520040463656187, "learning_rate": 0.001, "loss": 0.4215, "step": 31 }, { "epoch": 0.000882950434059813, "grad_norm": 0.002464903285726905, "learning_rate": 0.001, "loss": 0.4404, "step": 32 }, { "epoch": 0.0009105426351241822, "grad_norm": 0.0017728271195665002, "learning_rate": 0.001, "loss": 0.406, "step": 33 }, { "epoch": 0.0009381348361885513, "grad_norm": 0.001393413869664073, "learning_rate": 0.001, "loss": 0.3873, "step": 34 }, { "epoch": 0.0009657270372529205, "grad_norm": 0.001682270085439086, "learning_rate": 0.001, "loss": 0.4231, "step": 35 }, { "epoch": 0.0009933192383172896, "grad_norm": 0.0018528420478105545, "learning_rate": 0.001, "loss": 0.3882, "step": 36 }, { "epoch": 0.0010209114393816589, "grad_norm": 0.0014731371775269508, "learning_rate": 0.001, "loss": 0.4001, "step": 37 }, { "epoch": 0.001048503640446028, "grad_norm": 0.0012595922453328967, "learning_rate": 0.001, "loss": 0.4372, "step": 38 }, { "epoch": 0.0010760958415103972, "grad_norm": 0.0011791549623012543, "learning_rate": 0.001, "loss": 0.4218, "step": 39 }, { "epoch": 0.0011036880425747662, "grad_norm": 0.001127090072259307, "learning_rate": 0.001, "loss": 0.3939, "step": 40 }, { "epoch": 0.0011312802436391355, "grad_norm": 0.0015457386616617441, "learning_rate": 0.001, "loss": 0.4273, "step": 41 }, { "epoch": 0.0011588724447035045, "grad_norm": 0.0013682112330570817, "learning_rate": 0.001, "loss": 0.4156, "step": 42 }, { "epoch": 0.0011864646457678738, "grad_norm": 0.0012363678542897105, "learning_rate": 0.001, "loss": 0.3948, "step": 43 }, { "epoch": 0.0012140568468322428, "grad_norm": 0.0017921420512720942, "learning_rate": 0.001, "loss": 0.4117, "step": 44 }, { "epoch": 0.001241649047896612, "grad_norm": 0.001728672650642693, "learning_rate": 0.001, "loss": 0.3857, "step": 45 }, { "epoch": 0.0012692412489609811, "grad_norm": 0.0038352590054273605, "learning_rate": 0.001, "loss": 0.4116, "step": 46 }, { "epoch": 0.0012968334500253504, "grad_norm": 0.001102472422644496, "learning_rate": 0.001, "loss": 0.4265, "step": 47 }, { "epoch": 0.0013244256510897194, "grad_norm": 0.0060236481949687, "learning_rate": 0.001, "loss": 0.4381, "step": 48 }, { "epoch": 0.0013520178521540887, "grad_norm": 0.0013644751161336899, "learning_rate": 0.001, "loss": 0.4319, "step": 49 }, { "epoch": 0.0013796100532184577, "grad_norm": 0.0019051303388550878, "learning_rate": 0.001, "loss": 0.3992, "step": 50 }, { "epoch": 0.001407202254282827, "grad_norm": 0.0021409685723483562, "learning_rate": 0.001, "loss": 0.3841, "step": 51 }, { "epoch": 0.001434794455347196, "grad_norm": 0.0015203810762614012, "learning_rate": 0.001, "loss": 0.3805, "step": 52 }, { "epoch": 0.0014623866564115653, "grad_norm": 0.0018806306179612875, "learning_rate": 0.001, "loss": 0.4277, "step": 53 }, { "epoch": 0.0014899788574759345, "grad_norm": 0.0024247588589787483, "learning_rate": 0.001, "loss": 0.4347, "step": 54 }, { "epoch": 0.0015175710585403036, "grad_norm": 0.000940295634791255, "learning_rate": 0.001, "loss": 0.3891, "step": 55 }, { "epoch": 0.0015451632596046728, "grad_norm": 0.0012640036875382066, "learning_rate": 0.001, "loss": 0.4115, "step": 56 }, { "epoch": 0.0015727554606690419, "grad_norm": 0.00419093482196331, "learning_rate": 0.001, "loss": 0.3778, "step": 57 }, { "epoch": 0.0016003476617334111, "grad_norm": 0.002213024301454425, "learning_rate": 0.001, "loss": 0.4179, "step": 58 }, { "epoch": 0.0016279398627977802, "grad_norm": 0.004215582739561796, "learning_rate": 0.001, "loss": 0.4603, "step": 59 }, { "epoch": 0.0016555320638621494, "grad_norm": 0.0016123323002830148, "learning_rate": 0.001, "loss": 0.4395, "step": 60 }, { "epoch": 0.0016831242649265185, "grad_norm": 0.0014739107573404908, "learning_rate": 0.001, "loss": 0.3739, "step": 61 }, { "epoch": 0.0017107164659908877, "grad_norm": 0.0014588399790227413, "learning_rate": 0.001, "loss": 0.4476, "step": 62 }, { "epoch": 0.0017383086670552568, "grad_norm": 0.0037299874238669872, "learning_rate": 0.001, "loss": 0.4109, "step": 63 }, { "epoch": 0.001765900868119626, "grad_norm": 0.0027305828407406807, "learning_rate": 0.001, "loss": 0.3968, "step": 64 }, { "epoch": 0.001793493069183995, "grad_norm": 0.0027416169177740812, "learning_rate": 0.001, "loss": 0.4318, "step": 65 }, { "epoch": 0.0018210852702483643, "grad_norm": 0.0017876614583656192, "learning_rate": 0.001, "loss": 0.3905, "step": 66 }, { "epoch": 0.0018486774713127334, "grad_norm": 0.0019625681452453136, "learning_rate": 0.001, "loss": 0.443, "step": 67 }, { "epoch": 0.0018762696723771026, "grad_norm": 0.0032296774443238974, "learning_rate": 0.001, "loss": 0.4341, "step": 68 }, { "epoch": 0.0019038618734414717, "grad_norm": 0.0022506825625896454, "learning_rate": 0.001, "loss": 0.3885, "step": 69 }, { "epoch": 0.001931454074505841, "grad_norm": 0.002406027168035507, "learning_rate": 0.001, "loss": 0.4453, "step": 70 }, { "epoch": 0.00195904627557021, "grad_norm": 0.0014062536647543311, "learning_rate": 0.001, "loss": 0.3747, "step": 71 }, { "epoch": 0.0019866384766345792, "grad_norm": 0.0020411391742527485, "learning_rate": 0.001, "loss": 0.3968, "step": 72 }, { "epoch": 0.0020142306776989483, "grad_norm": 0.0023991591297090054, "learning_rate": 0.001, "loss": 0.4039, "step": 73 }, { "epoch": 0.0020418228787633178, "grad_norm": 0.002641907660290599, "learning_rate": 0.001, "loss": 0.4308, "step": 74 }, { "epoch": 0.002069415079827687, "grad_norm": 0.001838643685914576, "learning_rate": 0.001, "loss": 0.384, "step": 75 }, { "epoch": 0.002097007280892056, "grad_norm": 0.0014234330737963319, "learning_rate": 0.001, "loss": 0.3965, "step": 76 }, { "epoch": 0.002124599481956425, "grad_norm": 0.0020837297197431326, "learning_rate": 0.001, "loss": 0.4013, "step": 77 }, { "epoch": 0.0021521916830207944, "grad_norm": 0.002444372745230794, "learning_rate": 0.001, "loss": 0.4, "step": 78 }, { "epoch": 0.0021797838840851634, "grad_norm": 0.001574559137225151, "learning_rate": 0.001, "loss": 0.447, "step": 79 }, { "epoch": 0.0022073760851495324, "grad_norm": 0.002465339843183756, "learning_rate": 0.001, "loss": 0.3916, "step": 80 }, { "epoch": 0.0022349682862139015, "grad_norm": 0.0014318680623546243, "learning_rate": 0.001, "loss": 0.3763, "step": 81 }, { "epoch": 0.002262560487278271, "grad_norm": 0.0018497896380722523, "learning_rate": 0.001, "loss": 0.3837, "step": 82 }, { "epoch": 0.00229015268834264, "grad_norm": 0.0023651847150176764, "learning_rate": 0.001, "loss": 0.4341, "step": 83 }, { "epoch": 0.002317744889407009, "grad_norm": 0.0019378148717805743, "learning_rate": 0.001, "loss": 0.408, "step": 84 }, { "epoch": 0.002345337090471378, "grad_norm": 0.0015967771178111434, "learning_rate": 0.001, "loss": 0.3456, "step": 85 }, { "epoch": 0.0023729292915357476, "grad_norm": 0.0013114806497469544, "learning_rate": 0.001, "loss": 0.3987, "step": 86 }, { "epoch": 0.0024005214926001166, "grad_norm": 0.0018527540378272533, "learning_rate": 0.001, "loss": 0.4442, "step": 87 }, { "epoch": 0.0024281136936644856, "grad_norm": 0.0018121429020538926, "learning_rate": 0.001, "loss": 0.3871, "step": 88 }, { "epoch": 0.0024557058947288547, "grad_norm": 0.0017777991015464067, "learning_rate": 0.001, "loss": 0.4088, "step": 89 }, { "epoch": 0.002483298095793224, "grad_norm": 0.0014042413095012307, "learning_rate": 0.001, "loss": 0.3839, "step": 90 }, { "epoch": 0.002510890296857593, "grad_norm": 0.0023180118296295404, "learning_rate": 0.001, "loss": 0.3691, "step": 91 }, { "epoch": 0.0025384824979219622, "grad_norm": 0.0018260233337059617, "learning_rate": 0.001, "loss": 0.3866, "step": 92 }, { "epoch": 0.0025660746989863317, "grad_norm": 0.0020317393355071545, "learning_rate": 0.001, "loss": 0.43, "step": 93 }, { "epoch": 0.0025936669000507008, "grad_norm": 0.0014620552537962794, "learning_rate": 0.001, "loss": 0.3873, "step": 94 }, { "epoch": 0.00262125910111507, "grad_norm": 0.0030886537861078978, "learning_rate": 0.001, "loss": 0.4325, "step": 95 }, { "epoch": 0.002648851302179439, "grad_norm": 0.001787678455002606, "learning_rate": 0.001, "loss": 0.4261, "step": 96 }, { "epoch": 0.0026764435032438083, "grad_norm": 0.0020450972951948643, "learning_rate": 0.001, "loss": 0.4242, "step": 97 }, { "epoch": 0.0027040357043081773, "grad_norm": 0.0015356248477473855, "learning_rate": 0.001, "loss": 0.4263, "step": 98 }, { "epoch": 0.0027316279053725464, "grad_norm": 0.0019487686222419143, "learning_rate": 0.001, "loss": 0.4089, "step": 99 }, { "epoch": 0.0027592201064369154, "grad_norm": 0.0016876134322956204, "learning_rate": 0.001, "loss": 0.4376, "step": 100 }, { "epoch": 0.002786812307501285, "grad_norm": 0.0015564693603664637, "learning_rate": 0.001, "loss": 0.4016, "step": 101 }, { "epoch": 0.002814404508565654, "grad_norm": 0.0011805463582277298, "learning_rate": 0.001, "loss": 0.4055, "step": 102 }, { "epoch": 0.002841996709630023, "grad_norm": 0.0027237439062446356, "learning_rate": 0.001, "loss": 0.4386, "step": 103 }, { "epoch": 0.002869588910694392, "grad_norm": 0.001259063370525837, "learning_rate": 0.001, "loss": 0.3692, "step": 104 }, { "epoch": 0.0028971811117587615, "grad_norm": 0.0012861357536166906, "learning_rate": 0.001, "loss": 0.437, "step": 105 }, { "epoch": 0.0029247733128231305, "grad_norm": 0.0017483624396845698, "learning_rate": 0.001, "loss": 0.3922, "step": 106 }, { "epoch": 0.0029523655138874996, "grad_norm": 0.0022901813499629498, "learning_rate": 0.001, "loss": 0.4296, "step": 107 }, { "epoch": 0.002979957714951869, "grad_norm": 0.00259222649037838, "learning_rate": 0.001, "loss": 0.4107, "step": 108 }, { "epoch": 0.003007549916016238, "grad_norm": 0.0057897912338376045, "learning_rate": 0.001, "loss": 0.375, "step": 109 }, { "epoch": 0.003035142117080607, "grad_norm": 0.002251250436529517, "learning_rate": 0.001, "loss": 0.3959, "step": 110 }, { "epoch": 0.003062734318144976, "grad_norm": 0.0033183628693223, "learning_rate": 0.001, "loss": 0.4267, "step": 111 }, { "epoch": 0.0030903265192093457, "grad_norm": 0.001699024927802384, "learning_rate": 0.001, "loss": 0.4068, "step": 112 }, { "epoch": 0.0031179187202737147, "grad_norm": 0.002592903096228838, "learning_rate": 0.001, "loss": 0.3814, "step": 113 }, { "epoch": 0.0031455109213380837, "grad_norm": 0.001526323496364057, "learning_rate": 0.001, "loss": 0.4176, "step": 114 }, { "epoch": 0.003173103122402453, "grad_norm": 0.0022348894271999598, "learning_rate": 0.001, "loss": 0.4369, "step": 115 }, { "epoch": 0.0032006953234668223, "grad_norm": 0.0024093682877719402, "learning_rate": 0.001, "loss": 0.3827, "step": 116 }, { "epoch": 0.0032282875245311913, "grad_norm": 0.0054725524969398975, "learning_rate": 0.001, "loss": 0.4098, "step": 117 }, { "epoch": 0.0032558797255955603, "grad_norm": 0.0026599527336657047, "learning_rate": 0.001, "loss": 0.3803, "step": 118 }, { "epoch": 0.0032834719266599294, "grad_norm": 0.002410522662103176, "learning_rate": 0.001, "loss": 0.3951, "step": 119 }, { "epoch": 0.003311064127724299, "grad_norm": 0.0026565720327198505, "learning_rate": 0.001, "loss": 0.4274, "step": 120 }, { "epoch": 0.003338656328788668, "grad_norm": 0.0017594440141692758, "learning_rate": 0.001, "loss": 0.3748, "step": 121 }, { "epoch": 0.003366248529853037, "grad_norm": 0.001644355827011168, "learning_rate": 0.001, "loss": 0.4052, "step": 122 }, { "epoch": 0.003393840730917406, "grad_norm": 0.001211437862366438, "learning_rate": 0.001, "loss": 0.3873, "step": 123 }, { "epoch": 0.0034214329319817755, "grad_norm": 0.0012707557762041688, "learning_rate": 0.001, "loss": 0.4175, "step": 124 }, { "epoch": 0.0034490251330461445, "grad_norm": 0.00231559993699193, "learning_rate": 0.001, "loss": 0.3718, "step": 125 }, { "epoch": 0.0034766173341105135, "grad_norm": 0.003967110998928547, "learning_rate": 0.001, "loss": 0.4437, "step": 126 }, { "epoch": 0.003504209535174883, "grad_norm": 0.0014507032465189695, "learning_rate": 0.001, "loss": 0.4008, "step": 127 }, { "epoch": 0.003531801736239252, "grad_norm": 0.0023966797161847353, "learning_rate": 0.001, "loss": 0.3724, "step": 128 }, { "epoch": 0.003559393937303621, "grad_norm": 0.0019388011423870921, "learning_rate": 0.001, "loss": 0.4083, "step": 129 }, { "epoch": 0.00358698613836799, "grad_norm": 0.0012276334455236793, "learning_rate": 0.001, "loss": 0.4624, "step": 130 }, { "epoch": 0.0036145783394323596, "grad_norm": 0.0016817412106320262, "learning_rate": 0.001, "loss": 0.3641, "step": 131 }, { "epoch": 0.0036421705404967287, "grad_norm": 0.0014624105533584952, "learning_rate": 0.001, "loss": 0.415, "step": 132 }, { "epoch": 0.0036697627415610977, "grad_norm": 0.0023928394075483084, "learning_rate": 0.001, "loss": 0.4156, "step": 133 }, { "epoch": 0.0036973549426254667, "grad_norm": 0.00194596650544554, "learning_rate": 0.001, "loss": 0.4234, "step": 134 }, { "epoch": 0.0037249471436898362, "grad_norm": 0.0017044798005372286, "learning_rate": 0.001, "loss": 0.3717, "step": 135 }, { "epoch": 0.0037525393447542053, "grad_norm": 0.0017636306583881378, "learning_rate": 0.001, "loss": 0.4256, "step": 136 }, { "epoch": 0.0037801315458185743, "grad_norm": 0.0042044175788760185, "learning_rate": 0.001, "loss": 0.3936, "step": 137 }, { "epoch": 0.0038077237468829433, "grad_norm": 0.0023780064657330513, "learning_rate": 0.001, "loss": 0.4101, "step": 138 }, { "epoch": 0.003835315947947313, "grad_norm": 0.0018606351222842932, "learning_rate": 0.001, "loss": 0.3951, "step": 139 }, { "epoch": 0.003862908149011682, "grad_norm": 0.0020658932626247406, "learning_rate": 0.001, "loss": 0.4095, "step": 140 }, { "epoch": 0.003890500350076051, "grad_norm": 0.0021609310060739517, "learning_rate": 0.001, "loss": 0.3906, "step": 141 }, { "epoch": 0.00391809255114042, "grad_norm": 0.0029214313253760338, "learning_rate": 0.001, "loss": 0.4448, "step": 142 }, { "epoch": 0.003945684752204789, "grad_norm": 0.0013527723494917154, "learning_rate": 0.001, "loss": 0.4184, "step": 143 }, { "epoch": 0.0039732769532691585, "grad_norm": 0.0014684811467304826, "learning_rate": 0.001, "loss": 0.4359, "step": 144 }, { "epoch": 0.0040008691543335275, "grad_norm": 0.002133656293153763, "learning_rate": 0.001, "loss": 0.3947, "step": 145 }, { "epoch": 0.0040284613553978965, "grad_norm": 0.0017957837553694844, "learning_rate": 0.001, "loss": 0.4116, "step": 146 }, { "epoch": 0.004056053556462266, "grad_norm": 0.001586731756106019, "learning_rate": 0.001, "loss": 0.3759, "step": 147 }, { "epoch": 0.0040836457575266355, "grad_norm": 0.0015585459768772125, "learning_rate": 0.001, "loss": 0.4004, "step": 148 }, { "epoch": 0.0041112379585910045, "grad_norm": 0.001772695453837514, "learning_rate": 0.001, "loss": 0.4009, "step": 149 }, { "epoch": 0.004138830159655374, "grad_norm": 0.0032015417236834764, "learning_rate": 0.001, "loss": 0.4229, "step": 150 }, { "epoch": 0.004166422360719743, "grad_norm": 0.0014684676425531507, "learning_rate": 0.001, "loss": 0.3902, "step": 151 }, { "epoch": 0.004194014561784112, "grad_norm": 0.0014085081638768315, "learning_rate": 0.001, "loss": 0.4123, "step": 152 }, { "epoch": 0.004221606762848481, "grad_norm": 0.00355419609695673, "learning_rate": 0.001, "loss": 0.3749, "step": 153 }, { "epoch": 0.00424919896391285, "grad_norm": 0.0021562143228948116, "learning_rate": 0.001, "loss": 0.4111, "step": 154 }, { "epoch": 0.004276791164977219, "grad_norm": 0.001616101711988449, "learning_rate": 0.001, "loss": 0.4119, "step": 155 }, { "epoch": 0.004304383366041589, "grad_norm": 0.0020637568086385727, "learning_rate": 0.001, "loss": 0.4025, "step": 156 }, { "epoch": 0.004331975567105958, "grad_norm": 0.0013927265536040068, "learning_rate": 0.001, "loss": 0.3819, "step": 157 }, { "epoch": 0.004359567768170327, "grad_norm": 0.001993082230910659, "learning_rate": 0.001, "loss": 0.4093, "step": 158 }, { "epoch": 0.004387159969234696, "grad_norm": 0.001766142901033163, "learning_rate": 0.001, "loss": 0.4097, "step": 159 }, { "epoch": 0.004414752170299065, "grad_norm": 0.0017266202485188842, "learning_rate": 0.001, "loss": 0.381, "step": 160 }, { "epoch": 0.004442344371363434, "grad_norm": 0.0017482074908912182, "learning_rate": 0.001, "loss": 0.4035, "step": 161 }, { "epoch": 0.004469936572427803, "grad_norm": 0.004118494223803282, "learning_rate": 0.001, "loss": 0.4169, "step": 162 }, { "epoch": 0.004497528773492173, "grad_norm": 0.00243256869725883, "learning_rate": 0.001, "loss": 0.3896, "step": 163 }, { "epoch": 0.004525120974556542, "grad_norm": 0.001603225595317781, "learning_rate": 0.001, "loss": 0.409, "step": 164 }, { "epoch": 0.004552713175620911, "grad_norm": 0.0016920053167268634, "learning_rate": 0.001, "loss": 0.3818, "step": 165 }, { "epoch": 0.00458030537668528, "grad_norm": 0.0016063055954873562, "learning_rate": 0.001, "loss": 0.4264, "step": 166 }, { "epoch": 0.004607897577749649, "grad_norm": 0.0018608705140650272, "learning_rate": 0.001, "loss": 0.419, "step": 167 }, { "epoch": 0.004635489778814018, "grad_norm": 0.001778002129867673, "learning_rate": 0.001, "loss": 0.4311, "step": 168 }, { "epoch": 0.004663081979878387, "grad_norm": 0.0041995905339717865, "learning_rate": 0.001, "loss": 0.3816, "step": 169 }, { "epoch": 0.004690674180942756, "grad_norm": 0.0013721170835196972, "learning_rate": 0.001, "loss": 0.4096, "step": 170 }, { "epoch": 0.004718266382007126, "grad_norm": 0.0018527969950810075, "learning_rate": 0.001, "loss": 0.4166, "step": 171 }, { "epoch": 0.004745858583071495, "grad_norm": 0.001698823063634336, "learning_rate": 0.001, "loss": 0.4139, "step": 172 }, { "epoch": 0.004773450784135864, "grad_norm": 0.0015160103794187307, "learning_rate": 0.001, "loss": 0.3993, "step": 173 }, { "epoch": 0.004801042985200233, "grad_norm": 0.0016305141616612673, "learning_rate": 0.001, "loss": 0.4338, "step": 174 }, { "epoch": 0.004828635186264602, "grad_norm": 0.0016023332718759775, "learning_rate": 0.001, "loss": 0.3868, "step": 175 }, { "epoch": 0.004856227387328971, "grad_norm": 0.0021146016661077738, "learning_rate": 0.001, "loss": 0.3993, "step": 176 }, { "epoch": 0.00488381958839334, "grad_norm": 0.00299852411262691, "learning_rate": 0.001, "loss": 0.4033, "step": 177 }, { "epoch": 0.004911411789457709, "grad_norm": 0.002649805974215269, "learning_rate": 0.001, "loss": 0.4111, "step": 178 }, { "epoch": 0.004939003990522079, "grad_norm": 0.002029626164585352, "learning_rate": 0.001, "loss": 0.3914, "step": 179 }, { "epoch": 0.004966596191586448, "grad_norm": 0.002307730261236429, "learning_rate": 0.001, "loss": 0.4142, "step": 180 }, { "epoch": 0.004994188392650817, "grad_norm": 0.0019044012296944857, "learning_rate": 0.001, "loss": 0.4298, "step": 181 }, { "epoch": 0.005021780593715186, "grad_norm": 0.0012402728898450732, "learning_rate": 0.001, "loss": 0.4187, "step": 182 }, { "epoch": 0.005049372794779555, "grad_norm": 0.0015751667087897658, "learning_rate": 0.001, "loss": 0.375, "step": 183 }, { "epoch": 0.0050769649958439245, "grad_norm": 0.0018701856024563313, "learning_rate": 0.001, "loss": 0.4079, "step": 184 }, { "epoch": 0.0051045571969082935, "grad_norm": 0.002105995314195752, "learning_rate": 0.001, "loss": 0.3899, "step": 185 }, { "epoch": 0.005132149397972663, "grad_norm": 0.0030122329480946064, "learning_rate": 0.001, "loss": 0.4169, "step": 186 }, { "epoch": 0.0051597415990370325, "grad_norm": 0.0018410159973427653, "learning_rate": 0.001, "loss": 0.3982, "step": 187 }, { "epoch": 0.0051873338001014015, "grad_norm": 0.0017063120612874627, "learning_rate": 0.001, "loss": 0.4323, "step": 188 }, { "epoch": 0.0052149260011657705, "grad_norm": 0.001945548108778894, "learning_rate": 0.001, "loss": 0.3792, "step": 189 }, { "epoch": 0.00524251820223014, "grad_norm": 0.002262406051158905, "learning_rate": 0.001, "loss": 0.3932, "step": 190 }, { "epoch": 0.005270110403294509, "grad_norm": 0.003793769981712103, "learning_rate": 0.001, "loss": 0.42, "step": 191 }, { "epoch": 0.005297702604358878, "grad_norm": 0.0024173606652766466, "learning_rate": 0.001, "loss": 0.4175, "step": 192 }, { "epoch": 0.005325294805423247, "grad_norm": 0.0017464763950556517, "learning_rate": 0.001, "loss": 0.3971, "step": 193 }, { "epoch": 0.005352887006487617, "grad_norm": 0.0034284228459000587, "learning_rate": 0.001, "loss": 0.3932, "step": 194 }, { "epoch": 0.005380479207551986, "grad_norm": 0.001637522829696536, "learning_rate": 0.001, "loss": 0.404, "step": 195 }, { "epoch": 0.005408071408616355, "grad_norm": 0.0019539205823093653, "learning_rate": 0.001, "loss": 0.3976, "step": 196 }, { "epoch": 0.005435663609680724, "grad_norm": 0.0023392250295728445, "learning_rate": 0.001, "loss": 0.4224, "step": 197 }, { "epoch": 0.005463255810745093, "grad_norm": 0.005975689273327589, "learning_rate": 0.001, "loss": 0.3852, "step": 198 }, { "epoch": 0.005490848011809462, "grad_norm": 0.005825843196362257, "learning_rate": 0.001, "loss": 0.4076, "step": 199 }, { "epoch": 0.005518440212873831, "grad_norm": 0.0023729391396045685, "learning_rate": 0.001, "loss": 0.4088, "step": 200 }, { "epoch": 0.005546032413938201, "grad_norm": 0.0020657021086663008, "learning_rate": 0.001, "loss": 0.4162, "step": 201 }, { "epoch": 0.00557362461500257, "grad_norm": 0.0022743509616702795, "learning_rate": 0.001, "loss": 0.4189, "step": 202 }, { "epoch": 0.005601216816066939, "grad_norm": 0.002227703807875514, "learning_rate": 0.001, "loss": 0.4005, "step": 203 }, { "epoch": 0.005628809017131308, "grad_norm": 0.0023818998597562313, "learning_rate": 0.001, "loss": 0.454, "step": 204 }, { "epoch": 0.005656401218195677, "grad_norm": 0.002208840800449252, "learning_rate": 0.001, "loss": 0.4003, "step": 205 }, { "epoch": 0.005683993419260046, "grad_norm": 0.0025773572269827127, "learning_rate": 0.001, "loss": 0.4447, "step": 206 }, { "epoch": 0.005711585620324415, "grad_norm": 0.002260175533592701, "learning_rate": 0.001, "loss": 0.3893, "step": 207 }, { "epoch": 0.005739177821388784, "grad_norm": 0.0023025628179311752, "learning_rate": 0.001, "loss": 0.4072, "step": 208 }, { "epoch": 0.005766770022453154, "grad_norm": 0.0032320781610906124, "learning_rate": 0.001, "loss": 0.3924, "step": 209 }, { "epoch": 0.005794362223517523, "grad_norm": 0.0018922177841886878, "learning_rate": 0.001, "loss": 0.4094, "step": 210 }, { "epoch": 0.005821954424581892, "grad_norm": 0.0019504919182509184, "learning_rate": 0.001, "loss": 0.3948, "step": 211 }, { "epoch": 0.005849546625646261, "grad_norm": 0.002194691449403763, "learning_rate": 0.001, "loss": 0.4021, "step": 212 }, { "epoch": 0.00587713882671063, "grad_norm": 0.013725458644330502, "learning_rate": 0.001, "loss": 0.3922, "step": 213 }, { "epoch": 0.005904731027774999, "grad_norm": 0.0019010631367564201, "learning_rate": 0.001, "loss": 0.4347, "step": 214 }, { "epoch": 0.005932323228839368, "grad_norm": 0.005194473545998335, "learning_rate": 0.001, "loss": 0.4057, "step": 215 }, { "epoch": 0.005959915429903738, "grad_norm": 0.002996876835823059, "learning_rate": 0.001, "loss": 0.3763, "step": 216 }, { "epoch": 0.005987507630968107, "grad_norm": 0.0020920925308018923, "learning_rate": 0.001, "loss": 0.4245, "step": 217 }, { "epoch": 0.006015099832032476, "grad_norm": 0.0023856102488934994, "learning_rate": 0.001, "loss": 0.401, "step": 218 }, { "epoch": 0.006042692033096845, "grad_norm": 0.0022957061883062124, "learning_rate": 0.001, "loss": 0.4424, "step": 219 }, { "epoch": 0.006070284234161214, "grad_norm": 0.00439048558473587, "learning_rate": 0.001, "loss": 0.3984, "step": 220 }, { "epoch": 0.006097876435225583, "grad_norm": 0.0023554619401693344, "learning_rate": 0.001, "loss": 0.3743, "step": 221 }, { "epoch": 0.006125468636289952, "grad_norm": 0.002596774371340871, "learning_rate": 0.001, "loss": 0.3997, "step": 222 }, { "epoch": 0.006153060837354321, "grad_norm": 0.002902804408222437, "learning_rate": 0.001, "loss": 0.3741, "step": 223 }, { "epoch": 0.006180653038418691, "grad_norm": 0.002238060114905238, "learning_rate": 0.001, "loss": 0.4194, "step": 224 }, { "epoch": 0.00620824523948306, "grad_norm": 0.0024735773913562298, "learning_rate": 0.001, "loss": 0.4283, "step": 225 }, { "epoch": 0.006235837440547429, "grad_norm": 0.0043862126767635345, "learning_rate": 0.001, "loss": 0.4169, "step": 226 }, { "epoch": 0.0062634296416117985, "grad_norm": 0.002900010673329234, "learning_rate": 0.001, "loss": 0.4133, "step": 227 }, { "epoch": 0.0062910218426761675, "grad_norm": 0.0025010716635733843, "learning_rate": 0.001, "loss": 0.4121, "step": 228 }, { "epoch": 0.0063186140437405365, "grad_norm": 0.004444291349500418, "learning_rate": 0.001, "loss": 0.3828, "step": 229 }, { "epoch": 0.006346206244804906, "grad_norm": 0.0021149932872503996, "learning_rate": 0.001, "loss": 0.4356, "step": 230 }, { "epoch": 0.006373798445869275, "grad_norm": 0.001915531582199037, "learning_rate": 0.001, "loss": 0.4471, "step": 231 }, { "epoch": 0.0064013906469336445, "grad_norm": 0.001954560400918126, "learning_rate": 0.001, "loss": 0.4018, "step": 232 }, { "epoch": 0.006428982847998014, "grad_norm": 0.002395547227934003, "learning_rate": 0.001, "loss": 0.3945, "step": 233 }, { "epoch": 0.006456575049062383, "grad_norm": 0.002621949650347233, "learning_rate": 0.001, "loss": 0.4052, "step": 234 }, { "epoch": 0.006484167250126752, "grad_norm": 0.0037250854074954987, "learning_rate": 0.001, "loss": 0.4227, "step": 235 }, { "epoch": 0.006511759451191121, "grad_norm": 0.002147798193618655, "learning_rate": 0.001, "loss": 0.3914, "step": 236 }, { "epoch": 0.00653935165225549, "grad_norm": 0.002425707643851638, "learning_rate": 0.001, "loss": 0.3949, "step": 237 }, { "epoch": 0.006566943853319859, "grad_norm": 0.002085171639919281, "learning_rate": 0.001, "loss": 0.4239, "step": 238 }, { "epoch": 0.006594536054384229, "grad_norm": 0.0023823371157050133, "learning_rate": 0.001, "loss": 0.403, "step": 239 }, { "epoch": 0.006622128255448598, "grad_norm": 0.0020582638680934906, "learning_rate": 0.001, "loss": 0.402, "step": 240 }, { "epoch": 0.006649720456512967, "grad_norm": 0.002760798903182149, "learning_rate": 0.001, "loss": 0.3785, "step": 241 }, { "epoch": 0.006677312657577336, "grad_norm": 0.0030749242287129164, "learning_rate": 0.001, "loss": 0.4379, "step": 242 }, { "epoch": 0.006704904858641705, "grad_norm": 0.0023086306173354387, "learning_rate": 0.001, "loss": 0.4163, "step": 243 }, { "epoch": 0.006732497059706074, "grad_norm": 0.0025757497642189264, "learning_rate": 0.001, "loss": 0.4264, "step": 244 }, { "epoch": 0.006760089260770443, "grad_norm": 0.002158039715141058, "learning_rate": 0.001, "loss": 0.3881, "step": 245 }, { "epoch": 0.006787681461834812, "grad_norm": 0.0018623712239786983, "learning_rate": 0.001, "loss": 0.3682, "step": 246 }, { "epoch": 0.006815273662899182, "grad_norm": 0.0022342281881719828, "learning_rate": 0.001, "loss": 0.4181, "step": 247 }, { "epoch": 0.006842865863963551, "grad_norm": 0.0026764923240989447, "learning_rate": 0.001, "loss": 0.392, "step": 248 }, { "epoch": 0.00687045806502792, "grad_norm": 0.0038766246289014816, "learning_rate": 0.001, "loss": 0.4145, "step": 249 }, { "epoch": 0.006898050266092289, "grad_norm": 0.002432230394333601, "learning_rate": 0.001, "loss": 0.4382, "step": 250 }, { "epoch": 0.006925642467156658, "grad_norm": 0.003260681638494134, "learning_rate": 0.001, "loss": 0.4376, "step": 251 }, { "epoch": 0.006953234668221027, "grad_norm": 0.0023729840759187937, "learning_rate": 0.001, "loss": 0.3961, "step": 252 }, { "epoch": 0.006980826869285396, "grad_norm": 0.0032021389342844486, "learning_rate": 0.001, "loss": 0.4234, "step": 253 }, { "epoch": 0.007008419070349766, "grad_norm": 0.003267744556069374, "learning_rate": 0.001, "loss": 0.412, "step": 254 }, { "epoch": 0.007036011271414135, "grad_norm": 0.0024608762469142675, "learning_rate": 0.001, "loss": 0.3931, "step": 255 }, { "epoch": 0.007063603472478504, "grad_norm": 0.002270517172291875, "learning_rate": 0.001, "loss": 0.4049, "step": 256 }, { "epoch": 0.007091195673542873, "grad_norm": 0.002375125652179122, "learning_rate": 0.001, "loss": 0.4074, "step": 257 }, { "epoch": 0.007118787874607242, "grad_norm": 0.0021832643542438745, "learning_rate": 0.001, "loss": 0.4059, "step": 258 }, { "epoch": 0.007146380075671611, "grad_norm": 0.0018151308177039027, "learning_rate": 0.001, "loss": 0.3893, "step": 259 }, { "epoch": 0.00717397227673598, "grad_norm": 0.0018910926301032305, "learning_rate": 0.001, "loss": 0.4327, "step": 260 }, { "epoch": 0.007201564477800349, "grad_norm": 0.0024862713180482388, "learning_rate": 0.001, "loss": 0.4328, "step": 261 }, { "epoch": 0.007229156678864719, "grad_norm": 0.0027056324761360884, "learning_rate": 0.001, "loss": 0.4027, "step": 262 }, { "epoch": 0.007256748879929088, "grad_norm": 0.0018339842790737748, "learning_rate": 0.001, "loss": 0.3907, "step": 263 }, { "epoch": 0.007284341080993457, "grad_norm": 0.002902393927797675, "learning_rate": 0.001, "loss": 0.4415, "step": 264 }, { "epoch": 0.007311933282057826, "grad_norm": 0.0022214376367628574, "learning_rate": 0.001, "loss": 0.4046, "step": 265 }, { "epoch": 0.007339525483122195, "grad_norm": 0.002073576208204031, "learning_rate": 0.001, "loss": 0.4365, "step": 266 }, { "epoch": 0.0073671176841865645, "grad_norm": 0.002394103677943349, "learning_rate": 0.001, "loss": 0.4265, "step": 267 }, { "epoch": 0.0073947098852509335, "grad_norm": 0.002571861259639263, "learning_rate": 0.001, "loss": 0.4012, "step": 268 }, { "epoch": 0.007422302086315303, "grad_norm": 0.0016914806328713894, "learning_rate": 0.001, "loss": 0.3932, "step": 269 }, { "epoch": 0.0074498942873796725, "grad_norm": 0.003131776349619031, "learning_rate": 0.001, "loss": 0.3807, "step": 270 }, { "epoch": 0.0074774864884440415, "grad_norm": 0.0046376376412808895, "learning_rate": 0.001, "loss": 0.4189, "step": 271 }, { "epoch": 0.0075050786895084105, "grad_norm": 0.0030513983219861984, "learning_rate": 0.001, "loss": 0.3983, "step": 272 }, { "epoch": 0.00753267089057278, "grad_norm": 0.0018306419951841235, "learning_rate": 0.001, "loss": 0.4008, "step": 273 }, { "epoch": 0.007560263091637149, "grad_norm": 0.002655989723280072, "learning_rate": 0.001, "loss": 0.3853, "step": 274 }, { "epoch": 0.007587855292701518, "grad_norm": 0.002580752596259117, "learning_rate": 0.001, "loss": 0.4259, "step": 275 }, { "epoch": 0.007615447493765887, "grad_norm": 0.002781835151836276, "learning_rate": 0.001, "loss": 0.4023, "step": 276 }, { "epoch": 0.007643039694830257, "grad_norm": 0.0023255913984030485, "learning_rate": 0.001, "loss": 0.4107, "step": 277 }, { "epoch": 0.007670631895894626, "grad_norm": 0.0026517838705331087, "learning_rate": 0.001, "loss": 0.3877, "step": 278 }, { "epoch": 0.007698224096958995, "grad_norm": 0.009500747546553612, "learning_rate": 0.001, "loss": 0.4261, "step": 279 }, { "epoch": 0.007725816298023364, "grad_norm": 0.002862557303160429, "learning_rate": 0.001, "loss": 0.3913, "step": 280 }, { "epoch": 0.007753408499087733, "grad_norm": 0.0030510432552546263, "learning_rate": 0.001, "loss": 0.4157, "step": 281 }, { "epoch": 0.007781000700152102, "grad_norm": 0.00323986797593534, "learning_rate": 0.001, "loss": 0.4314, "step": 282 }, { "epoch": 0.007808592901216471, "grad_norm": 0.0030146867502480745, "learning_rate": 0.001, "loss": 0.4006, "step": 283 }, { "epoch": 0.00783618510228084, "grad_norm": 0.002789125544950366, "learning_rate": 0.001, "loss": 0.4179, "step": 284 }, { "epoch": 0.007863777303345209, "grad_norm": 0.0027734541799873114, "learning_rate": 0.001, "loss": 0.3941, "step": 285 }, { "epoch": 0.007891369504409579, "grad_norm": 0.0021806685253977776, "learning_rate": 0.001, "loss": 0.3948, "step": 286 }, { "epoch": 0.007918961705473947, "grad_norm": 0.003913281951099634, "learning_rate": 0.001, "loss": 0.4296, "step": 287 }, { "epoch": 0.007946553906538317, "grad_norm": 0.0021719373762607574, "learning_rate": 0.001, "loss": 0.3839, "step": 288 }, { "epoch": 0.007974146107602687, "grad_norm": 0.003087391145527363, "learning_rate": 0.001, "loss": 0.3958, "step": 289 }, { "epoch": 0.008001738308667055, "grad_norm": 0.0025539237540215254, "learning_rate": 0.001, "loss": 0.4234, "step": 290 }, { "epoch": 0.008029330509731425, "grad_norm": 0.002219514222815633, "learning_rate": 0.001, "loss": 0.4234, "step": 291 }, { "epoch": 0.008056922710795793, "grad_norm": 0.0028160493820905685, "learning_rate": 0.001, "loss": 0.3935, "step": 292 }, { "epoch": 0.008084514911860163, "grad_norm": 0.003081710310652852, "learning_rate": 0.001, "loss": 0.4197, "step": 293 }, { "epoch": 0.008112107112924531, "grad_norm": 0.0024016399402171373, "learning_rate": 0.001, "loss": 0.3715, "step": 294 }, { "epoch": 0.008139699313988901, "grad_norm": 0.0020687165670096874, "learning_rate": 0.001, "loss": 0.4132, "step": 295 }, { "epoch": 0.008167291515053271, "grad_norm": 0.002624873537570238, "learning_rate": 0.001, "loss": 0.4201, "step": 296 }, { "epoch": 0.00819488371611764, "grad_norm": 0.0027543094474822283, "learning_rate": 0.001, "loss": 0.3984, "step": 297 }, { "epoch": 0.008222475917182009, "grad_norm": 0.005131872370839119, "learning_rate": 0.001, "loss": 0.3971, "step": 298 }, { "epoch": 0.008250068118246377, "grad_norm": 0.0030383355915546417, "learning_rate": 0.001, "loss": 0.421, "step": 299 }, { "epoch": 0.008277660319310747, "grad_norm": 0.00250818463973701, "learning_rate": 0.001, "loss": 0.4311, "step": 300 }, { "epoch": 0.008305252520375115, "grad_norm": 0.006147111766040325, "learning_rate": 0.001, "loss": 0.386, "step": 301 }, { "epoch": 0.008332844721439485, "grad_norm": 0.003149918746203184, "learning_rate": 0.001, "loss": 0.4015, "step": 302 }, { "epoch": 0.008360436922503853, "grad_norm": 0.002058635698631406, "learning_rate": 0.001, "loss": 0.4586, "step": 303 }, { "epoch": 0.008388029123568223, "grad_norm": 0.002103931736201048, "learning_rate": 0.001, "loss": 0.3906, "step": 304 }, { "epoch": 0.008415621324632593, "grad_norm": 0.0036677306052297354, "learning_rate": 0.001, "loss": 0.4056, "step": 305 }, { "epoch": 0.008443213525696961, "grad_norm": 0.002338196150958538, "learning_rate": 0.001, "loss": 0.3991, "step": 306 }, { "epoch": 0.008470805726761331, "grad_norm": 0.002481523435562849, "learning_rate": 0.001, "loss": 0.4227, "step": 307 }, { "epoch": 0.0084983979278257, "grad_norm": 0.0023166737519204617, "learning_rate": 0.001, "loss": 0.3723, "step": 308 }, { "epoch": 0.00852599012889007, "grad_norm": 0.00202556187286973, "learning_rate": 0.001, "loss": 0.4171, "step": 309 }, { "epoch": 0.008553582329954438, "grad_norm": 0.0018110661767423153, "learning_rate": 0.001, "loss": 0.4258, "step": 310 }, { "epoch": 0.008581174531018807, "grad_norm": 0.003058563219383359, "learning_rate": 0.001, "loss": 0.4147, "step": 311 }, { "epoch": 0.008608766732083177, "grad_norm": 0.004681632854044437, "learning_rate": 0.001, "loss": 0.4428, "step": 312 }, { "epoch": 0.008636358933147546, "grad_norm": 0.0023308703675866127, "learning_rate": 0.001, "loss": 0.3874, "step": 313 }, { "epoch": 0.008663951134211915, "grad_norm": 0.0029331271070986986, "learning_rate": 0.001, "loss": 0.4019, "step": 314 }, { "epoch": 0.008691543335276284, "grad_norm": 0.0021851372439414263, "learning_rate": 0.001, "loss": 0.4097, "step": 315 }, { "epoch": 0.008719135536340654, "grad_norm": 0.0027176933363080025, "learning_rate": 0.001, "loss": 0.3937, "step": 316 }, { "epoch": 0.008746727737405022, "grad_norm": 0.0023573623038828373, "learning_rate": 0.001, "loss": 0.3926, "step": 317 }, { "epoch": 0.008774319938469392, "grad_norm": 0.0023507485166192055, "learning_rate": 0.001, "loss": 0.429, "step": 318 }, { "epoch": 0.008801912139533762, "grad_norm": 0.0026601781137287617, "learning_rate": 0.001, "loss": 0.4071, "step": 319 }, { "epoch": 0.00882950434059813, "grad_norm": 0.002715731505304575, "learning_rate": 0.001, "loss": 0.4148, "step": 320 }, { "epoch": 0.0088570965416625, "grad_norm": 0.004710033070296049, "learning_rate": 0.001, "loss": 0.3585, "step": 321 }, { "epoch": 0.008884688742726868, "grad_norm": 0.0027322748210281134, "learning_rate": 0.001, "loss": 0.4053, "step": 322 }, { "epoch": 0.008912280943791238, "grad_norm": 0.0024419347755610943, "learning_rate": 0.001, "loss": 0.4141, "step": 323 }, { "epoch": 0.008939873144855606, "grad_norm": 0.004387510009109974, "learning_rate": 0.001, "loss": 0.3747, "step": 324 }, { "epoch": 0.008967465345919976, "grad_norm": 0.0033073730301111937, "learning_rate": 0.001, "loss": 0.3981, "step": 325 }, { "epoch": 0.008995057546984346, "grad_norm": 0.002799189416691661, "learning_rate": 0.001, "loss": 0.3687, "step": 326 }, { "epoch": 0.009022649748048714, "grad_norm": 0.0031710139010101557, "learning_rate": 0.001, "loss": 0.3988, "step": 327 }, { "epoch": 0.009050241949113084, "grad_norm": 0.0027909427881240845, "learning_rate": 0.001, "loss": 0.3804, "step": 328 }, { "epoch": 0.009077834150177452, "grad_norm": 0.0035845122765749693, "learning_rate": 0.001, "loss": 0.4203, "step": 329 }, { "epoch": 0.009105426351241822, "grad_norm": 0.0034582631196826696, "learning_rate": 0.001, "loss": 0.404, "step": 330 }, { "epoch": 0.00913301855230619, "grad_norm": 0.005869538523256779, "learning_rate": 0.001, "loss": 0.3392, "step": 331 }, { "epoch": 0.00916061075337056, "grad_norm": 0.0037916686851531267, "learning_rate": 0.001, "loss": 0.4509, "step": 332 }, { "epoch": 0.009188202954434928, "grad_norm": 0.004066781606525183, "learning_rate": 0.001, "loss": 0.4105, "step": 333 }, { "epoch": 0.009215795155499298, "grad_norm": 0.002667912980541587, "learning_rate": 0.001, "loss": 0.4371, "step": 334 }, { "epoch": 0.009243387356563668, "grad_norm": 0.0032026427797973156, "learning_rate": 0.001, "loss": 0.3814, "step": 335 }, { "epoch": 0.009270979557628036, "grad_norm": 0.002195654669776559, "learning_rate": 0.001, "loss": 0.374, "step": 336 }, { "epoch": 0.009298571758692406, "grad_norm": 0.002170421415939927, "learning_rate": 0.001, "loss": 0.416, "step": 337 }, { "epoch": 0.009326163959756774, "grad_norm": 0.002630366710945964, "learning_rate": 0.001, "loss": 0.4068, "step": 338 }, { "epoch": 0.009353756160821144, "grad_norm": 0.0026457132771611214, "learning_rate": 0.001, "loss": 0.436, "step": 339 }, { "epoch": 0.009381348361885512, "grad_norm": 0.002360823331400752, "learning_rate": 0.001, "loss": 0.4335, "step": 340 }, { "epoch": 0.009408940562949882, "grad_norm": 0.0026195445097982883, "learning_rate": 0.001, "loss": 0.4233, "step": 341 }, { "epoch": 0.009436532764014252, "grad_norm": 0.0018392925849184394, "learning_rate": 0.001, "loss": 0.4215, "step": 342 }, { "epoch": 0.00946412496507862, "grad_norm": 0.0029209102503955364, "learning_rate": 0.001, "loss": 0.3949, "step": 343 }, { "epoch": 0.00949171716614299, "grad_norm": 0.00245369179174304, "learning_rate": 0.001, "loss": 0.4349, "step": 344 }, { "epoch": 0.009519309367207358, "grad_norm": 0.0021382428240031004, "learning_rate": 0.001, "loss": 0.4234, "step": 345 }, { "epoch": 0.009546901568271728, "grad_norm": 0.0018214443698525429, "learning_rate": 0.001, "loss": 0.3922, "step": 346 }, { "epoch": 0.009574493769336096, "grad_norm": 0.0025145094841718674, "learning_rate": 0.001, "loss": 0.3583, "step": 347 }, { "epoch": 0.009602085970400466, "grad_norm": 0.002913502510637045, "learning_rate": 0.001, "loss": 0.4189, "step": 348 }, { "epoch": 0.009629678171464836, "grad_norm": 0.002811063313856721, "learning_rate": 0.001, "loss": 0.3864, "step": 349 }, { "epoch": 0.009657270372529204, "grad_norm": 0.0031305616721510887, "learning_rate": 0.001, "loss": 0.3724, "step": 350 }, { "epoch": 0.009684862573593574, "grad_norm": 0.002753301290795207, "learning_rate": 0.001, "loss": 0.4034, "step": 351 }, { "epoch": 0.009712454774657943, "grad_norm": 0.002502492628991604, "learning_rate": 0.001, "loss": 0.4056, "step": 352 }, { "epoch": 0.009740046975722312, "grad_norm": 0.0029816054739058018, "learning_rate": 0.001, "loss": 0.4103, "step": 353 }, { "epoch": 0.00976763917678668, "grad_norm": 0.00210220436565578, "learning_rate": 0.001, "loss": 0.42, "step": 354 }, { "epoch": 0.00979523137785105, "grad_norm": 0.002407314023002982, "learning_rate": 0.001, "loss": 0.3993, "step": 355 }, { "epoch": 0.009822823578915419, "grad_norm": 0.0025853144470602274, "learning_rate": 0.001, "loss": 0.3886, "step": 356 }, { "epoch": 0.009850415779979789, "grad_norm": 0.0020144209265708923, "learning_rate": 0.001, "loss": 0.4465, "step": 357 }, { "epoch": 0.009878007981044159, "grad_norm": 0.0026473626494407654, "learning_rate": 0.001, "loss": 0.384, "step": 358 }, { "epoch": 0.009905600182108527, "grad_norm": 0.005975659471005201, "learning_rate": 0.001, "loss": 0.4328, "step": 359 }, { "epoch": 0.009933192383172897, "grad_norm": 0.0024808787275105715, "learning_rate": 0.001, "loss": 0.3949, "step": 360 }, { "epoch": 0.009960784584237265, "grad_norm": 0.011580473743379116, "learning_rate": 0.001, "loss": 0.4419, "step": 361 }, { "epoch": 0.009988376785301635, "grad_norm": 0.0025010586250573397, "learning_rate": 0.001, "loss": 0.3943, "step": 362 }, { "epoch": 0.010015968986366003, "grad_norm": 0.0029951941687613726, "learning_rate": 0.001, "loss": 0.3898, "step": 363 }, { "epoch": 0.010043561187430373, "grad_norm": 0.0038613236974924803, "learning_rate": 0.001, "loss": 0.4186, "step": 364 }, { "epoch": 0.010071153388494743, "grad_norm": 0.00325029413215816, "learning_rate": 0.001, "loss": 0.392, "step": 365 }, { "epoch": 0.01009874558955911, "grad_norm": 0.002298851264640689, "learning_rate": 0.001, "loss": 0.4364, "step": 366 }, { "epoch": 0.01012633779062348, "grad_norm": 0.003252133959904313, "learning_rate": 0.001, "loss": 0.3851, "step": 367 }, { "epoch": 0.010153929991687849, "grad_norm": 0.0026516057550907135, "learning_rate": 0.001, "loss": 0.4368, "step": 368 }, { "epoch": 0.010181522192752219, "grad_norm": 0.0033587999641895294, "learning_rate": 0.001, "loss": 0.3924, "step": 369 }, { "epoch": 0.010209114393816587, "grad_norm": 0.0030985455960035324, "learning_rate": 0.001, "loss": 0.4134, "step": 370 }, { "epoch": 0.010236706594880957, "grad_norm": 0.0028507187962532043, "learning_rate": 0.001, "loss": 0.4237, "step": 371 }, { "epoch": 0.010264298795945327, "grad_norm": 0.0024174090940505266, "learning_rate": 0.001, "loss": 0.4607, "step": 372 }, { "epoch": 0.010291890997009695, "grad_norm": 0.003416180843487382, "learning_rate": 0.001, "loss": 0.3812, "step": 373 }, { "epoch": 0.010319483198074065, "grad_norm": 0.0031029239762574434, "learning_rate": 0.001, "loss": 0.402, "step": 374 }, { "epoch": 0.010347075399138433, "grad_norm": 0.0023850388824939728, "learning_rate": 0.001, "loss": 0.3854, "step": 375 }, { "epoch": 0.010374667600202803, "grad_norm": 0.0026809065602719784, "learning_rate": 0.001, "loss": 0.4062, "step": 376 }, { "epoch": 0.010402259801267171, "grad_norm": 0.004038350190967321, "learning_rate": 0.001, "loss": 0.4223, "step": 377 }, { "epoch": 0.010429852002331541, "grad_norm": 0.0030194921419024467, "learning_rate": 0.001, "loss": 0.4525, "step": 378 }, { "epoch": 0.010457444203395911, "grad_norm": 0.0023160662967711687, "learning_rate": 0.001, "loss": 0.4256, "step": 379 }, { "epoch": 0.01048503640446028, "grad_norm": 0.0031149794813245535, "learning_rate": 0.001, "loss": 0.4334, "step": 380 }, { "epoch": 0.010512628605524649, "grad_norm": 0.0025531111750751734, "learning_rate": 0.001, "loss": 0.4142, "step": 381 }, { "epoch": 0.010540220806589017, "grad_norm": 0.003499183803796768, "learning_rate": 0.001, "loss": 0.4105, "step": 382 }, { "epoch": 0.010567813007653387, "grad_norm": 0.004215524531900883, "learning_rate": 0.001, "loss": 0.3859, "step": 383 }, { "epoch": 0.010595405208717755, "grad_norm": 0.0024398749228566885, "learning_rate": 0.001, "loss": 0.4182, "step": 384 }, { "epoch": 0.010622997409782125, "grad_norm": 0.003436741651967168, "learning_rate": 0.001, "loss": 0.4264, "step": 385 }, { "epoch": 0.010650589610846493, "grad_norm": 0.0023827480617910624, "learning_rate": 0.001, "loss": 0.4117, "step": 386 }, { "epoch": 0.010678181811910863, "grad_norm": 0.0032302262261509895, "learning_rate": 0.001, "loss": 0.399, "step": 387 }, { "epoch": 0.010705774012975233, "grad_norm": 0.004642815329134464, "learning_rate": 0.001, "loss": 0.341, "step": 388 }, { "epoch": 0.010733366214039601, "grad_norm": 0.002242898801341653, "learning_rate": 0.001, "loss": 0.4084, "step": 389 }, { "epoch": 0.010760958415103971, "grad_norm": 0.002073424868285656, "learning_rate": 0.001, "loss": 0.4344, "step": 390 }, { "epoch": 0.01078855061616834, "grad_norm": 0.0029485910199582577, "learning_rate": 0.001, "loss": 0.3803, "step": 391 }, { "epoch": 0.01081614281723271, "grad_norm": 0.003694765968248248, "learning_rate": 0.001, "loss": 0.4351, "step": 392 }, { "epoch": 0.010843735018297078, "grad_norm": 0.004921768791973591, "learning_rate": 0.001, "loss": 0.4089, "step": 393 }, { "epoch": 0.010871327219361447, "grad_norm": 0.0033451106864959, "learning_rate": 0.001, "loss": 0.4005, "step": 394 }, { "epoch": 0.010898919420425817, "grad_norm": 0.003096395405009389, "learning_rate": 0.001, "loss": 0.3951, "step": 395 }, { "epoch": 0.010926511621490186, "grad_norm": 0.002606458030641079, "learning_rate": 0.001, "loss": 0.3887, "step": 396 }, { "epoch": 0.010954103822554555, "grad_norm": 0.002552368212491274, "learning_rate": 0.001, "loss": 0.436, "step": 397 }, { "epoch": 0.010981696023618924, "grad_norm": 0.002896772464737296, "learning_rate": 0.001, "loss": 0.3605, "step": 398 }, { "epoch": 0.011009288224683294, "grad_norm": 0.004260985646396875, "learning_rate": 0.001, "loss": 0.3817, "step": 399 }, { "epoch": 0.011036880425747662, "grad_norm": 0.0023302636109292507, "learning_rate": 0.001, "loss": 0.3852, "step": 400 }, { "epoch": 0.011064472626812032, "grad_norm": 0.0028871933463960886, "learning_rate": 0.001, "loss": 0.3944, "step": 401 }, { "epoch": 0.011092064827876402, "grad_norm": 0.003007282270118594, "learning_rate": 0.001, "loss": 0.4197, "step": 402 }, { "epoch": 0.01111965702894077, "grad_norm": 0.0023298682644963264, "learning_rate": 0.001, "loss": 0.4007, "step": 403 }, { "epoch": 0.01114724923000514, "grad_norm": 0.0032247831113636494, "learning_rate": 0.001, "loss": 0.404, "step": 404 }, { "epoch": 0.011174841431069508, "grad_norm": 0.002755606546998024, "learning_rate": 0.001, "loss": 0.3675, "step": 405 }, { "epoch": 0.011202433632133878, "grad_norm": 0.0030489088967442513, "learning_rate": 0.001, "loss": 0.3863, "step": 406 }, { "epoch": 0.011230025833198246, "grad_norm": 0.0037001632153987885, "learning_rate": 0.001, "loss": 0.395, "step": 407 }, { "epoch": 0.011257618034262616, "grad_norm": 0.0033608553931117058, "learning_rate": 0.001, "loss": 0.3818, "step": 408 }, { "epoch": 0.011285210235326984, "grad_norm": 0.00226064445450902, "learning_rate": 0.001, "loss": 0.4162, "step": 409 }, { "epoch": 0.011312802436391354, "grad_norm": 0.0037739353720098734, "learning_rate": 0.001, "loss": 0.3952, "step": 410 }, { "epoch": 0.011340394637455724, "grad_norm": 0.0032893354073166847, "learning_rate": 0.001, "loss": 0.3916, "step": 411 }, { "epoch": 0.011367986838520092, "grad_norm": 0.0036774203181266785, "learning_rate": 0.001, "loss": 0.4207, "step": 412 }, { "epoch": 0.011395579039584462, "grad_norm": 0.0031027563381940126, "learning_rate": 0.001, "loss": 0.4202, "step": 413 }, { "epoch": 0.01142317124064883, "grad_norm": 0.006735049653798342, "learning_rate": 0.001, "loss": 0.4151, "step": 414 }, { "epoch": 0.0114507634417132, "grad_norm": 0.0036674696020781994, "learning_rate": 0.001, "loss": 0.3887, "step": 415 }, { "epoch": 0.011478355642777568, "grad_norm": 0.0060126762837171555, "learning_rate": 0.001, "loss": 0.4291, "step": 416 }, { "epoch": 0.011505947843841938, "grad_norm": 0.005611831322312355, "learning_rate": 0.001, "loss": 0.3769, "step": 417 }, { "epoch": 0.011533540044906308, "grad_norm": 0.004347233567386866, "learning_rate": 0.001, "loss": 0.4006, "step": 418 }, { "epoch": 0.011561132245970676, "grad_norm": 0.0035771261900663376, "learning_rate": 0.001, "loss": 0.4122, "step": 419 }, { "epoch": 0.011588724447035046, "grad_norm": 0.002888308372348547, "learning_rate": 0.001, "loss": 0.3947, "step": 420 }, { "epoch": 0.011616316648099414, "grad_norm": 0.006516223307698965, "learning_rate": 0.001, "loss": 0.395, "step": 421 }, { "epoch": 0.011643908849163784, "grad_norm": 0.004090446978807449, "learning_rate": 0.001, "loss": 0.3573, "step": 422 }, { "epoch": 0.011671501050228152, "grad_norm": 0.002728004939854145, "learning_rate": 0.001, "loss": 0.4364, "step": 423 }, { "epoch": 0.011699093251292522, "grad_norm": 0.002871421165764332, "learning_rate": 0.001, "loss": 0.3978, "step": 424 }, { "epoch": 0.011726685452356892, "grad_norm": 0.0021404619328677654, "learning_rate": 0.001, "loss": 0.4034, "step": 425 }, { "epoch": 0.01175427765342126, "grad_norm": 0.0027238999027758837, "learning_rate": 0.001, "loss": 0.4189, "step": 426 }, { "epoch": 0.01178186985448563, "grad_norm": 0.0025113308802247047, "learning_rate": 0.001, "loss": 0.4096, "step": 427 }, { "epoch": 0.011809462055549998, "grad_norm": 0.0029517943039536476, "learning_rate": 0.001, "loss": 0.415, "step": 428 }, { "epoch": 0.011837054256614368, "grad_norm": 0.0023259243462234735, "learning_rate": 0.001, "loss": 0.4034, "step": 429 }, { "epoch": 0.011864646457678736, "grad_norm": 0.002625027671456337, "learning_rate": 0.001, "loss": 0.3656, "step": 430 }, { "epoch": 0.011892238658743106, "grad_norm": 0.0032775115687400103, "learning_rate": 0.001, "loss": 0.4151, "step": 431 }, { "epoch": 0.011919830859807476, "grad_norm": 0.002229101490229368, "learning_rate": 0.001, "loss": 0.4495, "step": 432 }, { "epoch": 0.011947423060871844, "grad_norm": 0.0024553320836275816, "learning_rate": 0.001, "loss": 0.4261, "step": 433 }, { "epoch": 0.011975015261936214, "grad_norm": 0.0025780866853892803, "learning_rate": 0.001, "loss": 0.4094, "step": 434 }, { "epoch": 0.012002607463000583, "grad_norm": 0.003502671839669347, "learning_rate": 0.001, "loss": 0.3563, "step": 435 }, { "epoch": 0.012030199664064952, "grad_norm": 0.004440659191459417, "learning_rate": 0.001, "loss": 0.3973, "step": 436 }, { "epoch": 0.01205779186512932, "grad_norm": 0.0051866755820810795, "learning_rate": 0.001, "loss": 0.4232, "step": 437 }, { "epoch": 0.01208538406619369, "grad_norm": 0.002548003103584051, "learning_rate": 0.001, "loss": 0.4387, "step": 438 }, { "epoch": 0.012112976267258059, "grad_norm": 0.0030665360391139984, "learning_rate": 0.001, "loss": 0.4336, "step": 439 }, { "epoch": 0.012140568468322429, "grad_norm": 0.0032852613367140293, "learning_rate": 0.001, "loss": 0.4213, "step": 440 }, { "epoch": 0.012168160669386799, "grad_norm": 0.0030793712940067053, "learning_rate": 0.001, "loss": 0.4191, "step": 441 }, { "epoch": 0.012195752870451167, "grad_norm": 0.0028317072428762913, "learning_rate": 0.001, "loss": 0.4219, "step": 442 }, { "epoch": 0.012223345071515537, "grad_norm": 0.0029394179582595825, "learning_rate": 0.001, "loss": 0.373, "step": 443 }, { "epoch": 0.012250937272579905, "grad_norm": 0.0032694858964532614, "learning_rate": 0.001, "loss": 0.3938, "step": 444 }, { "epoch": 0.012278529473644275, "grad_norm": 0.002599178347736597, "learning_rate": 0.001, "loss": 0.4012, "step": 445 }, { "epoch": 0.012306121674708643, "grad_norm": 0.0053001102060079575, "learning_rate": 0.001, "loss": 0.393, "step": 446 }, { "epoch": 0.012333713875773013, "grad_norm": 0.0027281581424176693, "learning_rate": 0.001, "loss": 0.4207, "step": 447 }, { "epoch": 0.012361306076837383, "grad_norm": 0.0040309545584023, "learning_rate": 0.001, "loss": 0.3925, "step": 448 }, { "epoch": 0.01238889827790175, "grad_norm": 0.0024766335263848305, "learning_rate": 0.001, "loss": 0.3947, "step": 449 }, { "epoch": 0.01241649047896612, "grad_norm": 0.0033247419632971287, "learning_rate": 0.001, "loss": 0.4369, "step": 450 }, { "epoch": 0.012444082680030489, "grad_norm": 0.0030413721688091755, "learning_rate": 0.001, "loss": 0.4193, "step": 451 }, { "epoch": 0.012471674881094859, "grad_norm": 0.0068315728567540646, "learning_rate": 0.001, "loss": 0.4207, "step": 452 }, { "epoch": 0.012499267082159227, "grad_norm": 0.0032153630163520575, "learning_rate": 0.001, "loss": 0.4242, "step": 453 }, { "epoch": 0.012526859283223597, "grad_norm": 0.003297319170087576, "learning_rate": 0.001, "loss": 0.4082, "step": 454 }, { "epoch": 0.012554451484287967, "grad_norm": 0.003063608892261982, "learning_rate": 0.001, "loss": 0.44, "step": 455 }, { "epoch": 0.012582043685352335, "grad_norm": 0.0030015911906957626, "learning_rate": 0.001, "loss": 0.3988, "step": 456 }, { "epoch": 0.012609635886416705, "grad_norm": 0.0032717676367610693, "learning_rate": 0.001, "loss": 0.3561, "step": 457 }, { "epoch": 0.012637228087481073, "grad_norm": 0.0031338001135736704, "learning_rate": 0.001, "loss": 0.393, "step": 458 }, { "epoch": 0.012664820288545443, "grad_norm": 0.0055600800551474094, "learning_rate": 0.001, "loss": 0.4241, "step": 459 }, { "epoch": 0.012692412489609811, "grad_norm": 0.009609074331820011, "learning_rate": 0.001, "loss": 0.3647, "step": 460 }, { "epoch": 0.012720004690674181, "grad_norm": 0.0031838284339755774, "learning_rate": 0.001, "loss": 0.4308, "step": 461 }, { "epoch": 0.01274759689173855, "grad_norm": 0.002876470098271966, "learning_rate": 0.001, "loss": 0.3771, "step": 462 }, { "epoch": 0.01277518909280292, "grad_norm": 0.0027010440826416016, "learning_rate": 0.001, "loss": 0.4392, "step": 463 }, { "epoch": 0.012802781293867289, "grad_norm": 0.0024778309743851423, "learning_rate": 0.001, "loss": 0.446, "step": 464 }, { "epoch": 0.012830373494931657, "grad_norm": 0.0021515442058444023, "learning_rate": 0.001, "loss": 0.4114, "step": 465 }, { "epoch": 0.012857965695996027, "grad_norm": 0.0023079351522028446, "learning_rate": 0.001, "loss": 0.392, "step": 466 }, { "epoch": 0.012885557897060395, "grad_norm": 0.002527826000005007, "learning_rate": 0.001, "loss": 0.4094, "step": 467 }, { "epoch": 0.012913150098124765, "grad_norm": 0.003819882869720459, "learning_rate": 0.001, "loss": 0.3828, "step": 468 }, { "epoch": 0.012940742299189133, "grad_norm": 0.002980564022436738, "learning_rate": 0.001, "loss": 0.4015, "step": 469 }, { "epoch": 0.012968334500253503, "grad_norm": 0.004125640727579594, "learning_rate": 0.001, "loss": 0.4114, "step": 470 }, { "epoch": 0.012995926701317873, "grad_norm": 0.0025943296495825052, "learning_rate": 0.001, "loss": 0.4019, "step": 471 }, { "epoch": 0.013023518902382241, "grad_norm": 0.002629917813464999, "learning_rate": 0.001, "loss": 0.366, "step": 472 }, { "epoch": 0.013051111103446611, "grad_norm": 0.0021362872794270515, "learning_rate": 0.001, "loss": 0.3903, "step": 473 }, { "epoch": 0.01307870330451098, "grad_norm": 0.002968277782201767, "learning_rate": 0.001, "loss": 0.4124, "step": 474 }, { "epoch": 0.01310629550557535, "grad_norm": 0.002851092955097556, "learning_rate": 0.001, "loss": 0.3988, "step": 475 }, { "epoch": 0.013133887706639718, "grad_norm": 0.0023420127108693123, "learning_rate": 0.001, "loss": 0.3874, "step": 476 }, { "epoch": 0.013161479907704087, "grad_norm": 0.003359900787472725, "learning_rate": 0.001, "loss": 0.4202, "step": 477 }, { "epoch": 0.013189072108768457, "grad_norm": 0.0038361374754458666, "learning_rate": 0.001, "loss": 0.392, "step": 478 }, { "epoch": 0.013216664309832826, "grad_norm": 0.003093348117545247, "learning_rate": 0.001, "loss": 0.4009, "step": 479 }, { "epoch": 0.013244256510897195, "grad_norm": 0.003098709974437952, "learning_rate": 0.001, "loss": 0.3942, "step": 480 }, { "epoch": 0.013271848711961564, "grad_norm": 0.002880271291360259, "learning_rate": 0.001, "loss": 0.4004, "step": 481 }, { "epoch": 0.013299440913025934, "grad_norm": 0.005491374060511589, "learning_rate": 0.001, "loss": 0.3652, "step": 482 }, { "epoch": 0.013327033114090302, "grad_norm": 0.0029838404152542353, "learning_rate": 0.001, "loss": 0.4022, "step": 483 }, { "epoch": 0.013354625315154672, "grad_norm": 0.00296381744556129, "learning_rate": 0.001, "loss": 0.4335, "step": 484 }, { "epoch": 0.013382217516219042, "grad_norm": 0.0037699625827372074, "learning_rate": 0.001, "loss": 0.3784, "step": 485 }, { "epoch": 0.01340980971728341, "grad_norm": 0.0022699085529893637, "learning_rate": 0.001, "loss": 0.3953, "step": 486 }, { "epoch": 0.01343740191834778, "grad_norm": 0.003976910375058651, "learning_rate": 0.001, "loss": 0.4096, "step": 487 }, { "epoch": 0.013464994119412148, "grad_norm": 0.003803370287641883, "learning_rate": 0.001, "loss": 0.3852, "step": 488 }, { "epoch": 0.013492586320476518, "grad_norm": 0.0036107334308326244, "learning_rate": 0.001, "loss": 0.3878, "step": 489 }, { "epoch": 0.013520178521540886, "grad_norm": 0.0034353930968791246, "learning_rate": 0.001, "loss": 0.3992, "step": 490 }, { "epoch": 0.013547770722605256, "grad_norm": 0.006415998097509146, "learning_rate": 0.001, "loss": 0.4034, "step": 491 }, { "epoch": 0.013575362923669624, "grad_norm": 0.0031678853556513786, "learning_rate": 0.001, "loss": 0.4182, "step": 492 }, { "epoch": 0.013602955124733994, "grad_norm": 0.003964356612414122, "learning_rate": 0.001, "loss": 0.4311, "step": 493 }, { "epoch": 0.013630547325798364, "grad_norm": 0.004771376959979534, "learning_rate": 0.001, "loss": 0.3865, "step": 494 }, { "epoch": 0.013658139526862732, "grad_norm": 0.0035825970117002726, "learning_rate": 0.001, "loss": 0.3672, "step": 495 }, { "epoch": 0.013685731727927102, "grad_norm": 0.0036343352403491735, "learning_rate": 0.001, "loss": 0.4152, "step": 496 }, { "epoch": 0.01371332392899147, "grad_norm": 0.002659760881215334, "learning_rate": 0.001, "loss": 0.4029, "step": 497 }, { "epoch": 0.01374091613005584, "grad_norm": 0.003316852729767561, "learning_rate": 0.001, "loss": 0.4527, "step": 498 }, { "epoch": 0.013768508331120208, "grad_norm": 0.0034902123734354973, "learning_rate": 0.001, "loss": 0.4, "step": 499 }, { "epoch": 0.013796100532184578, "grad_norm": 0.002378157339990139, "learning_rate": 0.001, "loss": 0.3903, "step": 500 }, { "epoch": 0.013796100532184578, "eval_runtime": 25.0496, "eval_samples_per_second": 1.277, "eval_steps_per_second": 0.16, "step": 500 }, { "epoch": 0.013823692733248948, "grad_norm": 0.005439688451588154, "learning_rate": 0.001, "loss": 0.4152, "step": 501 }, { "epoch": 0.013851284934313316, "grad_norm": 0.0026094792410731316, "learning_rate": 0.001, "loss": 0.3904, "step": 502 }, { "epoch": 0.013878877135377686, "grad_norm": 0.002377827186137438, "learning_rate": 0.001, "loss": 0.4233, "step": 503 }, { "epoch": 0.013906469336442054, "grad_norm": 0.004557560198009014, "learning_rate": 0.001, "loss": 0.4086, "step": 504 }, { "epoch": 0.013934061537506424, "grad_norm": 0.0030535697005689144, "learning_rate": 0.001, "loss": 0.388, "step": 505 }, { "epoch": 0.013961653738570792, "grad_norm": 0.0032748053781688213, "learning_rate": 0.001, "loss": 0.386, "step": 506 }, { "epoch": 0.013989245939635162, "grad_norm": 0.003947910387068987, "learning_rate": 0.001, "loss": 0.4099, "step": 507 }, { "epoch": 0.014016838140699532, "grad_norm": 0.0033470303751528263, "learning_rate": 0.001, "loss": 0.4139, "step": 508 }, { "epoch": 0.0140444303417639, "grad_norm": 0.011195505037903786, "learning_rate": 0.001, "loss": 0.3807, "step": 509 }, { "epoch": 0.01407202254282827, "grad_norm": 0.0028728062752634287, "learning_rate": 0.001, "loss": 0.4007, "step": 510 }, { "epoch": 0.014099614743892638, "grad_norm": 0.0035205583553761244, "learning_rate": 0.001, "loss": 0.3804, "step": 511 }, { "epoch": 0.014127206944957008, "grad_norm": 0.003692334285005927, "learning_rate": 0.001, "loss": 0.3984, "step": 512 }, { "epoch": 0.014154799146021376, "grad_norm": 0.004000712651759386, "learning_rate": 0.001, "loss": 0.4239, "step": 513 }, { "epoch": 0.014182391347085746, "grad_norm": 0.0022262849379330873, "learning_rate": 0.001, "loss": 0.3848, "step": 514 }, { "epoch": 0.014209983548150115, "grad_norm": 0.005194266326725483, "learning_rate": 0.001, "loss": 0.3673, "step": 515 }, { "epoch": 0.014237575749214484, "grad_norm": 0.0022841247264295816, "learning_rate": 0.001, "loss": 0.4089, "step": 516 }, { "epoch": 0.014265167950278854, "grad_norm": 0.003351402934640646, "learning_rate": 0.001, "loss": 0.4278, "step": 517 }, { "epoch": 0.014292760151343223, "grad_norm": 0.0031243113335222006, "learning_rate": 0.001, "loss": 0.3712, "step": 518 }, { "epoch": 0.014320352352407592, "grad_norm": 0.003822315251454711, "learning_rate": 0.001, "loss": 0.4124, "step": 519 }, { "epoch": 0.01434794455347196, "grad_norm": 0.0023426790721714497, "learning_rate": 0.001, "loss": 0.3997, "step": 520 }, { "epoch": 0.01437553675453633, "grad_norm": 0.0027026657480746508, "learning_rate": 0.001, "loss": 0.373, "step": 521 }, { "epoch": 0.014403128955600699, "grad_norm": 0.0023081700783222914, "learning_rate": 0.001, "loss": 0.4221, "step": 522 }, { "epoch": 0.014430721156665069, "grad_norm": 0.003079401096329093, "learning_rate": 0.001, "loss": 0.4128, "step": 523 }, { "epoch": 0.014458313357729439, "grad_norm": 0.0027522866148501635, "learning_rate": 0.001, "loss": 0.397, "step": 524 }, { "epoch": 0.014485905558793807, "grad_norm": 0.002710319822654128, "learning_rate": 0.001, "loss": 0.4254, "step": 525 }, { "epoch": 0.014513497759858177, "grad_norm": 0.002838405082002282, "learning_rate": 0.001, "loss": 0.4054, "step": 526 }, { "epoch": 0.014541089960922545, "grad_norm": 0.0028795620892196894, "learning_rate": 0.001, "loss": 0.4186, "step": 527 }, { "epoch": 0.014568682161986915, "grad_norm": 0.002826494164764881, "learning_rate": 0.001, "loss": 0.4381, "step": 528 }, { "epoch": 0.014596274363051283, "grad_norm": 0.002902933629229665, "learning_rate": 0.001, "loss": 0.4003, "step": 529 }, { "epoch": 0.014623866564115653, "grad_norm": 0.0034344778396189213, "learning_rate": 0.001, "loss": 0.4149, "step": 530 }, { "epoch": 0.014651458765180023, "grad_norm": 0.002652675611898303, "learning_rate": 0.001, "loss": 0.4075, "step": 531 }, { "epoch": 0.01467905096624439, "grad_norm": 0.00368179427459836, "learning_rate": 0.001, "loss": 0.3899, "step": 532 }, { "epoch": 0.01470664316730876, "grad_norm": 0.0028401080053299665, "learning_rate": 0.001, "loss": 0.4247, "step": 533 }, { "epoch": 0.014734235368373129, "grad_norm": 0.0047442615032196045, "learning_rate": 0.001, "loss": 0.4046, "step": 534 }, { "epoch": 0.014761827569437499, "grad_norm": 0.0031780744902789593, "learning_rate": 0.001, "loss": 0.4043, "step": 535 }, { "epoch": 0.014789419770501867, "grad_norm": 0.004496684763580561, "learning_rate": 0.001, "loss": 0.3824, "step": 536 }, { "epoch": 0.014817011971566237, "grad_norm": 0.0026886628475040197, "learning_rate": 0.001, "loss": 0.4136, "step": 537 }, { "epoch": 0.014844604172630607, "grad_norm": 0.004109974484890699, "learning_rate": 0.001, "loss": 0.4547, "step": 538 }, { "epoch": 0.014872196373694975, "grad_norm": 0.004985136911273003, "learning_rate": 0.001, "loss": 0.4171, "step": 539 }, { "epoch": 0.014899788574759345, "grad_norm": 0.00309336488135159, "learning_rate": 0.001, "loss": 0.4374, "step": 540 }, { "epoch": 0.014927380775823713, "grad_norm": 0.0031439203303307295, "learning_rate": 0.001, "loss": 0.4415, "step": 541 }, { "epoch": 0.014954972976888083, "grad_norm": 0.0027853951323777437, "learning_rate": 0.001, "loss": 0.4088, "step": 542 }, { "epoch": 0.014982565177952451, "grad_norm": 0.0026657688431441784, "learning_rate": 0.001, "loss": 0.4178, "step": 543 }, { "epoch": 0.015010157379016821, "grad_norm": 0.01247773040086031, "learning_rate": 0.001, "loss": 0.3682, "step": 544 }, { "epoch": 0.01503774958008119, "grad_norm": 0.002995160175487399, "learning_rate": 0.001, "loss": 0.4147, "step": 545 }, { "epoch": 0.01506534178114556, "grad_norm": 0.002868333365768194, "learning_rate": 0.001, "loss": 0.435, "step": 546 }, { "epoch": 0.015092933982209929, "grad_norm": 0.004190264735370874, "learning_rate": 0.001, "loss": 0.3718, "step": 547 }, { "epoch": 0.015120526183274297, "grad_norm": 0.003989442717283964, "learning_rate": 0.001, "loss": 0.4276, "step": 548 }, { "epoch": 0.015148118384338667, "grad_norm": 0.0031920846085995436, "learning_rate": 0.001, "loss": 0.428, "step": 549 }, { "epoch": 0.015175710585403035, "grad_norm": 0.0037179081700742245, "learning_rate": 0.001, "loss": 0.3972, "step": 550 }, { "epoch": 0.015203302786467405, "grad_norm": 0.003381606424227357, "learning_rate": 0.001, "loss": 0.3809, "step": 551 }, { "epoch": 0.015230894987531773, "grad_norm": 0.0029906374402344227, "learning_rate": 0.001, "loss": 0.4199, "step": 552 }, { "epoch": 0.015258487188596143, "grad_norm": 0.0036886725574731827, "learning_rate": 0.001, "loss": 0.4128, "step": 553 }, { "epoch": 0.015286079389660513, "grad_norm": 0.0037765917368233204, "learning_rate": 0.001, "loss": 0.4126, "step": 554 }, { "epoch": 0.015313671590724881, "grad_norm": 0.00531379971653223, "learning_rate": 0.001, "loss": 0.4103, "step": 555 }, { "epoch": 0.015341263791789251, "grad_norm": 0.003965814132243395, "learning_rate": 0.001, "loss": 0.4012, "step": 556 }, { "epoch": 0.01536885599285362, "grad_norm": 0.0031218293588608503, "learning_rate": 0.001, "loss": 0.4285, "step": 557 }, { "epoch": 0.01539644819391799, "grad_norm": 0.0027389719616621733, "learning_rate": 0.001, "loss": 0.356, "step": 558 }, { "epoch": 0.015424040394982358, "grad_norm": 0.003185281064361334, "learning_rate": 0.001, "loss": 0.4471, "step": 559 }, { "epoch": 0.015451632596046727, "grad_norm": 0.003925340250134468, "learning_rate": 0.001, "loss": 0.4023, "step": 560 }, { "epoch": 0.015479224797111097, "grad_norm": 0.0033257382456213236, "learning_rate": 0.001, "loss": 0.3707, "step": 561 }, { "epoch": 0.015506816998175466, "grad_norm": 0.0033314244356006384, "learning_rate": 0.001, "loss": 0.405, "step": 562 }, { "epoch": 0.015534409199239835, "grad_norm": 0.0034566351678222418, "learning_rate": 0.001, "loss": 0.396, "step": 563 }, { "epoch": 0.015562001400304204, "grad_norm": 0.0038912983145564795, "learning_rate": 0.001, "loss": 0.3906, "step": 564 }, { "epoch": 0.015589593601368574, "grad_norm": 0.004165092017501593, "learning_rate": 0.001, "loss": 0.3926, "step": 565 }, { "epoch": 0.015617185802432942, "grad_norm": 0.004660595208406448, "learning_rate": 0.001, "loss": 0.3888, "step": 566 }, { "epoch": 0.01564477800349731, "grad_norm": 0.0033354307524859905, "learning_rate": 0.001, "loss": 0.372, "step": 567 }, { "epoch": 0.01567237020456168, "grad_norm": 0.002619788981974125, "learning_rate": 0.001, "loss": 0.3913, "step": 568 }, { "epoch": 0.01569996240562605, "grad_norm": 0.004775674548000097, "learning_rate": 0.001, "loss": 0.3792, "step": 569 }, { "epoch": 0.015727554606690418, "grad_norm": 0.0030274325981736183, "learning_rate": 0.001, "loss": 0.4075, "step": 570 }, { "epoch": 0.015755146807754788, "grad_norm": 0.0031505031511187553, "learning_rate": 0.001, "loss": 0.4031, "step": 571 }, { "epoch": 0.015782739008819158, "grad_norm": 0.005169673822820187, "learning_rate": 0.001, "loss": 0.3862, "step": 572 }, { "epoch": 0.015810331209883528, "grad_norm": 0.002902763895690441, "learning_rate": 0.001, "loss": 0.3947, "step": 573 }, { "epoch": 0.015837923410947894, "grad_norm": 0.0037871890235692263, "learning_rate": 0.001, "loss": 0.3957, "step": 574 }, { "epoch": 0.015865515612012264, "grad_norm": 0.004014831036329269, "learning_rate": 0.001, "loss": 0.4278, "step": 575 }, { "epoch": 0.015893107813076634, "grad_norm": 0.004346000496298075, "learning_rate": 0.001, "loss": 0.3972, "step": 576 }, { "epoch": 0.015920700014141004, "grad_norm": 0.0033372112084180117, "learning_rate": 0.001, "loss": 0.4178, "step": 577 }, { "epoch": 0.015948292215205374, "grad_norm": 0.0031993126031011343, "learning_rate": 0.001, "loss": 0.3842, "step": 578 }, { "epoch": 0.01597588441626974, "grad_norm": 0.0026338782627135515, "learning_rate": 0.001, "loss": 0.422, "step": 579 }, { "epoch": 0.01600347661733411, "grad_norm": 0.003543695667758584, "learning_rate": 0.001, "loss": 0.385, "step": 580 }, { "epoch": 0.01603106881839848, "grad_norm": 0.003536064876243472, "learning_rate": 0.001, "loss": 0.3892, "step": 581 }, { "epoch": 0.01605866101946285, "grad_norm": 0.0031016895081847906, "learning_rate": 0.001, "loss": 0.4282, "step": 582 }, { "epoch": 0.016086253220527216, "grad_norm": 0.004082076251506805, "learning_rate": 0.001, "loss": 0.3919, "step": 583 }, { "epoch": 0.016113845421591586, "grad_norm": 0.003274913877248764, "learning_rate": 0.001, "loss": 0.4003, "step": 584 }, { "epoch": 0.016141437622655956, "grad_norm": 0.003890890395268798, "learning_rate": 0.001, "loss": 0.4164, "step": 585 }, { "epoch": 0.016169029823720326, "grad_norm": 0.0035965372808277607, "learning_rate": 0.001, "loss": 0.3856, "step": 586 }, { "epoch": 0.016196622024784696, "grad_norm": 0.02042904868721962, "learning_rate": 0.001, "loss": 0.3851, "step": 587 }, { "epoch": 0.016224214225849062, "grad_norm": 0.00438309321179986, "learning_rate": 0.001, "loss": 0.363, "step": 588 }, { "epoch": 0.016251806426913432, "grad_norm": 0.004080008715391159, "learning_rate": 0.001, "loss": 0.4066, "step": 589 }, { "epoch": 0.016279398627977802, "grad_norm": 0.004915320780128241, "learning_rate": 0.001, "loss": 0.3786, "step": 590 }, { "epoch": 0.016306990829042172, "grad_norm": 0.0037770781200379133, "learning_rate": 0.001, "loss": 0.451, "step": 591 }, { "epoch": 0.016334583030106542, "grad_norm": 0.0039979214780032635, "learning_rate": 0.001, "loss": 0.3812, "step": 592 }, { "epoch": 0.01636217523117091, "grad_norm": 0.0035175576340407133, "learning_rate": 0.001, "loss": 0.3929, "step": 593 }, { "epoch": 0.01638976743223528, "grad_norm": 0.003597306553274393, "learning_rate": 0.001, "loss": 0.3986, "step": 594 }, { "epoch": 0.016417359633299648, "grad_norm": 0.002302913460880518, "learning_rate": 0.001, "loss": 0.423, "step": 595 }, { "epoch": 0.016444951834364018, "grad_norm": 0.0026150753255933523, "learning_rate": 0.001, "loss": 0.3875, "step": 596 }, { "epoch": 0.016472544035428385, "grad_norm": 0.002459397306665778, "learning_rate": 0.001, "loss": 0.4173, "step": 597 }, { "epoch": 0.016500136236492755, "grad_norm": 0.00271104765124619, "learning_rate": 0.001, "loss": 0.417, "step": 598 }, { "epoch": 0.016527728437557124, "grad_norm": 0.003122537862509489, "learning_rate": 0.001, "loss": 0.383, "step": 599 }, { "epoch": 0.016555320638621494, "grad_norm": 0.0028217623475939035, "learning_rate": 0.001, "loss": 0.4076, "step": 600 }, { "epoch": 0.016582912839685864, "grad_norm": 0.0029432664159685373, "learning_rate": 0.001, "loss": 0.4022, "step": 601 }, { "epoch": 0.01661050504075023, "grad_norm": 0.0030998094007372856, "learning_rate": 0.001, "loss": 0.4058, "step": 602 }, { "epoch": 0.0166380972418146, "grad_norm": 0.005838985554873943, "learning_rate": 0.001, "loss": 0.3998, "step": 603 }, { "epoch": 0.01666568944287897, "grad_norm": 0.003850255161523819, "learning_rate": 0.001, "loss": 0.3664, "step": 604 }, { "epoch": 0.01669328164394334, "grad_norm": 0.0026384994853287935, "learning_rate": 0.001, "loss": 0.4035, "step": 605 }, { "epoch": 0.016720873845007707, "grad_norm": 0.003034224035218358, "learning_rate": 0.001, "loss": 0.4005, "step": 606 }, { "epoch": 0.016748466046072077, "grad_norm": 0.003741856198757887, "learning_rate": 0.001, "loss": 0.3892, "step": 607 }, { "epoch": 0.016776058247136447, "grad_norm": 0.0029668582137674093, "learning_rate": 0.001, "loss": 0.4492, "step": 608 }, { "epoch": 0.016803650448200817, "grad_norm": 0.006931662559509277, "learning_rate": 0.001, "loss": 0.3865, "step": 609 }, { "epoch": 0.016831242649265186, "grad_norm": 0.005429959390312433, "learning_rate": 0.001, "loss": 0.3945, "step": 610 }, { "epoch": 0.016858834850329553, "grad_norm": 0.008053823374211788, "learning_rate": 0.001, "loss": 0.3765, "step": 611 }, { "epoch": 0.016886427051393923, "grad_norm": 0.0056150988675653934, "learning_rate": 0.001, "loss": 0.3773, "step": 612 }, { "epoch": 0.016914019252458293, "grad_norm": 0.005078485235571861, "learning_rate": 0.001, "loss": 0.3796, "step": 613 }, { "epoch": 0.016941611453522663, "grad_norm": 0.0043738181702792645, "learning_rate": 0.001, "loss": 0.4368, "step": 614 }, { "epoch": 0.016969203654587033, "grad_norm": 0.004265911877155304, "learning_rate": 0.001, "loss": 0.4181, "step": 615 }, { "epoch": 0.0169967958556514, "grad_norm": 0.004496406763792038, "learning_rate": 0.001, "loss": 0.3882, "step": 616 }, { "epoch": 0.01702438805671577, "grad_norm": 0.004225629381835461, "learning_rate": 0.001, "loss": 0.4094, "step": 617 }, { "epoch": 0.01705198025778014, "grad_norm": 0.004265735857188702, "learning_rate": 0.001, "loss": 0.3624, "step": 618 }, { "epoch": 0.01707957245884451, "grad_norm": 0.0045761000365018845, "learning_rate": 0.001, "loss": 0.4298, "step": 619 }, { "epoch": 0.017107164659908875, "grad_norm": 0.0046028876677155495, "learning_rate": 0.001, "loss": 0.4366, "step": 620 }, { "epoch": 0.017134756860973245, "grad_norm": 0.003382893279194832, "learning_rate": 0.001, "loss": 0.385, "step": 621 }, { "epoch": 0.017162349062037615, "grad_norm": 0.003314296016469598, "learning_rate": 0.001, "loss": 0.3811, "step": 622 }, { "epoch": 0.017189941263101985, "grad_norm": 0.0035887316334992647, "learning_rate": 0.001, "loss": 0.4035, "step": 623 }, { "epoch": 0.017217533464166355, "grad_norm": 0.003954799845814705, "learning_rate": 0.001, "loss": 0.4236, "step": 624 }, { "epoch": 0.01724512566523072, "grad_norm": 0.003979894332587719, "learning_rate": 0.001, "loss": 0.3853, "step": 625 }, { "epoch": 0.01727271786629509, "grad_norm": 0.0029897873755544424, "learning_rate": 0.001, "loss": 0.3907, "step": 626 }, { "epoch": 0.01730031006735946, "grad_norm": 0.0038952503819018602, "learning_rate": 0.001, "loss": 0.4244, "step": 627 }, { "epoch": 0.01732790226842383, "grad_norm": 0.004283791407942772, "learning_rate": 0.001, "loss": 0.3884, "step": 628 }, { "epoch": 0.0173554944694882, "grad_norm": 0.004268042277544737, "learning_rate": 0.001, "loss": 0.4143, "step": 629 }, { "epoch": 0.017383086670552567, "grad_norm": 0.0034829305950552225, "learning_rate": 0.001, "loss": 0.4026, "step": 630 }, { "epoch": 0.017410678871616937, "grad_norm": 0.004891110584139824, "learning_rate": 0.001, "loss": 0.4131, "step": 631 }, { "epoch": 0.017438271072681307, "grad_norm": 0.009508724324405193, "learning_rate": 0.001, "loss": 0.4033, "step": 632 }, { "epoch": 0.017465863273745677, "grad_norm": 0.003979192115366459, "learning_rate": 0.001, "loss": 0.3876, "step": 633 }, { "epoch": 0.017493455474810043, "grad_norm": 0.006190054584294558, "learning_rate": 0.001, "loss": 0.4007, "step": 634 }, { "epoch": 0.017521047675874413, "grad_norm": 0.003986046649515629, "learning_rate": 0.001, "loss": 0.4067, "step": 635 }, { "epoch": 0.017548639876938783, "grad_norm": 0.002939463360235095, "learning_rate": 0.001, "loss": 0.4167, "step": 636 }, { "epoch": 0.017576232078003153, "grad_norm": 0.005479493178427219, "learning_rate": 0.001, "loss": 0.3996, "step": 637 }, { "epoch": 0.017603824279067523, "grad_norm": 0.00527595542371273, "learning_rate": 0.001, "loss": 0.4107, "step": 638 }, { "epoch": 0.01763141648013189, "grad_norm": 0.004213378299027681, "learning_rate": 0.001, "loss": 0.3912, "step": 639 }, { "epoch": 0.01765900868119626, "grad_norm": 0.004745858255773783, "learning_rate": 0.001, "loss": 0.4167, "step": 640 }, { "epoch": 0.01768660088226063, "grad_norm": 0.0038344142958521843, "learning_rate": 0.001, "loss": 0.3954, "step": 641 }, { "epoch": 0.017714193083325, "grad_norm": 0.004831280559301376, "learning_rate": 0.001, "loss": 0.3904, "step": 642 }, { "epoch": 0.017741785284389366, "grad_norm": 0.0035769357345998287, "learning_rate": 0.001, "loss": 0.3951, "step": 643 }, { "epoch": 0.017769377485453736, "grad_norm": 0.00572391739115119, "learning_rate": 0.001, "loss": 0.4168, "step": 644 }, { "epoch": 0.017796969686518106, "grad_norm": 0.005403329152613878, "learning_rate": 0.001, "loss": 0.4049, "step": 645 }, { "epoch": 0.017824561887582475, "grad_norm": 0.003341693663969636, "learning_rate": 0.001, "loss": 0.3905, "step": 646 }, { "epoch": 0.017852154088646845, "grad_norm": 0.00487833097577095, "learning_rate": 0.001, "loss": 0.3853, "step": 647 }, { "epoch": 0.017879746289711212, "grad_norm": 0.004141534212976694, "learning_rate": 0.001, "loss": 0.3956, "step": 648 }, { "epoch": 0.01790733849077558, "grad_norm": 0.004081532824784517, "learning_rate": 0.001, "loss": 0.3974, "step": 649 }, { "epoch": 0.01793493069183995, "grad_norm": 0.004535014741122723, "learning_rate": 0.001, "loss": 0.4088, "step": 650 }, { "epoch": 0.01796252289290432, "grad_norm": 0.003384925192221999, "learning_rate": 0.001, "loss": 0.3795, "step": 651 }, { "epoch": 0.01799011509396869, "grad_norm": 0.003582439851015806, "learning_rate": 0.001, "loss": 0.3926, "step": 652 }, { "epoch": 0.018017707295033058, "grad_norm": 0.0033751516602933407, "learning_rate": 0.001, "loss": 0.3909, "step": 653 }, { "epoch": 0.018045299496097428, "grad_norm": 0.004208603408187628, "learning_rate": 0.001, "loss": 0.4124, "step": 654 }, { "epoch": 0.018072891697161798, "grad_norm": 0.005743230227380991, "learning_rate": 0.001, "loss": 0.3685, "step": 655 }, { "epoch": 0.018100483898226168, "grad_norm": 0.00536683714017272, "learning_rate": 0.001, "loss": 0.3488, "step": 656 }, { "epoch": 0.018128076099290534, "grad_norm": 0.003961362410336733, "learning_rate": 0.001, "loss": 0.4241, "step": 657 }, { "epoch": 0.018155668300354904, "grad_norm": 0.004491451662033796, "learning_rate": 0.001, "loss": 0.3928, "step": 658 }, { "epoch": 0.018183260501419274, "grad_norm": 0.005195736885070801, "learning_rate": 0.001, "loss": 0.406, "step": 659 }, { "epoch": 0.018210852702483644, "grad_norm": 0.003914379980415106, "learning_rate": 0.001, "loss": 0.3541, "step": 660 }, { "epoch": 0.018238444903548014, "grad_norm": 0.00485312519595027, "learning_rate": 0.001, "loss": 0.3992, "step": 661 }, { "epoch": 0.01826603710461238, "grad_norm": 0.00337631581351161, "learning_rate": 0.001, "loss": 0.4469, "step": 662 }, { "epoch": 0.01829362930567675, "grad_norm": 0.00616971543058753, "learning_rate": 0.001, "loss": 0.3783, "step": 663 }, { "epoch": 0.01832122150674112, "grad_norm": 0.0032724293414503336, "learning_rate": 0.001, "loss": 0.3717, "step": 664 }, { "epoch": 0.01834881370780549, "grad_norm": 0.003196743782609701, "learning_rate": 0.001, "loss": 0.39, "step": 665 }, { "epoch": 0.018376405908869856, "grad_norm": 0.00492246774956584, "learning_rate": 0.001, "loss": 0.4178, "step": 666 }, { "epoch": 0.018403998109934226, "grad_norm": 0.0029899398796260357, "learning_rate": 0.001, "loss": 0.4323, "step": 667 }, { "epoch": 0.018431590310998596, "grad_norm": 0.00527157774195075, "learning_rate": 0.001, "loss": 0.4103, "step": 668 }, { "epoch": 0.018459182512062966, "grad_norm": 0.002288073068484664, "learning_rate": 0.001, "loss": 0.407, "step": 669 }, { "epoch": 0.018486774713127336, "grad_norm": 0.0031164512038230896, "learning_rate": 0.001, "loss": 0.4287, "step": 670 }, { "epoch": 0.018514366914191702, "grad_norm": 0.0031951169949024916, "learning_rate": 0.001, "loss": 0.3753, "step": 671 }, { "epoch": 0.018541959115256072, "grad_norm": 0.0035726516507565975, "learning_rate": 0.001, "loss": 0.4032, "step": 672 }, { "epoch": 0.018569551316320442, "grad_norm": 0.0036449262406677008, "learning_rate": 0.001, "loss": 0.4154, "step": 673 }, { "epoch": 0.018597143517384812, "grad_norm": 0.003955410327762365, "learning_rate": 0.001, "loss": 0.3787, "step": 674 }, { "epoch": 0.018624735718449182, "grad_norm": 0.0030794877093285322, "learning_rate": 0.001, "loss": 0.3982, "step": 675 }, { "epoch": 0.01865232791951355, "grad_norm": 0.00874530989676714, "learning_rate": 0.001, "loss": 0.3859, "step": 676 }, { "epoch": 0.01867992012057792, "grad_norm": 0.0042561995796859264, "learning_rate": 0.001, "loss": 0.4258, "step": 677 }, { "epoch": 0.018707512321642288, "grad_norm": 0.00404794467613101, "learning_rate": 0.001, "loss": 0.4028, "step": 678 }, { "epoch": 0.018735104522706658, "grad_norm": 0.004145196173340082, "learning_rate": 0.001, "loss": 0.4131, "step": 679 }, { "epoch": 0.018762696723771025, "grad_norm": 0.00520210200920701, "learning_rate": 0.001, "loss": 0.375, "step": 680 }, { "epoch": 0.018790288924835395, "grad_norm": 0.004079956095665693, "learning_rate": 0.001, "loss": 0.377, "step": 681 }, { "epoch": 0.018817881125899764, "grad_norm": 0.0040541659109294415, "learning_rate": 0.001, "loss": 0.3939, "step": 682 }, { "epoch": 0.018845473326964134, "grad_norm": 0.004334924276918173, "learning_rate": 0.001, "loss": 0.414, "step": 683 }, { "epoch": 0.018873065528028504, "grad_norm": 0.0037169347051531076, "learning_rate": 0.001, "loss": 0.4317, "step": 684 }, { "epoch": 0.01890065772909287, "grad_norm": 0.005052134394645691, "learning_rate": 0.001, "loss": 0.3915, "step": 685 }, { "epoch": 0.01892824993015724, "grad_norm": 0.003371543250977993, "learning_rate": 0.001, "loss": 0.4131, "step": 686 }, { "epoch": 0.01895584213122161, "grad_norm": 0.002939490368589759, "learning_rate": 0.001, "loss": 0.3933, "step": 687 }, { "epoch": 0.01898343433228598, "grad_norm": 0.003637490328401327, "learning_rate": 0.001, "loss": 0.3654, "step": 688 }, { "epoch": 0.019011026533350347, "grad_norm": 0.003198450431227684, "learning_rate": 0.001, "loss": 0.4128, "step": 689 }, { "epoch": 0.019038618734414717, "grad_norm": 0.004216643515974283, "learning_rate": 0.001, "loss": 0.4033, "step": 690 }, { "epoch": 0.019066210935479087, "grad_norm": 0.004692459478974342, "learning_rate": 0.001, "loss": 0.3592, "step": 691 }, { "epoch": 0.019093803136543457, "grad_norm": 0.0043529337272048, "learning_rate": 0.001, "loss": 0.4099, "step": 692 }, { "epoch": 0.019121395337607826, "grad_norm": 0.004598288331180811, "learning_rate": 0.001, "loss": 0.3994, "step": 693 }, { "epoch": 0.019148987538672193, "grad_norm": 0.0035506533458828926, "learning_rate": 0.001, "loss": 0.3788, "step": 694 }, { "epoch": 0.019176579739736563, "grad_norm": 0.0035839497577399015, "learning_rate": 0.001, "loss": 0.4209, "step": 695 }, { "epoch": 0.019204171940800933, "grad_norm": 0.005143923219293356, "learning_rate": 0.001, "loss": 0.4011, "step": 696 }, { "epoch": 0.019231764141865303, "grad_norm": 0.003638223512098193, "learning_rate": 0.001, "loss": 0.4007, "step": 697 }, { "epoch": 0.019259356342929673, "grad_norm": 0.003875188762322068, "learning_rate": 0.001, "loss": 0.3868, "step": 698 }, { "epoch": 0.01928694854399404, "grad_norm": 0.004542836919426918, "learning_rate": 0.001, "loss": 0.4026, "step": 699 }, { "epoch": 0.01931454074505841, "grad_norm": 0.00382284470833838, "learning_rate": 0.001, "loss": 0.4094, "step": 700 }, { "epoch": 0.01934213294612278, "grad_norm": 0.004041844513267279, "learning_rate": 0.001, "loss": 0.4014, "step": 701 }, { "epoch": 0.01936972514718715, "grad_norm": 0.0031541790813207626, "learning_rate": 0.001, "loss": 0.4363, "step": 702 }, { "epoch": 0.019397317348251515, "grad_norm": 0.00377883343026042, "learning_rate": 0.001, "loss": 0.3953, "step": 703 }, { "epoch": 0.019424909549315885, "grad_norm": 0.0043754116632044315, "learning_rate": 0.001, "loss": 0.4191, "step": 704 }, { "epoch": 0.019452501750380255, "grad_norm": 0.0037639569491147995, "learning_rate": 0.001, "loss": 0.399, "step": 705 }, { "epoch": 0.019480093951444625, "grad_norm": 0.003134689759463072, "learning_rate": 0.001, "loss": 0.3875, "step": 706 }, { "epoch": 0.019507686152508995, "grad_norm": 0.0035348469391465187, "learning_rate": 0.001, "loss": 0.3759, "step": 707 }, { "epoch": 0.01953527835357336, "grad_norm": 0.0043303752318024635, "learning_rate": 0.001, "loss": 0.4274, "step": 708 }, { "epoch": 0.01956287055463773, "grad_norm": 0.004089695867151022, "learning_rate": 0.001, "loss": 0.4217, "step": 709 }, { "epoch": 0.0195904627557021, "grad_norm": 0.004323967732489109, "learning_rate": 0.001, "loss": 0.4269, "step": 710 }, { "epoch": 0.01961805495676647, "grad_norm": 0.003221880178898573, "learning_rate": 0.001, "loss": 0.4217, "step": 711 }, { "epoch": 0.019645647157830837, "grad_norm": 0.004038861952722073, "learning_rate": 0.001, "loss": 0.3987, "step": 712 }, { "epoch": 0.019673239358895207, "grad_norm": 0.00399352191016078, "learning_rate": 0.001, "loss": 0.4151, "step": 713 }, { "epoch": 0.019700831559959577, "grad_norm": 0.003673392115160823, "learning_rate": 0.001, "loss": 0.408, "step": 714 }, { "epoch": 0.019728423761023947, "grad_norm": 0.004011491779237986, "learning_rate": 0.001, "loss": 0.4067, "step": 715 }, { "epoch": 0.019756015962088317, "grad_norm": 0.005186786409467459, "learning_rate": 0.001, "loss": 0.4198, "step": 716 }, { "epoch": 0.019783608163152683, "grad_norm": 0.005963923409581184, "learning_rate": 0.001, "loss": 0.3882, "step": 717 }, { "epoch": 0.019811200364217053, "grad_norm": 0.004075867123901844, "learning_rate": 0.001, "loss": 0.4152, "step": 718 }, { "epoch": 0.019838792565281423, "grad_norm": 0.0034671409521251917, "learning_rate": 0.001, "loss": 0.4068, "step": 719 }, { "epoch": 0.019866384766345793, "grad_norm": 0.004385409411042929, "learning_rate": 0.001, "loss": 0.3911, "step": 720 }, { "epoch": 0.019893976967410163, "grad_norm": 0.0036052153445780277, "learning_rate": 0.001, "loss": 0.4123, "step": 721 }, { "epoch": 0.01992156916847453, "grad_norm": 0.0038659870624542236, "learning_rate": 0.001, "loss": 0.415, "step": 722 }, { "epoch": 0.0199491613695389, "grad_norm": 0.006984284613281488, "learning_rate": 0.001, "loss": 0.3863, "step": 723 }, { "epoch": 0.01997675357060327, "grad_norm": 0.00536124873906374, "learning_rate": 0.001, "loss": 0.4265, "step": 724 }, { "epoch": 0.02000434577166764, "grad_norm": 0.005374076310545206, "learning_rate": 0.001, "loss": 0.3992, "step": 725 }, { "epoch": 0.020031937972732006, "grad_norm": 0.004354424774646759, "learning_rate": 0.001, "loss": 0.4103, "step": 726 }, { "epoch": 0.020059530173796376, "grad_norm": 0.004792310297489166, "learning_rate": 0.001, "loss": 0.4311, "step": 727 }, { "epoch": 0.020087122374860746, "grad_norm": 0.004100026097148657, "learning_rate": 0.001, "loss": 0.384, "step": 728 }, { "epoch": 0.020114714575925115, "grad_norm": 0.00429933238774538, "learning_rate": 0.001, "loss": 0.4231, "step": 729 }, { "epoch": 0.020142306776989485, "grad_norm": 0.003623842727392912, "learning_rate": 0.001, "loss": 0.3872, "step": 730 }, { "epoch": 0.020169898978053852, "grad_norm": 0.003992657642811537, "learning_rate": 0.001, "loss": 0.4407, "step": 731 }, { "epoch": 0.02019749117911822, "grad_norm": 0.004881622269749641, "learning_rate": 0.001, "loss": 0.3723, "step": 732 }, { "epoch": 0.02022508338018259, "grad_norm": 0.004593881778419018, "learning_rate": 0.001, "loss": 0.4067, "step": 733 }, { "epoch": 0.02025267558124696, "grad_norm": 0.00592020945623517, "learning_rate": 0.001, "loss": 0.4004, "step": 734 }, { "epoch": 0.02028026778231133, "grad_norm": 0.004086300730705261, "learning_rate": 0.001, "loss": 0.365, "step": 735 }, { "epoch": 0.020307859983375698, "grad_norm": 0.008120741695165634, "learning_rate": 0.001, "loss": 0.4389, "step": 736 }, { "epoch": 0.020335452184440068, "grad_norm": 0.003741480875760317, "learning_rate": 0.001, "loss": 0.4148, "step": 737 }, { "epoch": 0.020363044385504438, "grad_norm": 0.00764830969274044, "learning_rate": 0.001, "loss": 0.3817, "step": 738 }, { "epoch": 0.020390636586568808, "grad_norm": 0.0035037719644606113, "learning_rate": 0.001, "loss": 0.4163, "step": 739 }, { "epoch": 0.020418228787633174, "grad_norm": 0.00437674205750227, "learning_rate": 0.001, "loss": 0.4059, "step": 740 }, { "epoch": 0.020445820988697544, "grad_norm": 0.004506263881921768, "learning_rate": 0.001, "loss": 0.4221, "step": 741 }, { "epoch": 0.020473413189761914, "grad_norm": 0.004545763600617647, "learning_rate": 0.001, "loss": 0.3621, "step": 742 }, { "epoch": 0.020501005390826284, "grad_norm": 0.0050647263415157795, "learning_rate": 0.001, "loss": 0.3866, "step": 743 }, { "epoch": 0.020528597591890654, "grad_norm": 0.006431067828088999, "learning_rate": 0.001, "loss": 0.3693, "step": 744 }, { "epoch": 0.02055618979295502, "grad_norm": 0.00419000955298543, "learning_rate": 0.001, "loss": 0.4093, "step": 745 }, { "epoch": 0.02058378199401939, "grad_norm": 0.005267234519124031, "learning_rate": 0.001, "loss": 0.3822, "step": 746 }, { "epoch": 0.02061137419508376, "grad_norm": 0.0034249969758093357, "learning_rate": 0.001, "loss": 0.4225, "step": 747 }, { "epoch": 0.02063896639614813, "grad_norm": 0.00436822697520256, "learning_rate": 0.001, "loss": 0.3655, "step": 748 }, { "epoch": 0.020666558597212496, "grad_norm": 0.002724075224250555, "learning_rate": 0.001, "loss": 0.4055, "step": 749 }, { "epoch": 0.020694150798276866, "grad_norm": 0.0030281811486929655, "learning_rate": 0.001, "loss": 0.3682, "step": 750 }, { "epoch": 0.020721742999341236, "grad_norm": 0.004672218579798937, "learning_rate": 0.001, "loss": 0.4083, "step": 751 }, { "epoch": 0.020749335200405606, "grad_norm": 0.004708775784820318, "learning_rate": 0.001, "loss": 0.3751, "step": 752 }, { "epoch": 0.020776927401469976, "grad_norm": 0.009867183864116669, "learning_rate": 0.001, "loss": 0.3943, "step": 753 }, { "epoch": 0.020804519602534342, "grad_norm": 0.003236403688788414, "learning_rate": 0.001, "loss": 0.3786, "step": 754 }, { "epoch": 0.020832111803598712, "grad_norm": 0.003183891996741295, "learning_rate": 0.001, "loss": 0.3921, "step": 755 }, { "epoch": 0.020859704004663082, "grad_norm": 0.0026451335288584232, "learning_rate": 0.001, "loss": 0.3793, "step": 756 }, { "epoch": 0.020887296205727452, "grad_norm": 0.0030694296583533287, "learning_rate": 0.001, "loss": 0.4227, "step": 757 }, { "epoch": 0.020914888406791822, "grad_norm": 0.004503907170146704, "learning_rate": 0.001, "loss": 0.365, "step": 758 }, { "epoch": 0.02094248060785619, "grad_norm": 0.0033429849427193403, "learning_rate": 0.001, "loss": 0.4116, "step": 759 }, { "epoch": 0.02097007280892056, "grad_norm": 0.005442556459456682, "learning_rate": 0.001, "loss": 0.3935, "step": 760 }, { "epoch": 0.020997665009984928, "grad_norm": 0.0036932167131453753, "learning_rate": 0.001, "loss": 0.4113, "step": 761 }, { "epoch": 0.021025257211049298, "grad_norm": 0.004506903700530529, "learning_rate": 0.001, "loss": 0.4098, "step": 762 }, { "epoch": 0.021052849412113665, "grad_norm": 0.003803919767960906, "learning_rate": 0.001, "loss": 0.3865, "step": 763 }, { "epoch": 0.021080441613178034, "grad_norm": 0.005300893913954496, "learning_rate": 0.001, "loss": 0.4023, "step": 764 }, { "epoch": 0.021108033814242404, "grad_norm": 0.005235253367573023, "learning_rate": 0.001, "loss": 0.4339, "step": 765 }, { "epoch": 0.021135626015306774, "grad_norm": 0.00439833290874958, "learning_rate": 0.001, "loss": 0.4012, "step": 766 }, { "epoch": 0.021163218216371144, "grad_norm": 0.003949869889765978, "learning_rate": 0.001, "loss": 0.3549, "step": 767 }, { "epoch": 0.02119081041743551, "grad_norm": 0.0055135395377874374, "learning_rate": 0.001, "loss": 0.3876, "step": 768 }, { "epoch": 0.02121840261849988, "grad_norm": 0.004492396488785744, "learning_rate": 0.001, "loss": 0.3639, "step": 769 }, { "epoch": 0.02124599481956425, "grad_norm": 0.0040711937472224236, "learning_rate": 0.001, "loss": 0.3678, "step": 770 }, { "epoch": 0.02127358702062862, "grad_norm": 0.0038428730331361294, "learning_rate": 0.001, "loss": 0.437, "step": 771 }, { "epoch": 0.021301179221692987, "grad_norm": 0.003672838443890214, "learning_rate": 0.001, "loss": 0.397, "step": 772 }, { "epoch": 0.021328771422757357, "grad_norm": 0.003955157473683357, "learning_rate": 0.001, "loss": 0.4375, "step": 773 }, { "epoch": 0.021356363623821727, "grad_norm": 0.005334306508302689, "learning_rate": 0.001, "loss": 0.4313, "step": 774 }, { "epoch": 0.021383955824886097, "grad_norm": 0.004772811662405729, "learning_rate": 0.001, "loss": 0.4244, "step": 775 }, { "epoch": 0.021411548025950466, "grad_norm": 0.005606191698461771, "learning_rate": 0.001, "loss": 0.3974, "step": 776 }, { "epoch": 0.021439140227014833, "grad_norm": 0.004244519397616386, "learning_rate": 0.001, "loss": 0.3942, "step": 777 }, { "epoch": 0.021466732428079203, "grad_norm": 0.0028992686420679092, "learning_rate": 0.001, "loss": 0.4309, "step": 778 }, { "epoch": 0.021494324629143573, "grad_norm": 0.0031759096309542656, "learning_rate": 0.001, "loss": 0.437, "step": 779 }, { "epoch": 0.021521916830207943, "grad_norm": 0.007329195272177458, "learning_rate": 0.001, "loss": 0.3619, "step": 780 }, { "epoch": 0.021549509031272313, "grad_norm": 0.006688808090984821, "learning_rate": 0.001, "loss": 0.4491, "step": 781 }, { "epoch": 0.02157710123233668, "grad_norm": 0.00896657258272171, "learning_rate": 0.001, "loss": 0.4072, "step": 782 }, { "epoch": 0.02160469343340105, "grad_norm": 0.005213500466197729, "learning_rate": 0.001, "loss": 0.4435, "step": 783 }, { "epoch": 0.02163228563446542, "grad_norm": 0.0036038621328771114, "learning_rate": 0.001, "loss": 0.4152, "step": 784 }, { "epoch": 0.02165987783552979, "grad_norm": 0.003994919825345278, "learning_rate": 0.001, "loss": 0.3968, "step": 785 }, { "epoch": 0.021687470036594155, "grad_norm": 0.004139062017202377, "learning_rate": 0.001, "loss": 0.395, "step": 786 }, { "epoch": 0.021715062237658525, "grad_norm": 0.008915436454117298, "learning_rate": 0.001, "loss": 0.421, "step": 787 }, { "epoch": 0.021742654438722895, "grad_norm": 0.003705317387357354, "learning_rate": 0.001, "loss": 0.3576, "step": 788 }, { "epoch": 0.021770246639787265, "grad_norm": 0.004881418775767088, "learning_rate": 0.001, "loss": 0.4031, "step": 789 }, { "epoch": 0.021797838840851635, "grad_norm": 0.00573571864515543, "learning_rate": 0.001, "loss": 0.4116, "step": 790 }, { "epoch": 0.021825431041916, "grad_norm": 0.002695683157071471, "learning_rate": 0.001, "loss": 0.4335, "step": 791 }, { "epoch": 0.02185302324298037, "grad_norm": 0.003691880265250802, "learning_rate": 0.001, "loss": 0.4108, "step": 792 }, { "epoch": 0.02188061544404474, "grad_norm": 0.003528386354446411, "learning_rate": 0.001, "loss": 0.4062, "step": 793 }, { "epoch": 0.02190820764510911, "grad_norm": 0.0033985383342951536, "learning_rate": 0.001, "loss": 0.4327, "step": 794 }, { "epoch": 0.021935799846173477, "grad_norm": 0.003107238095253706, "learning_rate": 0.001, "loss": 0.3966, "step": 795 }, { "epoch": 0.021963392047237847, "grad_norm": 0.0034598596394062042, "learning_rate": 0.001, "loss": 0.3998, "step": 796 }, { "epoch": 0.021990984248302217, "grad_norm": 0.004219945520162582, "learning_rate": 0.001, "loss": 0.4331, "step": 797 }, { "epoch": 0.022018576449366587, "grad_norm": 0.003744914662092924, "learning_rate": 0.001, "loss": 0.4155, "step": 798 }, { "epoch": 0.022046168650430957, "grad_norm": 0.004566433373838663, "learning_rate": 0.001, "loss": 0.4304, "step": 799 }, { "epoch": 0.022073760851495323, "grad_norm": 0.003758675418794155, "learning_rate": 0.001, "loss": 0.3851, "step": 800 }, { "epoch": 0.022101353052559693, "grad_norm": 0.0030292419251054525, "learning_rate": 0.001, "loss": 0.4417, "step": 801 }, { "epoch": 0.022128945253624063, "grad_norm": 0.004074092488735914, "learning_rate": 0.001, "loss": 0.4053, "step": 802 }, { "epoch": 0.022156537454688433, "grad_norm": 0.00471070921048522, "learning_rate": 0.001, "loss": 0.4152, "step": 803 }, { "epoch": 0.022184129655752803, "grad_norm": 0.007867252454161644, "learning_rate": 0.001, "loss": 0.3932, "step": 804 }, { "epoch": 0.02221172185681717, "grad_norm": 0.003963668830692768, "learning_rate": 0.001, "loss": 0.4058, "step": 805 }, { "epoch": 0.02223931405788154, "grad_norm": 0.005508980248123407, "learning_rate": 0.001, "loss": 0.395, "step": 806 }, { "epoch": 0.02226690625894591, "grad_norm": 0.003475068835541606, "learning_rate": 0.001, "loss": 0.434, "step": 807 }, { "epoch": 0.02229449846001028, "grad_norm": 0.0032727334182709455, "learning_rate": 0.001, "loss": 0.3785, "step": 808 }, { "epoch": 0.022322090661074646, "grad_norm": 0.0029324067290872335, "learning_rate": 0.001, "loss": 0.4136, "step": 809 }, { "epoch": 0.022349682862139016, "grad_norm": 0.005346233956515789, "learning_rate": 0.001, "loss": 0.3863, "step": 810 }, { "epoch": 0.022377275063203386, "grad_norm": 0.004121492151170969, "learning_rate": 0.001, "loss": 0.3875, "step": 811 }, { "epoch": 0.022404867264267755, "grad_norm": 0.004381257575005293, "learning_rate": 0.001, "loss": 0.4042, "step": 812 }, { "epoch": 0.022432459465332125, "grad_norm": 0.005230156239122152, "learning_rate": 0.001, "loss": 0.4351, "step": 813 }, { "epoch": 0.022460051666396492, "grad_norm": 0.004103715531527996, "learning_rate": 0.001, "loss": 0.3795, "step": 814 }, { "epoch": 0.02248764386746086, "grad_norm": 0.007542972918599844, "learning_rate": 0.001, "loss": 0.4058, "step": 815 }, { "epoch": 0.02251523606852523, "grad_norm": 0.003434807062149048, "learning_rate": 0.001, "loss": 0.4029, "step": 816 }, { "epoch": 0.0225428282695896, "grad_norm": 0.003715425031259656, "learning_rate": 0.001, "loss": 0.3636, "step": 817 }, { "epoch": 0.022570420470653968, "grad_norm": 0.005767806898802519, "learning_rate": 0.001, "loss": 0.433, "step": 818 }, { "epoch": 0.022598012671718338, "grad_norm": 0.007371674291789532, "learning_rate": 0.001, "loss": 0.3893, "step": 819 }, { "epoch": 0.022625604872782708, "grad_norm": 0.0067810118198394775, "learning_rate": 0.001, "loss": 0.4248, "step": 820 }, { "epoch": 0.022653197073847078, "grad_norm": 0.004905116744339466, "learning_rate": 0.001, "loss": 0.3493, "step": 821 }, { "epoch": 0.022680789274911448, "grad_norm": 0.0027144188061356544, "learning_rate": 0.001, "loss": 0.424, "step": 822 }, { "epoch": 0.022708381475975814, "grad_norm": 0.013933762907981873, "learning_rate": 0.001, "loss": 0.4002, "step": 823 }, { "epoch": 0.022735973677040184, "grad_norm": 0.002978452481329441, "learning_rate": 0.001, "loss": 0.4361, "step": 824 }, { "epoch": 0.022763565878104554, "grad_norm": 0.0060105458833277225, "learning_rate": 0.001, "loss": 0.3998, "step": 825 }, { "epoch": 0.022791158079168924, "grad_norm": 0.004003115464001894, "learning_rate": 0.001, "loss": 0.4344, "step": 826 }, { "epoch": 0.022818750280233294, "grad_norm": 0.008088911883533001, "learning_rate": 0.001, "loss": 0.3831, "step": 827 }, { "epoch": 0.02284634248129766, "grad_norm": 0.005061788484454155, "learning_rate": 0.001, "loss": 0.4043, "step": 828 }, { "epoch": 0.02287393468236203, "grad_norm": 0.00426626717671752, "learning_rate": 0.001, "loss": 0.3992, "step": 829 }, { "epoch": 0.0229015268834264, "grad_norm": 0.0029426752589643, "learning_rate": 0.001, "loss": 0.3841, "step": 830 }, { "epoch": 0.02292911908449077, "grad_norm": 0.005651662591844797, "learning_rate": 0.001, "loss": 0.4277, "step": 831 }, { "epoch": 0.022956711285555136, "grad_norm": 0.0026404529344290495, "learning_rate": 0.001, "loss": 0.4194, "step": 832 }, { "epoch": 0.022984303486619506, "grad_norm": 0.0029586381278932095, "learning_rate": 0.001, "loss": 0.3983, "step": 833 }, { "epoch": 0.023011895687683876, "grad_norm": 0.0026806823443621397, "learning_rate": 0.001, "loss": 0.3844, "step": 834 }, { "epoch": 0.023039487888748246, "grad_norm": 0.004135414958000183, "learning_rate": 0.001, "loss": 0.4138, "step": 835 }, { "epoch": 0.023067080089812616, "grad_norm": 0.0023586770985275507, "learning_rate": 0.001, "loss": 0.4143, "step": 836 }, { "epoch": 0.023094672290876982, "grad_norm": 0.004498126916587353, "learning_rate": 0.001, "loss": 0.4021, "step": 837 }, { "epoch": 0.023122264491941352, "grad_norm": 0.003083957824856043, "learning_rate": 0.001, "loss": 0.3931, "step": 838 }, { "epoch": 0.023149856693005722, "grad_norm": 0.002510181860998273, "learning_rate": 0.001, "loss": 0.3618, "step": 839 }, { "epoch": 0.023177448894070092, "grad_norm": 0.002793237566947937, "learning_rate": 0.001, "loss": 0.3774, "step": 840 }, { "epoch": 0.023205041095134462, "grad_norm": 0.002880933927372098, "learning_rate": 0.001, "loss": 0.4165, "step": 841 }, { "epoch": 0.02323263329619883, "grad_norm": 0.004375714808702469, "learning_rate": 0.001, "loss": 0.4275, "step": 842 }, { "epoch": 0.0232602254972632, "grad_norm": 0.0033162750769406557, "learning_rate": 0.001, "loss": 0.4052, "step": 843 }, { "epoch": 0.023287817698327568, "grad_norm": 0.004250579979270697, "learning_rate": 0.001, "loss": 0.4024, "step": 844 }, { "epoch": 0.023315409899391938, "grad_norm": 0.0036853316705673933, "learning_rate": 0.001, "loss": 0.3553, "step": 845 }, { "epoch": 0.023343002100456305, "grad_norm": 0.005420148838311434, "learning_rate": 0.001, "loss": 0.3904, "step": 846 }, { "epoch": 0.023370594301520674, "grad_norm": 0.002736524445936084, "learning_rate": 0.001, "loss": 0.3655, "step": 847 }, { "epoch": 0.023398186502585044, "grad_norm": 0.0048865810967981815, "learning_rate": 0.001, "loss": 0.3967, "step": 848 }, { "epoch": 0.023425778703649414, "grad_norm": 0.0033940684515982866, "learning_rate": 0.001, "loss": 0.3891, "step": 849 }, { "epoch": 0.023453370904713784, "grad_norm": 0.00402069790288806, "learning_rate": 0.001, "loss": 0.3791, "step": 850 }, { "epoch": 0.02348096310577815, "grad_norm": 0.004255734849721193, "learning_rate": 0.001, "loss": 0.3838, "step": 851 }, { "epoch": 0.02350855530684252, "grad_norm": 0.0029521863907575607, "learning_rate": 0.001, "loss": 0.3948, "step": 852 }, { "epoch": 0.02353614750790689, "grad_norm": 0.007674784865230322, "learning_rate": 0.001, "loss": 0.399, "step": 853 }, { "epoch": 0.02356373970897126, "grad_norm": 0.003159287618473172, "learning_rate": 0.001, "loss": 0.3804, "step": 854 }, { "epoch": 0.023591331910035627, "grad_norm": 0.00475959200412035, "learning_rate": 0.001, "loss": 0.4162, "step": 855 }, { "epoch": 0.023618924111099997, "grad_norm": 0.003215026343241334, "learning_rate": 0.001, "loss": 0.4014, "step": 856 }, { "epoch": 0.023646516312164367, "grad_norm": 0.0038892014417797327, "learning_rate": 0.001, "loss": 0.4367, "step": 857 }, { "epoch": 0.023674108513228737, "grad_norm": 0.003636479377746582, "learning_rate": 0.001, "loss": 0.408, "step": 858 }, { "epoch": 0.023701700714293106, "grad_norm": 0.0037935448344796896, "learning_rate": 0.001, "loss": 0.381, "step": 859 }, { "epoch": 0.023729292915357473, "grad_norm": 0.004558536224067211, "learning_rate": 0.001, "loss": 0.3901, "step": 860 }, { "epoch": 0.023756885116421843, "grad_norm": 0.003610727610066533, "learning_rate": 0.001, "loss": 0.3836, "step": 861 }, { "epoch": 0.023784477317486213, "grad_norm": 0.0035163855645805597, "learning_rate": 0.001, "loss": 0.4254, "step": 862 }, { "epoch": 0.023812069518550583, "grad_norm": 0.003148287069052458, "learning_rate": 0.001, "loss": 0.3916, "step": 863 }, { "epoch": 0.023839661719614953, "grad_norm": 0.0030976871494203806, "learning_rate": 0.001, "loss": 0.3976, "step": 864 }, { "epoch": 0.02386725392067932, "grad_norm": 0.003255886258557439, "learning_rate": 0.001, "loss": 0.3968, "step": 865 }, { "epoch": 0.02389484612174369, "grad_norm": 0.003951660823076963, "learning_rate": 0.001, "loss": 0.4325, "step": 866 }, { "epoch": 0.02392243832280806, "grad_norm": 0.0038296151906251907, "learning_rate": 0.001, "loss": 0.3865, "step": 867 }, { "epoch": 0.02395003052387243, "grad_norm": 0.003523425431922078, "learning_rate": 0.001, "loss": 0.3773, "step": 868 }, { "epoch": 0.023977622724936795, "grad_norm": 0.003752518445253372, "learning_rate": 0.001, "loss": 0.4031, "step": 869 }, { "epoch": 0.024005214926001165, "grad_norm": 0.005763133056461811, "learning_rate": 0.001, "loss": 0.4345, "step": 870 }, { "epoch": 0.024032807127065535, "grad_norm": 0.009727811440825462, "learning_rate": 0.001, "loss": 0.3966, "step": 871 }, { "epoch": 0.024060399328129905, "grad_norm": 0.006550495512783527, "learning_rate": 0.001, "loss": 0.434, "step": 872 }, { "epoch": 0.024087991529194275, "grad_norm": 0.003153527621179819, "learning_rate": 0.001, "loss": 0.416, "step": 873 }, { "epoch": 0.02411558373025864, "grad_norm": 0.004990814719349146, "learning_rate": 0.001, "loss": 0.4099, "step": 874 }, { "epoch": 0.02414317593132301, "grad_norm": 0.0033743572421371937, "learning_rate": 0.001, "loss": 0.4336, "step": 875 }, { "epoch": 0.02417076813238738, "grad_norm": 0.004232621286064386, "learning_rate": 0.001, "loss": 0.407, "step": 876 }, { "epoch": 0.02419836033345175, "grad_norm": 0.0045753102749586105, "learning_rate": 0.001, "loss": 0.3933, "step": 877 }, { "epoch": 0.024225952534516117, "grad_norm": 0.005658434238284826, "learning_rate": 0.001, "loss": 0.4274, "step": 878 }, { "epoch": 0.024253544735580487, "grad_norm": 0.003705628216266632, "learning_rate": 0.001, "loss": 0.4084, "step": 879 }, { "epoch": 0.024281136936644857, "grad_norm": 0.004948351066559553, "learning_rate": 0.001, "loss": 0.4735, "step": 880 }, { "epoch": 0.024308729137709227, "grad_norm": 0.003841443918645382, "learning_rate": 0.001, "loss": 0.4017, "step": 881 }, { "epoch": 0.024336321338773597, "grad_norm": 0.006151879671961069, "learning_rate": 0.001, "loss": 0.3591, "step": 882 }, { "epoch": 0.024363913539837963, "grad_norm": 0.004619830287992954, "learning_rate": 0.001, "loss": 0.3906, "step": 883 }, { "epoch": 0.024391505740902333, "grad_norm": 0.004188140854239464, "learning_rate": 0.001, "loss": 0.3842, "step": 884 }, { "epoch": 0.024419097941966703, "grad_norm": 0.004255149979144335, "learning_rate": 0.001, "loss": 0.3664, "step": 885 }, { "epoch": 0.024446690143031073, "grad_norm": 0.00479822838678956, "learning_rate": 0.001, "loss": 0.3781, "step": 886 }, { "epoch": 0.024474282344095443, "grad_norm": 0.004328257869929075, "learning_rate": 0.001, "loss": 0.4, "step": 887 }, { "epoch": 0.02450187454515981, "grad_norm": 0.0029929610900580883, "learning_rate": 0.001, "loss": 0.4315, "step": 888 }, { "epoch": 0.02452946674622418, "grad_norm": 0.003600528696551919, "learning_rate": 0.001, "loss": 0.393, "step": 889 }, { "epoch": 0.02455705894728855, "grad_norm": 0.0042820703238248825, "learning_rate": 0.001, "loss": 0.4092, "step": 890 }, { "epoch": 0.02458465114835292, "grad_norm": 0.004282411653548479, "learning_rate": 0.001, "loss": 0.4112, "step": 891 }, { "epoch": 0.024612243349417286, "grad_norm": 0.0038429568521678448, "learning_rate": 0.001, "loss": 0.416, "step": 892 }, { "epoch": 0.024639835550481656, "grad_norm": 0.0035555907525122166, "learning_rate": 0.001, "loss": 0.4325, "step": 893 }, { "epoch": 0.024667427751546026, "grad_norm": 0.0035739324521273375, "learning_rate": 0.001, "loss": 0.4009, "step": 894 }, { "epoch": 0.024695019952610395, "grad_norm": 0.004270533565431833, "learning_rate": 0.001, "loss": 0.4254, "step": 895 }, { "epoch": 0.024722612153674765, "grad_norm": 0.003811136120930314, "learning_rate": 0.001, "loss": 0.3913, "step": 896 }, { "epoch": 0.024750204354739132, "grad_norm": 0.004315483383834362, "learning_rate": 0.001, "loss": 0.3805, "step": 897 }, { "epoch": 0.0247777965558035, "grad_norm": 0.003949417732656002, "learning_rate": 0.001, "loss": 0.403, "step": 898 }, { "epoch": 0.02480538875686787, "grad_norm": 0.004281886387616396, "learning_rate": 0.001, "loss": 0.4046, "step": 899 }, { "epoch": 0.02483298095793224, "grad_norm": 0.004681427031755447, "learning_rate": 0.001, "loss": 0.3914, "step": 900 }, { "epoch": 0.024860573158996608, "grad_norm": 0.008973667398095131, "learning_rate": 0.001, "loss": 0.4357, "step": 901 }, { "epoch": 0.024888165360060978, "grad_norm": 0.004130321089178324, "learning_rate": 0.001, "loss": 0.4063, "step": 902 }, { "epoch": 0.024915757561125348, "grad_norm": 0.005490110721439123, "learning_rate": 0.001, "loss": 0.3905, "step": 903 }, { "epoch": 0.024943349762189718, "grad_norm": 0.007906914688646793, "learning_rate": 0.001, "loss": 0.3685, "step": 904 }, { "epoch": 0.024970941963254088, "grad_norm": 0.022035721689462662, "learning_rate": 0.001, "loss": 0.3882, "step": 905 }, { "epoch": 0.024998534164318454, "grad_norm": 0.0064789773896336555, "learning_rate": 0.001, "loss": 0.3644, "step": 906 }, { "epoch": 0.025026126365382824, "grad_norm": 0.003105413168668747, "learning_rate": 0.001, "loss": 0.3742, "step": 907 }, { "epoch": 0.025053718566447194, "grad_norm": 0.0031007244251668453, "learning_rate": 0.001, "loss": 0.3836, "step": 908 }, { "epoch": 0.025081310767511564, "grad_norm": 0.003298420924693346, "learning_rate": 0.001, "loss": 0.3963, "step": 909 }, { "epoch": 0.025108902968575934, "grad_norm": 0.0028220806270837784, "learning_rate": 0.001, "loss": 0.428, "step": 910 }, { "epoch": 0.0251364951696403, "grad_norm": 0.0031455964781343937, "learning_rate": 0.001, "loss": 0.3877, "step": 911 }, { "epoch": 0.02516408737070467, "grad_norm": 0.0031188016291707754, "learning_rate": 0.001, "loss": 0.403, "step": 912 }, { "epoch": 0.02519167957176904, "grad_norm": 0.0035306380596011877, "learning_rate": 0.001, "loss": 0.4075, "step": 913 }, { "epoch": 0.02521927177283341, "grad_norm": 0.010045260190963745, "learning_rate": 0.001, "loss": 0.3853, "step": 914 }, { "epoch": 0.025246863973897776, "grad_norm": 0.002441234653815627, "learning_rate": 0.001, "loss": 0.3652, "step": 915 }, { "epoch": 0.025274456174962146, "grad_norm": 0.0037013038527220488, "learning_rate": 0.001, "loss": 0.3761, "step": 916 }, { "epoch": 0.025302048376026516, "grad_norm": 0.004198992624878883, "learning_rate": 0.001, "loss": 0.4195, "step": 917 }, { "epoch": 0.025329640577090886, "grad_norm": 0.0041293492540717125, "learning_rate": 0.001, "loss": 0.3822, "step": 918 }, { "epoch": 0.025357232778155256, "grad_norm": 0.0031841343734413385, "learning_rate": 0.001, "loss": 0.4145, "step": 919 }, { "epoch": 0.025384824979219622, "grad_norm": 0.003239045385271311, "learning_rate": 0.001, "loss": 0.3771, "step": 920 }, { "epoch": 0.025412417180283992, "grad_norm": 0.004458332899957895, "learning_rate": 0.001, "loss": 0.4111, "step": 921 }, { "epoch": 0.025440009381348362, "grad_norm": 0.00542887207120657, "learning_rate": 0.001, "loss": 0.4112, "step": 922 }, { "epoch": 0.025467601582412732, "grad_norm": 0.0039135608822107315, "learning_rate": 0.001, "loss": 0.4327, "step": 923 }, { "epoch": 0.0254951937834771, "grad_norm": 0.003864714875817299, "learning_rate": 0.001, "loss": 0.399, "step": 924 }, { "epoch": 0.02552278598454147, "grad_norm": 0.003554902272298932, "learning_rate": 0.001, "loss": 0.3696, "step": 925 }, { "epoch": 0.02555037818560584, "grad_norm": 0.003268434898927808, "learning_rate": 0.001, "loss": 0.437, "step": 926 }, { "epoch": 0.025577970386670208, "grad_norm": 0.003844626247882843, "learning_rate": 0.001, "loss": 0.3928, "step": 927 }, { "epoch": 0.025605562587734578, "grad_norm": 0.0027172230184078217, "learning_rate": 0.001, "loss": 0.4315, "step": 928 }, { "epoch": 0.025633154788798945, "grad_norm": 0.0031102465000003576, "learning_rate": 0.001, "loss": 0.4023, "step": 929 }, { "epoch": 0.025660746989863314, "grad_norm": 0.00434950040653348, "learning_rate": 0.001, "loss": 0.4048, "step": 930 }, { "epoch": 0.025688339190927684, "grad_norm": 0.0034932976122945547, "learning_rate": 0.001, "loss": 0.4578, "step": 931 }, { "epoch": 0.025715931391992054, "grad_norm": 0.00359813729301095, "learning_rate": 0.001, "loss": 0.4268, "step": 932 }, { "epoch": 0.025743523593056424, "grad_norm": 0.004753883462399244, "learning_rate": 0.001, "loss": 0.3659, "step": 933 }, { "epoch": 0.02577111579412079, "grad_norm": 0.0034384452737867832, "learning_rate": 0.001, "loss": 0.3892, "step": 934 }, { "epoch": 0.02579870799518516, "grad_norm": 0.0031536207534372807, "learning_rate": 0.001, "loss": 0.4061, "step": 935 }, { "epoch": 0.02582630019624953, "grad_norm": 0.0027636385057121515, "learning_rate": 0.001, "loss": 0.4256, "step": 936 }, { "epoch": 0.0258538923973139, "grad_norm": 0.002858042251318693, "learning_rate": 0.001, "loss": 0.4199, "step": 937 }, { "epoch": 0.025881484598378267, "grad_norm": 0.0022810858208686113, "learning_rate": 0.001, "loss": 0.3972, "step": 938 }, { "epoch": 0.025909076799442637, "grad_norm": 0.004132518544793129, "learning_rate": 0.001, "loss": 0.3967, "step": 939 }, { "epoch": 0.025936669000507007, "grad_norm": 0.00525138434022665, "learning_rate": 0.001, "loss": 0.4237, "step": 940 }, { "epoch": 0.025964261201571377, "grad_norm": 0.003772285534068942, "learning_rate": 0.001, "loss": 0.4164, "step": 941 }, { "epoch": 0.025991853402635746, "grad_norm": 0.0035187045577913523, "learning_rate": 0.001, "loss": 0.3605, "step": 942 }, { "epoch": 0.026019445603700113, "grad_norm": 0.00876245740801096, "learning_rate": 0.001, "loss": 0.3699, "step": 943 }, { "epoch": 0.026047037804764483, "grad_norm": 0.002588030882179737, "learning_rate": 0.001, "loss": 0.404, "step": 944 }, { "epoch": 0.026074630005828853, "grad_norm": 0.003442909335717559, "learning_rate": 0.001, "loss": 0.3946, "step": 945 }, { "epoch": 0.026102222206893223, "grad_norm": 0.003080985974520445, "learning_rate": 0.001, "loss": 0.3941, "step": 946 }, { "epoch": 0.02612981440795759, "grad_norm": 0.0026055227499455214, "learning_rate": 0.001, "loss": 0.4081, "step": 947 }, { "epoch": 0.02615740660902196, "grad_norm": 0.00331856869161129, "learning_rate": 0.001, "loss": 0.4073, "step": 948 }, { "epoch": 0.02618499881008633, "grad_norm": 0.003704390488564968, "learning_rate": 0.001, "loss": 0.409, "step": 949 }, { "epoch": 0.0262125910111507, "grad_norm": 0.0028117795009166002, "learning_rate": 0.001, "loss": 0.4347, "step": 950 }, { "epoch": 0.02624018321221507, "grad_norm": 0.005540241952985525, "learning_rate": 0.001, "loss": 0.394, "step": 951 }, { "epoch": 0.026267775413279435, "grad_norm": 0.0033867203164845705, "learning_rate": 0.001, "loss": 0.3855, "step": 952 }, { "epoch": 0.026295367614343805, "grad_norm": 0.005016230046749115, "learning_rate": 0.001, "loss": 0.4041, "step": 953 }, { "epoch": 0.026322959815408175, "grad_norm": 0.003560574259608984, "learning_rate": 0.001, "loss": 0.4006, "step": 954 }, { "epoch": 0.026350552016472545, "grad_norm": 0.006354731973260641, "learning_rate": 0.001, "loss": 0.3982, "step": 955 }, { "epoch": 0.026378144217536915, "grad_norm": 0.0036503588780760765, "learning_rate": 0.001, "loss": 0.4549, "step": 956 }, { "epoch": 0.02640573641860128, "grad_norm": 0.0061042881570756435, "learning_rate": 0.001, "loss": 0.419, "step": 957 }, { "epoch": 0.02643332861966565, "grad_norm": 0.0033794385381042957, "learning_rate": 0.001, "loss": 0.3847, "step": 958 }, { "epoch": 0.02646092082073002, "grad_norm": 0.00359528511762619, "learning_rate": 0.001, "loss": 0.3874, "step": 959 }, { "epoch": 0.02648851302179439, "grad_norm": 0.0028623088728636503, "learning_rate": 0.001, "loss": 0.3936, "step": 960 }, { "epoch": 0.026516105222858757, "grad_norm": 0.0029379641637206078, "learning_rate": 0.001, "loss": 0.3741, "step": 961 }, { "epoch": 0.026543697423923127, "grad_norm": 0.0034645479172468185, "learning_rate": 0.001, "loss": 0.3783, "step": 962 }, { "epoch": 0.026571289624987497, "grad_norm": 0.0062490347772836685, "learning_rate": 0.001, "loss": 0.3959, "step": 963 }, { "epoch": 0.026598881826051867, "grad_norm": 0.010679258033633232, "learning_rate": 0.001, "loss": 0.362, "step": 964 }, { "epoch": 0.026626474027116237, "grad_norm": 0.003056140150874853, "learning_rate": 0.001, "loss": 0.3784, "step": 965 }, { "epoch": 0.026654066228180603, "grad_norm": 0.0043676625937223434, "learning_rate": 0.001, "loss": 0.4242, "step": 966 }, { "epoch": 0.026681658429244973, "grad_norm": 0.004542070906609297, "learning_rate": 0.001, "loss": 0.4068, "step": 967 }, { "epoch": 0.026709250630309343, "grad_norm": 0.003998064436018467, "learning_rate": 0.001, "loss": 0.4086, "step": 968 }, { "epoch": 0.026736842831373713, "grad_norm": 0.00380288646556437, "learning_rate": 0.001, "loss": 0.4274, "step": 969 }, { "epoch": 0.026764435032438083, "grad_norm": 0.005197952967137098, "learning_rate": 0.001, "loss": 0.37, "step": 970 }, { "epoch": 0.02679202723350245, "grad_norm": 0.0045735882595181465, "learning_rate": 0.001, "loss": 0.3623, "step": 971 }, { "epoch": 0.02681961943456682, "grad_norm": 0.013644758611917496, "learning_rate": 0.001, "loss": 0.3709, "step": 972 }, { "epoch": 0.02684721163563119, "grad_norm": 0.0032162275165319443, "learning_rate": 0.001, "loss": 0.4419, "step": 973 }, { "epoch": 0.02687480383669556, "grad_norm": 0.003487576497718692, "learning_rate": 0.001, "loss": 0.4018, "step": 974 }, { "epoch": 0.026902396037759926, "grad_norm": 0.0028352616354823112, "learning_rate": 0.001, "loss": 0.4059, "step": 975 }, { "epoch": 0.026929988238824296, "grad_norm": 0.0032060358207672834, "learning_rate": 0.001, "loss": 0.4277, "step": 976 }, { "epoch": 0.026957580439888666, "grad_norm": 0.004065185319632292, "learning_rate": 0.001, "loss": 0.4403, "step": 977 }, { "epoch": 0.026985172640953035, "grad_norm": 0.0032863083761185408, "learning_rate": 0.001, "loss": 0.4257, "step": 978 }, { "epoch": 0.027012764842017405, "grad_norm": 0.0028903197962790728, "learning_rate": 0.001, "loss": 0.4371, "step": 979 }, { "epoch": 0.027040357043081772, "grad_norm": 0.0034798365086317062, "learning_rate": 0.001, "loss": 0.4378, "step": 980 }, { "epoch": 0.02706794924414614, "grad_norm": 0.0034869504161179066, "learning_rate": 0.001, "loss": 0.451, "step": 981 }, { "epoch": 0.02709554144521051, "grad_norm": 0.003647441975772381, "learning_rate": 0.001, "loss": 0.3957, "step": 982 }, { "epoch": 0.02712313364627488, "grad_norm": 0.003412257879972458, "learning_rate": 0.001, "loss": 0.427, "step": 983 }, { "epoch": 0.027150725847339248, "grad_norm": 0.0046013942919671535, "learning_rate": 0.001, "loss": 0.4154, "step": 984 }, { "epoch": 0.027178318048403618, "grad_norm": 0.00263599562458694, "learning_rate": 0.001, "loss": 0.438, "step": 985 }, { "epoch": 0.027205910249467988, "grad_norm": 0.00280582788400352, "learning_rate": 0.001, "loss": 0.3824, "step": 986 }, { "epoch": 0.027233502450532358, "grad_norm": 0.004769660532474518, "learning_rate": 0.001, "loss": 0.3607, "step": 987 }, { "epoch": 0.027261094651596728, "grad_norm": 0.004607087001204491, "learning_rate": 0.001, "loss": 0.4218, "step": 988 }, { "epoch": 0.027288686852661094, "grad_norm": 0.003465674351900816, "learning_rate": 0.001, "loss": 0.4187, "step": 989 }, { "epoch": 0.027316279053725464, "grad_norm": 0.003429507138207555, "learning_rate": 0.001, "loss": 0.3691, "step": 990 }, { "epoch": 0.027343871254789834, "grad_norm": 0.004496248438954353, "learning_rate": 0.001, "loss": 0.4095, "step": 991 }, { "epoch": 0.027371463455854204, "grad_norm": 0.0036759956274181604, "learning_rate": 0.001, "loss": 0.4263, "step": 992 }, { "epoch": 0.027399055656918574, "grad_norm": 0.003747584531083703, "learning_rate": 0.001, "loss": 0.43, "step": 993 }, { "epoch": 0.02742664785798294, "grad_norm": 0.0032122142147272825, "learning_rate": 0.001, "loss": 0.4167, "step": 994 }, { "epoch": 0.02745424005904731, "grad_norm": 0.0035362818744033575, "learning_rate": 0.001, "loss": 0.4097, "step": 995 }, { "epoch": 0.02748183226011168, "grad_norm": 0.002750742482021451, "learning_rate": 0.001, "loss": 0.4027, "step": 996 }, { "epoch": 0.02750942446117605, "grad_norm": 0.004634341225028038, "learning_rate": 0.001, "loss": 0.3845, "step": 997 }, { "epoch": 0.027537016662240416, "grad_norm": 0.0034089027903974056, "learning_rate": 0.001, "loss": 0.4119, "step": 998 }, { "epoch": 0.027564608863304786, "grad_norm": 0.008999533019959927, "learning_rate": 0.001, "loss": 0.3786, "step": 999 }, { "epoch": 0.027592201064369156, "grad_norm": 0.007176951505243778, "learning_rate": 0.001, "loss": 0.3667, "step": 1000 }, { "epoch": 0.027592201064369156, "eval_runtime": 23.9716, "eval_samples_per_second": 1.335, "eval_steps_per_second": 0.167, "step": 1000 }, { "epoch": 0.027619793265433526, "grad_norm": 0.0036988353822380304, "learning_rate": 0.001, "loss": 0.3749, "step": 1001 }, { "epoch": 0.027647385466497896, "grad_norm": 0.0030345297418534756, "learning_rate": 0.001, "loss": 0.4101, "step": 1002 }, { "epoch": 0.027674977667562262, "grad_norm": 0.003801414743065834, "learning_rate": 0.001, "loss": 0.405, "step": 1003 }, { "epoch": 0.027702569868626632, "grad_norm": 0.002585576381534338, "learning_rate": 0.001, "loss": 0.4069, "step": 1004 }, { "epoch": 0.027730162069691002, "grad_norm": 0.002826697425916791, "learning_rate": 0.001, "loss": 0.4051, "step": 1005 }, { "epoch": 0.027757754270755372, "grad_norm": 0.0023764553479850292, "learning_rate": 0.001, "loss": 0.4434, "step": 1006 }, { "epoch": 0.02778534647181974, "grad_norm": 0.002361831720918417, "learning_rate": 0.001, "loss": 0.4271, "step": 1007 }, { "epoch": 0.02781293867288411, "grad_norm": 0.0033377348445355892, "learning_rate": 0.001, "loss": 0.4132, "step": 1008 }, { "epoch": 0.02784053087394848, "grad_norm": 0.004634097684174776, "learning_rate": 0.001, "loss": 0.421, "step": 1009 }, { "epoch": 0.027868123075012848, "grad_norm": 0.0033739793580025434, "learning_rate": 0.001, "loss": 0.398, "step": 1010 }, { "epoch": 0.027895715276077218, "grad_norm": 0.003304282436147332, "learning_rate": 0.001, "loss": 0.4162, "step": 1011 }, { "epoch": 0.027923307477141585, "grad_norm": 0.007290132809430361, "learning_rate": 0.001, "loss": 0.3914, "step": 1012 }, { "epoch": 0.027950899678205954, "grad_norm": 0.008183951489627361, "learning_rate": 0.001, "loss": 0.39, "step": 1013 }, { "epoch": 0.027978491879270324, "grad_norm": 0.012678315863013268, "learning_rate": 0.001, "loss": 0.3956, "step": 1014 }, { "epoch": 0.028006084080334694, "grad_norm": 0.00686604343354702, "learning_rate": 0.001, "loss": 0.4036, "step": 1015 }, { "epoch": 0.028033676281399064, "grad_norm": 0.004761406686156988, "learning_rate": 0.001, "loss": 0.4203, "step": 1016 }, { "epoch": 0.02806126848246343, "grad_norm": 0.004743502475321293, "learning_rate": 0.001, "loss": 0.3745, "step": 1017 }, { "epoch": 0.0280888606835278, "grad_norm": 0.003992531448602676, "learning_rate": 0.001, "loss": 0.3979, "step": 1018 }, { "epoch": 0.02811645288459217, "grad_norm": 0.0027658091858029366, "learning_rate": 0.001, "loss": 0.4219, "step": 1019 }, { "epoch": 0.02814404508565654, "grad_norm": 0.002945561660453677, "learning_rate": 0.001, "loss": 0.3572, "step": 1020 }, { "epoch": 0.028171637286720907, "grad_norm": 0.004641372710466385, "learning_rate": 0.001, "loss": 0.4017, "step": 1021 }, { "epoch": 0.028199229487785277, "grad_norm": 0.0027934517711400986, "learning_rate": 0.001, "loss": 0.4321, "step": 1022 }, { "epoch": 0.028226821688849647, "grad_norm": 0.0028974004089832306, "learning_rate": 0.001, "loss": 0.417, "step": 1023 }, { "epoch": 0.028254413889914017, "grad_norm": 0.0028739396948367357, "learning_rate": 0.001, "loss": 0.3921, "step": 1024 }, { "epoch": 0.028282006090978386, "grad_norm": 0.0029768026433885098, "learning_rate": 0.001, "loss": 0.4339, "step": 1025 }, { "epoch": 0.028309598292042753, "grad_norm": 0.0032322900369763374, "learning_rate": 0.001, "loss": 0.4045, "step": 1026 }, { "epoch": 0.028337190493107123, "grad_norm": 0.0037773947697132826, "learning_rate": 0.001, "loss": 0.3926, "step": 1027 }, { "epoch": 0.028364782694171493, "grad_norm": 0.0037146552931517363, "learning_rate": 0.001, "loss": 0.3743, "step": 1028 }, { "epoch": 0.028392374895235863, "grad_norm": 0.003289048792794347, "learning_rate": 0.001, "loss": 0.4035, "step": 1029 }, { "epoch": 0.02841996709630023, "grad_norm": 0.003675120184198022, "learning_rate": 0.001, "loss": 0.3811, "step": 1030 }, { "epoch": 0.0284475592973646, "grad_norm": 0.002748252125456929, "learning_rate": 0.001, "loss": 0.457, "step": 1031 }, { "epoch": 0.02847515149842897, "grad_norm": 0.0026871277950704098, "learning_rate": 0.001, "loss": 0.4112, "step": 1032 }, { "epoch": 0.02850274369949334, "grad_norm": 0.0031060713808983564, "learning_rate": 0.001, "loss": 0.4268, "step": 1033 }, { "epoch": 0.02853033590055771, "grad_norm": 0.0029715097043663263, "learning_rate": 0.001, "loss": 0.3883, "step": 1034 }, { "epoch": 0.028557928101622075, "grad_norm": 0.004726918879896402, "learning_rate": 0.001, "loss": 0.4021, "step": 1035 }, { "epoch": 0.028585520302686445, "grad_norm": 0.0036529232747852802, "learning_rate": 0.001, "loss": 0.3947, "step": 1036 }, { "epoch": 0.028613112503750815, "grad_norm": 0.003458012593910098, "learning_rate": 0.001, "loss": 0.3919, "step": 1037 }, { "epoch": 0.028640704704815185, "grad_norm": 0.0026079469826072454, "learning_rate": 0.001, "loss": 0.3977, "step": 1038 }, { "epoch": 0.028668296905879555, "grad_norm": 0.004490693099796772, "learning_rate": 0.001, "loss": 0.3769, "step": 1039 }, { "epoch": 0.02869588910694392, "grad_norm": 0.0030477193649858236, "learning_rate": 0.001, "loss": 0.3832, "step": 1040 }, { "epoch": 0.02872348130800829, "grad_norm": 0.0033396866638213396, "learning_rate": 0.001, "loss": 0.4095, "step": 1041 }, { "epoch": 0.02875107350907266, "grad_norm": 0.0030391959007829428, "learning_rate": 0.001, "loss": 0.3874, "step": 1042 }, { "epoch": 0.02877866571013703, "grad_norm": 0.0035509227309376, "learning_rate": 0.001, "loss": 0.4058, "step": 1043 }, { "epoch": 0.028806257911201397, "grad_norm": 0.0039217835292220116, "learning_rate": 0.001, "loss": 0.3737, "step": 1044 }, { "epoch": 0.028833850112265767, "grad_norm": 0.007786846719682217, "learning_rate": 0.001, "loss": 0.3881, "step": 1045 }, { "epoch": 0.028861442313330137, "grad_norm": 0.0030108222272247076, "learning_rate": 0.001, "loss": 0.4365, "step": 1046 }, { "epoch": 0.028889034514394507, "grad_norm": 0.0058325594291090965, "learning_rate": 0.001, "loss": 0.4182, "step": 1047 }, { "epoch": 0.028916626715458877, "grad_norm": 0.0032810927368700504, "learning_rate": 0.001, "loss": 0.38, "step": 1048 }, { "epoch": 0.028944218916523243, "grad_norm": 0.0035062895622104406, "learning_rate": 0.001, "loss": 0.3968, "step": 1049 }, { "epoch": 0.028971811117587613, "grad_norm": 0.003582380712032318, "learning_rate": 0.001, "loss": 0.4174, "step": 1050 }, { "epoch": 0.028999403318651983, "grad_norm": 0.003514527576044202, "learning_rate": 0.001, "loss": 0.4019, "step": 1051 }, { "epoch": 0.029026995519716353, "grad_norm": 0.003413026686757803, "learning_rate": 0.001, "loss": 0.3962, "step": 1052 }, { "epoch": 0.02905458772078072, "grad_norm": 0.0038671360816806555, "learning_rate": 0.001, "loss": 0.4344, "step": 1053 }, { "epoch": 0.02908217992184509, "grad_norm": 0.004694768693298101, "learning_rate": 0.001, "loss": 0.3934, "step": 1054 }, { "epoch": 0.02910977212290946, "grad_norm": 0.004935418255627155, "learning_rate": 0.001, "loss": 0.3878, "step": 1055 }, { "epoch": 0.02913736432397383, "grad_norm": 0.0034861708991229534, "learning_rate": 0.001, "loss": 0.3961, "step": 1056 }, { "epoch": 0.0291649565250382, "grad_norm": 0.00353567604906857, "learning_rate": 0.001, "loss": 0.3977, "step": 1057 }, { "epoch": 0.029192548726102566, "grad_norm": 0.0043948497623205185, "learning_rate": 0.001, "loss": 0.4041, "step": 1058 }, { "epoch": 0.029220140927166936, "grad_norm": 0.00446993438526988, "learning_rate": 0.001, "loss": 0.4099, "step": 1059 }, { "epoch": 0.029247733128231305, "grad_norm": 0.003901879768818617, "learning_rate": 0.001, "loss": 0.4197, "step": 1060 }, { "epoch": 0.029275325329295675, "grad_norm": 0.004764191340655088, "learning_rate": 0.001, "loss": 0.3927, "step": 1061 }, { "epoch": 0.029302917530360045, "grad_norm": 0.0031485301442444324, "learning_rate": 0.001, "loss": 0.4176, "step": 1062 }, { "epoch": 0.029330509731424412, "grad_norm": 0.004180070944130421, "learning_rate": 0.001, "loss": 0.3713, "step": 1063 }, { "epoch": 0.02935810193248878, "grad_norm": 0.005659409333020449, "learning_rate": 0.001, "loss": 0.3968, "step": 1064 }, { "epoch": 0.02938569413355315, "grad_norm": 0.0038438751362264156, "learning_rate": 0.001, "loss": 0.4278, "step": 1065 }, { "epoch": 0.02941328633461752, "grad_norm": 0.00400773249566555, "learning_rate": 0.001, "loss": 0.4241, "step": 1066 }, { "epoch": 0.029440878535681888, "grad_norm": 0.005502818617969751, "learning_rate": 0.001, "loss": 0.4205, "step": 1067 }, { "epoch": 0.029468470736746258, "grad_norm": 0.003952248953282833, "learning_rate": 0.001, "loss": 0.4407, "step": 1068 }, { "epoch": 0.029496062937810628, "grad_norm": 0.011100285686552525, "learning_rate": 0.001, "loss": 0.3699, "step": 1069 }, { "epoch": 0.029523655138874998, "grad_norm": 0.00686876242980361, "learning_rate": 0.001, "loss": 0.3705, "step": 1070 }, { "epoch": 0.029551247339939368, "grad_norm": 0.007713994476944208, "learning_rate": 0.001, "loss": 0.3842, "step": 1071 }, { "epoch": 0.029578839541003734, "grad_norm": 0.009867096319794655, "learning_rate": 0.001, "loss": 0.3383, "step": 1072 }, { "epoch": 0.029606431742068104, "grad_norm": 0.004925290122628212, "learning_rate": 0.001, "loss": 0.3811, "step": 1073 }, { "epoch": 0.029634023943132474, "grad_norm": 0.004095774609595537, "learning_rate": 0.001, "loss": 0.3824, "step": 1074 }, { "epoch": 0.029661616144196844, "grad_norm": 0.0056510199792683125, "learning_rate": 0.001, "loss": 0.386, "step": 1075 }, { "epoch": 0.029689208345261214, "grad_norm": 0.003869240405037999, "learning_rate": 0.001, "loss": 0.4113, "step": 1076 }, { "epoch": 0.02971680054632558, "grad_norm": 0.0034619278740137815, "learning_rate": 0.001, "loss": 0.392, "step": 1077 }, { "epoch": 0.02974439274738995, "grad_norm": 0.003166050184518099, "learning_rate": 0.001, "loss": 0.3854, "step": 1078 }, { "epoch": 0.02977198494845432, "grad_norm": 0.004651426337659359, "learning_rate": 0.001, "loss": 0.4369, "step": 1079 }, { "epoch": 0.02979957714951869, "grad_norm": 0.00306075275875628, "learning_rate": 0.001, "loss": 0.3843, "step": 1080 }, { "epoch": 0.029827169350583056, "grad_norm": 0.0034789969213306904, "learning_rate": 0.001, "loss": 0.4118, "step": 1081 }, { "epoch": 0.029854761551647426, "grad_norm": 0.004120729863643646, "learning_rate": 0.001, "loss": 0.3697, "step": 1082 }, { "epoch": 0.029882353752711796, "grad_norm": 0.005171756725758314, "learning_rate": 0.001, "loss": 0.4336, "step": 1083 }, { "epoch": 0.029909945953776166, "grad_norm": 0.00312454323284328, "learning_rate": 0.001, "loss": 0.4105, "step": 1084 }, { "epoch": 0.029937538154840536, "grad_norm": 0.003719372907653451, "learning_rate": 0.001, "loss": 0.3919, "step": 1085 }, { "epoch": 0.029965130355904902, "grad_norm": 0.003698839107528329, "learning_rate": 0.001, "loss": 0.3745, "step": 1086 }, { "epoch": 0.029992722556969272, "grad_norm": 0.0035393829457461834, "learning_rate": 0.001, "loss": 0.3922, "step": 1087 }, { "epoch": 0.030020314758033642, "grad_norm": 0.004070476163178682, "learning_rate": 0.001, "loss": 0.3524, "step": 1088 }, { "epoch": 0.030047906959098012, "grad_norm": 0.004007712937891483, "learning_rate": 0.001, "loss": 0.4154, "step": 1089 }, { "epoch": 0.03007549916016238, "grad_norm": 0.0035324685741215944, "learning_rate": 0.001, "loss": 0.3845, "step": 1090 }, { "epoch": 0.03010309136122675, "grad_norm": 0.007462177891284227, "learning_rate": 0.001, "loss": 0.4082, "step": 1091 }, { "epoch": 0.03013068356229112, "grad_norm": 0.003764525754377246, "learning_rate": 0.001, "loss": 0.4065, "step": 1092 }, { "epoch": 0.030158275763355488, "grad_norm": 0.0034473277628421783, "learning_rate": 0.001, "loss": 0.4106, "step": 1093 }, { "epoch": 0.030185867964419858, "grad_norm": 0.0025921028573065996, "learning_rate": 0.001, "loss": 0.4125, "step": 1094 }, { "epoch": 0.030213460165484225, "grad_norm": 0.004729332402348518, "learning_rate": 0.001, "loss": 0.3841, "step": 1095 }, { "epoch": 0.030241052366548594, "grad_norm": 0.0037650500889867544, "learning_rate": 0.001, "loss": 0.3903, "step": 1096 }, { "epoch": 0.030268644567612964, "grad_norm": 0.0038791224360466003, "learning_rate": 0.001, "loss": 0.4233, "step": 1097 }, { "epoch": 0.030296236768677334, "grad_norm": 0.004454473964869976, "learning_rate": 0.001, "loss": 0.3714, "step": 1098 }, { "epoch": 0.030323828969741704, "grad_norm": 0.0029884730465710163, "learning_rate": 0.001, "loss": 0.4107, "step": 1099 }, { "epoch": 0.03035142117080607, "grad_norm": 0.0049886396154761314, "learning_rate": 0.001, "loss": 0.3922, "step": 1100 }, { "epoch": 0.03037901337187044, "grad_norm": 0.0025764929596334696, "learning_rate": 0.001, "loss": 0.4182, "step": 1101 }, { "epoch": 0.03040660557293481, "grad_norm": 0.0029421220533549786, "learning_rate": 0.001, "loss": 0.4079, "step": 1102 }, { "epoch": 0.03043419777399918, "grad_norm": 0.014296631328761578, "learning_rate": 0.001, "loss": 0.3668, "step": 1103 }, { "epoch": 0.030461789975063547, "grad_norm": 0.0035509790759533644, "learning_rate": 0.001, "loss": 0.3593, "step": 1104 }, { "epoch": 0.030489382176127917, "grad_norm": 0.0033370009623467922, "learning_rate": 0.001, "loss": 0.4534, "step": 1105 }, { "epoch": 0.030516974377192287, "grad_norm": 0.004134595859795809, "learning_rate": 0.001, "loss": 0.3947, "step": 1106 }, { "epoch": 0.030544566578256657, "grad_norm": 0.005164284259080887, "learning_rate": 0.001, "loss": 0.3831, "step": 1107 }, { "epoch": 0.030572158779321026, "grad_norm": 0.004688777029514313, "learning_rate": 0.001, "loss": 0.3772, "step": 1108 }, { "epoch": 0.030599750980385393, "grad_norm": 0.0034766956232488155, "learning_rate": 0.001, "loss": 0.4172, "step": 1109 }, { "epoch": 0.030627343181449763, "grad_norm": 0.0031806135084480047, "learning_rate": 0.001, "loss": 0.3803, "step": 1110 }, { "epoch": 0.030654935382514133, "grad_norm": 0.0031535644084215164, "learning_rate": 0.001, "loss": 0.4076, "step": 1111 }, { "epoch": 0.030682527583578503, "grad_norm": 0.004275255836546421, "learning_rate": 0.001, "loss": 0.397, "step": 1112 }, { "epoch": 0.03071011978464287, "grad_norm": 0.002956011099740863, "learning_rate": 0.001, "loss": 0.415, "step": 1113 }, { "epoch": 0.03073771198570724, "grad_norm": 0.002727919491007924, "learning_rate": 0.001, "loss": 0.4248, "step": 1114 }, { "epoch": 0.03076530418677161, "grad_norm": 0.0037477565929293633, "learning_rate": 0.001, "loss": 0.3827, "step": 1115 }, { "epoch": 0.03079289638783598, "grad_norm": 0.002299915300682187, "learning_rate": 0.001, "loss": 0.4314, "step": 1116 }, { "epoch": 0.03082048858890035, "grad_norm": 0.0031318028923124075, "learning_rate": 0.001, "loss": 0.4159, "step": 1117 }, { "epoch": 0.030848080789964715, "grad_norm": 0.004523344803601503, "learning_rate": 0.001, "loss": 0.3997, "step": 1118 }, { "epoch": 0.030875672991029085, "grad_norm": 0.0020371493883430958, "learning_rate": 0.001, "loss": 0.4182, "step": 1119 }, { "epoch": 0.030903265192093455, "grad_norm": 0.0022572767920792103, "learning_rate": 0.001, "loss": 0.4066, "step": 1120 }, { "epoch": 0.030930857393157825, "grad_norm": 0.0032437213230878115, "learning_rate": 0.001, "loss": 0.3895, "step": 1121 }, { "epoch": 0.030958449594222195, "grad_norm": 0.004357442259788513, "learning_rate": 0.001, "loss": 0.3938, "step": 1122 }, { "epoch": 0.03098604179528656, "grad_norm": 0.0027816223446279764, "learning_rate": 0.001, "loss": 0.3732, "step": 1123 }, { "epoch": 0.03101363399635093, "grad_norm": 0.002367631997913122, "learning_rate": 0.001, "loss": 0.4242, "step": 1124 }, { "epoch": 0.0310412261974153, "grad_norm": 0.007127678487449884, "learning_rate": 0.001, "loss": 0.4038, "step": 1125 }, { "epoch": 0.03106881839847967, "grad_norm": 0.003470206633210182, "learning_rate": 0.001, "loss": 0.4224, "step": 1126 }, { "epoch": 0.031096410599544037, "grad_norm": 0.003238279139623046, "learning_rate": 0.001, "loss": 0.4325, "step": 1127 }, { "epoch": 0.031124002800608407, "grad_norm": 0.0037913359701633453, "learning_rate": 0.001, "loss": 0.4244, "step": 1128 }, { "epoch": 0.031151595001672777, "grad_norm": 0.006213251501321793, "learning_rate": 0.001, "loss": 0.4206, "step": 1129 }, { "epoch": 0.031179187202737147, "grad_norm": 0.004787992220371962, "learning_rate": 0.001, "loss": 0.3691, "step": 1130 }, { "epoch": 0.031206779403801517, "grad_norm": 0.0029551470652222633, "learning_rate": 0.001, "loss": 0.3784, "step": 1131 }, { "epoch": 0.031234371604865883, "grad_norm": 0.0033430701587349176, "learning_rate": 0.001, "loss": 0.4101, "step": 1132 }, { "epoch": 0.03126196380593026, "grad_norm": 0.0027807389851659536, "learning_rate": 0.001, "loss": 0.397, "step": 1133 }, { "epoch": 0.03128955600699462, "grad_norm": 0.003867300460115075, "learning_rate": 0.001, "loss": 0.4063, "step": 1134 }, { "epoch": 0.03131714820805899, "grad_norm": 0.0034717791713774204, "learning_rate": 0.001, "loss": 0.3979, "step": 1135 }, { "epoch": 0.03134474040912336, "grad_norm": 0.0047624786384403706, "learning_rate": 0.001, "loss": 0.415, "step": 1136 }, { "epoch": 0.03137233261018773, "grad_norm": 0.0029204641468822956, "learning_rate": 0.001, "loss": 0.4426, "step": 1137 }, { "epoch": 0.0313999248112521, "grad_norm": 0.0029500045347958803, "learning_rate": 0.001, "loss": 0.4429, "step": 1138 }, { "epoch": 0.03142751701231647, "grad_norm": 0.0030622980557382107, "learning_rate": 0.001, "loss": 0.3874, "step": 1139 }, { "epoch": 0.031455109213380836, "grad_norm": 0.006081267725676298, "learning_rate": 0.001, "loss": 0.4088, "step": 1140 }, { "epoch": 0.03148270141444521, "grad_norm": 0.0026580330450087786, "learning_rate": 0.001, "loss": 0.4361, "step": 1141 }, { "epoch": 0.031510293615509576, "grad_norm": 0.0036745185498148203, "learning_rate": 0.001, "loss": 0.3995, "step": 1142 }, { "epoch": 0.03153788581657394, "grad_norm": 0.0035352655686438084, "learning_rate": 0.001, "loss": 0.3758, "step": 1143 }, { "epoch": 0.031565478017638315, "grad_norm": 0.005509037058800459, "learning_rate": 0.001, "loss": 0.4058, "step": 1144 }, { "epoch": 0.03159307021870268, "grad_norm": 0.0026996792294085026, "learning_rate": 0.001, "loss": 0.4276, "step": 1145 }, { "epoch": 0.031620662419767055, "grad_norm": 0.0030703977681696415, "learning_rate": 0.001, "loss": 0.4071, "step": 1146 }, { "epoch": 0.03164825462083142, "grad_norm": 0.004798520356416702, "learning_rate": 0.001, "loss": 0.3623, "step": 1147 }, { "epoch": 0.03167584682189579, "grad_norm": 0.0030252067372202873, "learning_rate": 0.001, "loss": 0.4195, "step": 1148 }, { "epoch": 0.03170343902296016, "grad_norm": 0.0031654182821512222, "learning_rate": 0.001, "loss": 0.4016, "step": 1149 }, { "epoch": 0.03173103122402453, "grad_norm": 0.005452923942357302, "learning_rate": 0.001, "loss": 0.4021, "step": 1150 }, { "epoch": 0.0317586234250889, "grad_norm": 0.004767664708197117, "learning_rate": 0.001, "loss": 0.3967, "step": 1151 }, { "epoch": 0.03178621562615327, "grad_norm": 0.0034988594707101583, "learning_rate": 0.001, "loss": 0.3939, "step": 1152 }, { "epoch": 0.031813807827217634, "grad_norm": 0.0034391777589917183, "learning_rate": 0.001, "loss": 0.407, "step": 1153 }, { "epoch": 0.03184140002828201, "grad_norm": 0.003413598518818617, "learning_rate": 0.001, "loss": 0.4151, "step": 1154 }, { "epoch": 0.031868992229346374, "grad_norm": 0.00447838706895709, "learning_rate": 0.001, "loss": 0.4024, "step": 1155 }, { "epoch": 0.03189658443041075, "grad_norm": 0.002987177576869726, "learning_rate": 0.001, "loss": 0.385, "step": 1156 }, { "epoch": 0.031924176631475114, "grad_norm": 0.002857605693861842, "learning_rate": 0.001, "loss": 0.3813, "step": 1157 }, { "epoch": 0.03195176883253948, "grad_norm": 0.0050466121174395084, "learning_rate": 0.001, "loss": 0.4231, "step": 1158 }, { "epoch": 0.031979361033603854, "grad_norm": 0.003727944800630212, "learning_rate": 0.001, "loss": 0.4096, "step": 1159 }, { "epoch": 0.03200695323466822, "grad_norm": 0.003613363951444626, "learning_rate": 0.001, "loss": 0.408, "step": 1160 }, { "epoch": 0.03203454543573259, "grad_norm": 0.003519849618896842, "learning_rate": 0.001, "loss": 0.4054, "step": 1161 }, { "epoch": 0.03206213763679696, "grad_norm": 0.005940241273492575, "learning_rate": 0.001, "loss": 0.423, "step": 1162 }, { "epoch": 0.032089729837861326, "grad_norm": 0.0036697194445878267, "learning_rate": 0.001, "loss": 0.4025, "step": 1163 }, { "epoch": 0.0321173220389257, "grad_norm": 0.0032535314094275236, "learning_rate": 0.001, "loss": 0.3744, "step": 1164 }, { "epoch": 0.032144914239990066, "grad_norm": 0.0032929456792771816, "learning_rate": 0.001, "loss": 0.435, "step": 1165 }, { "epoch": 0.03217250644105443, "grad_norm": 0.0031699042301625013, "learning_rate": 0.001, "loss": 0.3564, "step": 1166 }, { "epoch": 0.032200098642118806, "grad_norm": 0.004080107901245356, "learning_rate": 0.001, "loss": 0.4004, "step": 1167 }, { "epoch": 0.03222769084318317, "grad_norm": 0.0031074616126716137, "learning_rate": 0.001, "loss": 0.4407, "step": 1168 }, { "epoch": 0.032255283044247546, "grad_norm": 0.004778381437063217, "learning_rate": 0.001, "loss": 0.4701, "step": 1169 }, { "epoch": 0.03228287524531191, "grad_norm": 0.004103434272110462, "learning_rate": 0.001, "loss": 0.427, "step": 1170 }, { "epoch": 0.03231046744637628, "grad_norm": 0.0030324216932058334, "learning_rate": 0.001, "loss": 0.4087, "step": 1171 }, { "epoch": 0.03233805964744065, "grad_norm": 0.0036339950747787952, "learning_rate": 0.001, "loss": 0.3929, "step": 1172 }, { "epoch": 0.03236565184850502, "grad_norm": 0.008613920770585537, "learning_rate": 0.001, "loss": 0.4442, "step": 1173 }, { "epoch": 0.03239324404956939, "grad_norm": 0.014312573708593845, "learning_rate": 0.001, "loss": 0.383, "step": 1174 }, { "epoch": 0.03242083625063376, "grad_norm": 0.00535425404086709, "learning_rate": 0.001, "loss": 0.398, "step": 1175 }, { "epoch": 0.032448428451698125, "grad_norm": 0.007150169927626848, "learning_rate": 0.001, "loss": 0.3847, "step": 1176 }, { "epoch": 0.0324760206527625, "grad_norm": 0.0030312181916087866, "learning_rate": 0.001, "loss": 0.4539, "step": 1177 }, { "epoch": 0.032503612853826865, "grad_norm": 0.003885834477841854, "learning_rate": 0.001, "loss": 0.3887, "step": 1178 }, { "epoch": 0.03253120505489124, "grad_norm": 0.0037573217414319515, "learning_rate": 0.001, "loss": 0.3754, "step": 1179 }, { "epoch": 0.032558797255955604, "grad_norm": 0.015517042018473148, "learning_rate": 0.001, "loss": 0.3809, "step": 1180 }, { "epoch": 0.03258638945701997, "grad_norm": 0.004123013466596603, "learning_rate": 0.001, "loss": 0.4034, "step": 1181 }, { "epoch": 0.032613981658084344, "grad_norm": 0.00358961196616292, "learning_rate": 0.001, "loss": 0.4251, "step": 1182 }, { "epoch": 0.03264157385914871, "grad_norm": 0.002588262315839529, "learning_rate": 0.001, "loss": 0.4649, "step": 1183 }, { "epoch": 0.032669166060213084, "grad_norm": 0.0033034030348062515, "learning_rate": 0.001, "loss": 0.4165, "step": 1184 }, { "epoch": 0.03269675826127745, "grad_norm": 0.0026907999999821186, "learning_rate": 0.001, "loss": 0.435, "step": 1185 }, { "epoch": 0.03272435046234182, "grad_norm": 0.0029784864746034145, "learning_rate": 0.001, "loss": 0.4088, "step": 1186 }, { "epoch": 0.03275194266340619, "grad_norm": 0.004101334605365992, "learning_rate": 0.001, "loss": 0.4322, "step": 1187 }, { "epoch": 0.03277953486447056, "grad_norm": 0.008756603114306927, "learning_rate": 0.001, "loss": 0.3735, "step": 1188 }, { "epoch": 0.03280712706553492, "grad_norm": 0.005191429052501917, "learning_rate": 0.001, "loss": 0.4222, "step": 1189 }, { "epoch": 0.032834719266599297, "grad_norm": 0.00770029379054904, "learning_rate": 0.001, "loss": 0.3631, "step": 1190 }, { "epoch": 0.03286231146766366, "grad_norm": 0.005141628440469503, "learning_rate": 0.001, "loss": 0.4402, "step": 1191 }, { "epoch": 0.032889903668728036, "grad_norm": 0.004422733094543219, "learning_rate": 0.001, "loss": 0.3201, "step": 1192 }, { "epoch": 0.0329174958697924, "grad_norm": 0.0060617136768996716, "learning_rate": 0.001, "loss": 0.4299, "step": 1193 }, { "epoch": 0.03294508807085677, "grad_norm": 0.007132702972739935, "learning_rate": 0.001, "loss": 0.4178, "step": 1194 }, { "epoch": 0.03297268027192114, "grad_norm": 0.006239313166588545, "learning_rate": 0.001, "loss": 0.4001, "step": 1195 }, { "epoch": 0.03300027247298551, "grad_norm": 0.00840328261256218, "learning_rate": 0.001, "loss": 0.4236, "step": 1196 }, { "epoch": 0.03302786467404988, "grad_norm": 0.004313162062317133, "learning_rate": 0.001, "loss": 0.3821, "step": 1197 }, { "epoch": 0.03305545687511425, "grad_norm": 0.004403871949762106, "learning_rate": 0.001, "loss": 0.4372, "step": 1198 }, { "epoch": 0.033083049076178615, "grad_norm": 0.005073420237749815, "learning_rate": 0.001, "loss": 0.3998, "step": 1199 }, { "epoch": 0.03311064127724299, "grad_norm": 0.00479225255548954, "learning_rate": 0.001, "loss": 0.398, "step": 1200 }, { "epoch": 0.033138233478307355, "grad_norm": 0.003564857877790928, "learning_rate": 0.001, "loss": 0.4359, "step": 1201 }, { "epoch": 0.03316582567937173, "grad_norm": 0.004907668102532625, "learning_rate": 0.001, "loss": 0.3734, "step": 1202 }, { "epoch": 0.033193417880436095, "grad_norm": 0.003451892174780369, "learning_rate": 0.001, "loss": 0.3853, "step": 1203 }, { "epoch": 0.03322101008150046, "grad_norm": 0.0029975506477057934, "learning_rate": 0.001, "loss": 0.3931, "step": 1204 }, { "epoch": 0.033248602282564835, "grad_norm": 0.005138040985912085, "learning_rate": 0.001, "loss": 0.3977, "step": 1205 }, { "epoch": 0.0332761944836292, "grad_norm": 0.004696134477853775, "learning_rate": 0.001, "loss": 0.3863, "step": 1206 }, { "epoch": 0.033303786684693575, "grad_norm": 0.005408620461821556, "learning_rate": 0.001, "loss": 0.3861, "step": 1207 }, { "epoch": 0.03333137888575794, "grad_norm": 0.004990086425095797, "learning_rate": 0.001, "loss": 0.408, "step": 1208 }, { "epoch": 0.03335897108682231, "grad_norm": 0.003546757623553276, "learning_rate": 0.001, "loss": 0.4335, "step": 1209 }, { "epoch": 0.03338656328788668, "grad_norm": 0.003440044354647398, "learning_rate": 0.001, "loss": 0.4024, "step": 1210 }, { "epoch": 0.03341415548895105, "grad_norm": 0.005536787211894989, "learning_rate": 0.001, "loss": 0.3986, "step": 1211 }, { "epoch": 0.033441747690015414, "grad_norm": 0.005742164328694344, "learning_rate": 0.001, "loss": 0.386, "step": 1212 }, { "epoch": 0.03346933989107979, "grad_norm": 0.009792082943022251, "learning_rate": 0.001, "loss": 0.3656, "step": 1213 }, { "epoch": 0.033496932092144154, "grad_norm": 0.004209910985082388, "learning_rate": 0.001, "loss": 0.3601, "step": 1214 }, { "epoch": 0.03352452429320853, "grad_norm": 0.0035643631126731634, "learning_rate": 0.001, "loss": 0.4034, "step": 1215 }, { "epoch": 0.03355211649427289, "grad_norm": 0.0032242017332464457, "learning_rate": 0.001, "loss": 0.4123, "step": 1216 }, { "epoch": 0.03357970869533726, "grad_norm": 0.004148339852690697, "learning_rate": 0.001, "loss": 0.4498, "step": 1217 }, { "epoch": 0.03360730089640163, "grad_norm": 0.004815380088984966, "learning_rate": 0.001, "loss": 0.4287, "step": 1218 }, { "epoch": 0.033634893097466, "grad_norm": 0.0036340258084237576, "learning_rate": 0.001, "loss": 0.4204, "step": 1219 }, { "epoch": 0.03366248529853037, "grad_norm": 0.0031413130927830935, "learning_rate": 0.001, "loss": 0.4011, "step": 1220 }, { "epoch": 0.03369007749959474, "grad_norm": 0.005375964101403952, "learning_rate": 0.001, "loss": 0.3973, "step": 1221 }, { "epoch": 0.033717669700659106, "grad_norm": 0.0039574578404426575, "learning_rate": 0.001, "loss": 0.3736, "step": 1222 }, { "epoch": 0.03374526190172348, "grad_norm": 0.0040151095017790794, "learning_rate": 0.001, "loss": 0.4118, "step": 1223 }, { "epoch": 0.033772854102787846, "grad_norm": 0.005083186086267233, "learning_rate": 0.001, "loss": 0.411, "step": 1224 }, { "epoch": 0.03380044630385222, "grad_norm": 0.00605596462264657, "learning_rate": 0.001, "loss": 0.3944, "step": 1225 }, { "epoch": 0.033828038504916585, "grad_norm": 0.0039448305033147335, "learning_rate": 0.001, "loss": 0.4024, "step": 1226 }, { "epoch": 0.03385563070598095, "grad_norm": 0.012280398979783058, "learning_rate": 0.001, "loss": 0.377, "step": 1227 }, { "epoch": 0.033883222907045325, "grad_norm": 0.049376230686903, "learning_rate": 0.001, "loss": 0.3919, "step": 1228 }, { "epoch": 0.03391081510810969, "grad_norm": 0.0036013920325785875, "learning_rate": 0.001, "loss": 0.4092, "step": 1229 }, { "epoch": 0.033938407309174065, "grad_norm": 0.0094405896961689, "learning_rate": 0.001, "loss": 0.3937, "step": 1230 }, { "epoch": 0.03396599951023843, "grad_norm": 0.007406734395772219, "learning_rate": 0.001, "loss": 0.3865, "step": 1231 }, { "epoch": 0.0339935917113028, "grad_norm": 0.004268816206604242, "learning_rate": 0.001, "loss": 0.3914, "step": 1232 }, { "epoch": 0.03402118391236717, "grad_norm": 0.003923129290342331, "learning_rate": 0.001, "loss": 0.3959, "step": 1233 }, { "epoch": 0.03404877611343154, "grad_norm": 0.003082707989960909, "learning_rate": 0.001, "loss": 0.3886, "step": 1234 }, { "epoch": 0.03407636831449591, "grad_norm": 0.004590165335685015, "learning_rate": 0.001, "loss": 0.3789, "step": 1235 }, { "epoch": 0.03410396051556028, "grad_norm": 0.003626961726695299, "learning_rate": 0.001, "loss": 0.4374, "step": 1236 }, { "epoch": 0.034131552716624644, "grad_norm": 0.003703797934576869, "learning_rate": 0.001, "loss": 0.382, "step": 1237 }, { "epoch": 0.03415914491768902, "grad_norm": 0.005130970384925604, "learning_rate": 0.001, "loss": 0.3956, "step": 1238 }, { "epoch": 0.034186737118753384, "grad_norm": 0.0035557232331484556, "learning_rate": 0.001, "loss": 0.4152, "step": 1239 }, { "epoch": 0.03421432931981775, "grad_norm": 0.0043634334579110146, "learning_rate": 0.001, "loss": 0.4072, "step": 1240 }, { "epoch": 0.034241921520882124, "grad_norm": 0.006564748473465443, "learning_rate": 0.001, "loss": 0.3998, "step": 1241 }, { "epoch": 0.03426951372194649, "grad_norm": 0.0034478590823709965, "learning_rate": 0.001, "loss": 0.3809, "step": 1242 }, { "epoch": 0.034297105923010864, "grad_norm": 0.003941735252737999, "learning_rate": 0.001, "loss": 0.3988, "step": 1243 }, { "epoch": 0.03432469812407523, "grad_norm": 0.004107107874006033, "learning_rate": 0.001, "loss": 0.3597, "step": 1244 }, { "epoch": 0.034352290325139596, "grad_norm": 0.0032025808468461037, "learning_rate": 0.001, "loss": 0.3771, "step": 1245 }, { "epoch": 0.03437988252620397, "grad_norm": 0.0033102971501648426, "learning_rate": 0.001, "loss": 0.4277, "step": 1246 }, { "epoch": 0.034407474727268336, "grad_norm": 0.00529972231015563, "learning_rate": 0.001, "loss": 0.4127, "step": 1247 }, { "epoch": 0.03443506692833271, "grad_norm": 0.004502330906689167, "learning_rate": 0.001, "loss": 0.4212, "step": 1248 }, { "epoch": 0.034462659129397076, "grad_norm": 0.0027463252190500498, "learning_rate": 0.001, "loss": 0.4051, "step": 1249 }, { "epoch": 0.03449025133046144, "grad_norm": 0.0033640682231634855, "learning_rate": 0.001, "loss": 0.388, "step": 1250 }, { "epoch": 0.034517843531525816, "grad_norm": 0.007285924628376961, "learning_rate": 0.001, "loss": 0.413, "step": 1251 }, { "epoch": 0.03454543573259018, "grad_norm": 0.004217895213514566, "learning_rate": 0.001, "loss": 0.4238, "step": 1252 }, { "epoch": 0.034573027933654556, "grad_norm": 0.0027172528207302094, "learning_rate": 0.001, "loss": 0.3733, "step": 1253 }, { "epoch": 0.03460062013471892, "grad_norm": 0.0033437691163271666, "learning_rate": 0.001, "loss": 0.412, "step": 1254 }, { "epoch": 0.03462821233578329, "grad_norm": 0.003804217092692852, "learning_rate": 0.001, "loss": 0.4009, "step": 1255 }, { "epoch": 0.03465580453684766, "grad_norm": 0.0027781634125858545, "learning_rate": 0.001, "loss": 0.456, "step": 1256 }, { "epoch": 0.03468339673791203, "grad_norm": 0.003424674505367875, "learning_rate": 0.001, "loss": 0.3924, "step": 1257 }, { "epoch": 0.0347109889389764, "grad_norm": 0.003052354324609041, "learning_rate": 0.001, "loss": 0.4331, "step": 1258 }, { "epoch": 0.03473858114004077, "grad_norm": 0.004982203710824251, "learning_rate": 0.001, "loss": 0.3557, "step": 1259 }, { "epoch": 0.034766173341105135, "grad_norm": 0.003158049425110221, "learning_rate": 0.001, "loss": 0.4131, "step": 1260 }, { "epoch": 0.03479376554216951, "grad_norm": 0.0035426870454102755, "learning_rate": 0.001, "loss": 0.3496, "step": 1261 }, { "epoch": 0.034821357743233874, "grad_norm": 0.0026156532112509012, "learning_rate": 0.001, "loss": 0.4396, "step": 1262 }, { "epoch": 0.03484894994429824, "grad_norm": 0.0027896466199308634, "learning_rate": 0.001, "loss": 0.4073, "step": 1263 }, { "epoch": 0.034876542145362614, "grad_norm": 0.002534053521230817, "learning_rate": 0.001, "loss": 0.379, "step": 1264 }, { "epoch": 0.03490413434642698, "grad_norm": 0.005910890176892281, "learning_rate": 0.001, "loss": 0.3777, "step": 1265 }, { "epoch": 0.034931726547491354, "grad_norm": 0.004372291266918182, "learning_rate": 0.001, "loss": 0.4066, "step": 1266 }, { "epoch": 0.03495931874855572, "grad_norm": 0.0036974658723920584, "learning_rate": 0.001, "loss": 0.4099, "step": 1267 }, { "epoch": 0.03498691094962009, "grad_norm": 0.04189533367753029, "learning_rate": 0.001, "loss": 0.4203, "step": 1268 }, { "epoch": 0.03501450315068446, "grad_norm": 0.0037905664648860693, "learning_rate": 0.001, "loss": 0.4287, "step": 1269 }, { "epoch": 0.03504209535174883, "grad_norm": 0.004496126435697079, "learning_rate": 0.001, "loss": 0.3499, "step": 1270 }, { "epoch": 0.0350696875528132, "grad_norm": 0.002956201322376728, "learning_rate": 0.001, "loss": 0.4148, "step": 1271 }, { "epoch": 0.03509727975387757, "grad_norm": 0.004545163828879595, "learning_rate": 0.001, "loss": 0.399, "step": 1272 }, { "epoch": 0.03512487195494193, "grad_norm": 0.005399242043495178, "learning_rate": 0.001, "loss": 0.409, "step": 1273 }, { "epoch": 0.035152464156006306, "grad_norm": 0.003836257616057992, "learning_rate": 0.001, "loss": 0.3758, "step": 1274 }, { "epoch": 0.03518005635707067, "grad_norm": 0.0035388548858463764, "learning_rate": 0.001, "loss": 0.3961, "step": 1275 }, { "epoch": 0.035207648558135046, "grad_norm": 0.009006824344396591, "learning_rate": 0.001, "loss": 0.4311, "step": 1276 }, { "epoch": 0.03523524075919941, "grad_norm": 0.006458511110395193, "learning_rate": 0.001, "loss": 0.4337, "step": 1277 }, { "epoch": 0.03526283296026378, "grad_norm": 0.0033469260670244694, "learning_rate": 0.001, "loss": 0.4188, "step": 1278 }, { "epoch": 0.03529042516132815, "grad_norm": 0.003662231145426631, "learning_rate": 0.001, "loss": 0.3961, "step": 1279 }, { "epoch": 0.03531801736239252, "grad_norm": 0.004838781896978617, "learning_rate": 0.001, "loss": 0.4223, "step": 1280 }, { "epoch": 0.03534560956345689, "grad_norm": 0.003931723535060883, "learning_rate": 0.001, "loss": 0.3892, "step": 1281 }, { "epoch": 0.03537320176452126, "grad_norm": 0.0037028163205832243, "learning_rate": 0.001, "loss": 0.4293, "step": 1282 }, { "epoch": 0.035400793965585625, "grad_norm": 0.00402813870459795, "learning_rate": 0.001, "loss": 0.4242, "step": 1283 }, { "epoch": 0.03542838616665, "grad_norm": 0.0033616770524531603, "learning_rate": 0.001, "loss": 0.3881, "step": 1284 }, { "epoch": 0.035455978367714365, "grad_norm": 0.002920625265687704, "learning_rate": 0.001, "loss": 0.4269, "step": 1285 }, { "epoch": 0.03548357056877873, "grad_norm": 0.007799847517162561, "learning_rate": 0.001, "loss": 0.3389, "step": 1286 }, { "epoch": 0.035511162769843105, "grad_norm": 0.0034114914014935493, "learning_rate": 0.001, "loss": 0.3903, "step": 1287 }, { "epoch": 0.03553875497090747, "grad_norm": 0.0037257294170558453, "learning_rate": 0.001, "loss": 0.4003, "step": 1288 }, { "epoch": 0.035566347171971845, "grad_norm": 0.0022869498934596777, "learning_rate": 0.001, "loss": 0.4179, "step": 1289 }, { "epoch": 0.03559393937303621, "grad_norm": 0.006633399520069361, "learning_rate": 0.001, "loss": 0.3713, "step": 1290 }, { "epoch": 0.03562153157410058, "grad_norm": 0.004205191507935524, "learning_rate": 0.001, "loss": 0.4066, "step": 1291 }, { "epoch": 0.03564912377516495, "grad_norm": 0.0037389290519058704, "learning_rate": 0.001, "loss": 0.3952, "step": 1292 }, { "epoch": 0.03567671597622932, "grad_norm": 0.00401865690946579, "learning_rate": 0.001, "loss": 0.411, "step": 1293 }, { "epoch": 0.03570430817729369, "grad_norm": 0.0036637093871831894, "learning_rate": 0.001, "loss": 0.3678, "step": 1294 }, { "epoch": 0.03573190037835806, "grad_norm": 0.002707039937376976, "learning_rate": 0.001, "loss": 0.4078, "step": 1295 }, { "epoch": 0.035759492579422424, "grad_norm": 0.004088058602064848, "learning_rate": 0.001, "loss": 0.388, "step": 1296 }, { "epoch": 0.0357870847804868, "grad_norm": 0.0029987411107867956, "learning_rate": 0.001, "loss": 0.3935, "step": 1297 }, { "epoch": 0.03581467698155116, "grad_norm": 0.0037499302998185158, "learning_rate": 0.001, "loss": 0.4239, "step": 1298 }, { "epoch": 0.03584226918261554, "grad_norm": 0.00426569813862443, "learning_rate": 0.001, "loss": 0.3822, "step": 1299 }, { "epoch": 0.0358698613836799, "grad_norm": 0.0036391124594956636, "learning_rate": 0.001, "loss": 0.3956, "step": 1300 }, { "epoch": 0.03589745358474427, "grad_norm": 0.003542504971846938, "learning_rate": 0.001, "loss": 0.4064, "step": 1301 }, { "epoch": 0.03592504578580864, "grad_norm": 0.003770799608901143, "learning_rate": 0.001, "loss": 0.3766, "step": 1302 }, { "epoch": 0.03595263798687301, "grad_norm": 0.006019794847816229, "learning_rate": 0.001, "loss": 0.4044, "step": 1303 }, { "epoch": 0.03598023018793738, "grad_norm": 0.0027127231005579233, "learning_rate": 0.001, "loss": 0.4451, "step": 1304 }, { "epoch": 0.03600782238900175, "grad_norm": 0.004511113744229078, "learning_rate": 0.001, "loss": 0.3737, "step": 1305 }, { "epoch": 0.036035414590066116, "grad_norm": 0.0061105103231966496, "learning_rate": 0.001, "loss": 0.4041, "step": 1306 }, { "epoch": 0.03606300679113049, "grad_norm": 0.003959209658205509, "learning_rate": 0.001, "loss": 0.3976, "step": 1307 }, { "epoch": 0.036090598992194856, "grad_norm": 0.006086795590817928, "learning_rate": 0.001, "loss": 0.4124, "step": 1308 }, { "epoch": 0.03611819119325922, "grad_norm": 0.003370011458173394, "learning_rate": 0.001, "loss": 0.447, "step": 1309 }, { "epoch": 0.036145783394323595, "grad_norm": 0.004544582683593035, "learning_rate": 0.001, "loss": 0.3875, "step": 1310 }, { "epoch": 0.03617337559538796, "grad_norm": 0.004398183431476355, "learning_rate": 0.001, "loss": 0.3935, "step": 1311 }, { "epoch": 0.036200967796452335, "grad_norm": 0.0034012263640761375, "learning_rate": 0.001, "loss": 0.3787, "step": 1312 }, { "epoch": 0.0362285599975167, "grad_norm": 0.004038006532937288, "learning_rate": 0.001, "loss": 0.4019, "step": 1313 }, { "epoch": 0.03625615219858107, "grad_norm": 0.004931545816361904, "learning_rate": 0.001, "loss": 0.3872, "step": 1314 }, { "epoch": 0.03628374439964544, "grad_norm": 0.004279931075870991, "learning_rate": 0.001, "loss": 0.4528, "step": 1315 }, { "epoch": 0.03631133660070981, "grad_norm": 0.0031895472202450037, "learning_rate": 0.001, "loss": 0.4364, "step": 1316 }, { "epoch": 0.03633892880177418, "grad_norm": 0.005345645360648632, "learning_rate": 0.001, "loss": 0.4076, "step": 1317 }, { "epoch": 0.03636652100283855, "grad_norm": 0.004938568454235792, "learning_rate": 0.001, "loss": 0.3869, "step": 1318 }, { "epoch": 0.036394113203902914, "grad_norm": 0.004648009780794382, "learning_rate": 0.001, "loss": 0.4285, "step": 1319 }, { "epoch": 0.03642170540496729, "grad_norm": 0.006550830788910389, "learning_rate": 0.001, "loss": 0.4224, "step": 1320 }, { "epoch": 0.036449297606031654, "grad_norm": 0.002846076153218746, "learning_rate": 0.001, "loss": 0.4178, "step": 1321 }, { "epoch": 0.03647688980709603, "grad_norm": 0.004674985073506832, "learning_rate": 0.001, "loss": 0.4307, "step": 1322 }, { "epoch": 0.036504482008160394, "grad_norm": 0.003952328581362963, "learning_rate": 0.001, "loss": 0.4039, "step": 1323 }, { "epoch": 0.03653207420922476, "grad_norm": 0.0029479297809302807, "learning_rate": 0.001, "loss": 0.405, "step": 1324 }, { "epoch": 0.036559666410289134, "grad_norm": 0.0032731324899941683, "learning_rate": 0.001, "loss": 0.3883, "step": 1325 }, { "epoch": 0.0365872586113535, "grad_norm": 0.003791957162320614, "learning_rate": 0.001, "loss": 0.4212, "step": 1326 }, { "epoch": 0.03661485081241787, "grad_norm": 0.009052555076777935, "learning_rate": 0.001, "loss": 0.4102, "step": 1327 }, { "epoch": 0.03664244301348224, "grad_norm": 0.005174124613404274, "learning_rate": 0.001, "loss": 0.4333, "step": 1328 }, { "epoch": 0.036670035214546606, "grad_norm": 0.008013852871954441, "learning_rate": 0.001, "loss": 0.4314, "step": 1329 }, { "epoch": 0.03669762741561098, "grad_norm": 0.006423450540751219, "learning_rate": 0.001, "loss": 0.402, "step": 1330 }, { "epoch": 0.036725219616675346, "grad_norm": 0.006040682550519705, "learning_rate": 0.001, "loss": 0.4004, "step": 1331 }, { "epoch": 0.03675281181773971, "grad_norm": 0.0055701639503240585, "learning_rate": 0.001, "loss": 0.3934, "step": 1332 }, { "epoch": 0.036780404018804086, "grad_norm": 0.01270906999707222, "learning_rate": 0.001, "loss": 0.3981, "step": 1333 }, { "epoch": 0.03680799621986845, "grad_norm": 0.003626336809247732, "learning_rate": 0.001, "loss": 0.3984, "step": 1334 }, { "epoch": 0.036835588420932826, "grad_norm": 0.004632920026779175, "learning_rate": 0.001, "loss": 0.3836, "step": 1335 }, { "epoch": 0.03686318062199719, "grad_norm": 0.0060633327811956406, "learning_rate": 0.001, "loss": 0.424, "step": 1336 }, { "epoch": 0.03689077282306156, "grad_norm": 0.004871148616075516, "learning_rate": 0.001, "loss": 0.4247, "step": 1337 }, { "epoch": 0.03691836502412593, "grad_norm": 0.0031172886956483126, "learning_rate": 0.001, "loss": 0.4316, "step": 1338 }, { "epoch": 0.0369459572251903, "grad_norm": 0.003916094545274973, "learning_rate": 0.001, "loss": 0.3908, "step": 1339 }, { "epoch": 0.03697354942625467, "grad_norm": 0.0040051937103271484, "learning_rate": 0.001, "loss": 0.3949, "step": 1340 }, { "epoch": 0.03700114162731904, "grad_norm": 0.006250888109207153, "learning_rate": 0.001, "loss": 0.402, "step": 1341 }, { "epoch": 0.037028733828383405, "grad_norm": 0.002888569375500083, "learning_rate": 0.001, "loss": 0.405, "step": 1342 }, { "epoch": 0.03705632602944778, "grad_norm": 0.004510107915848494, "learning_rate": 0.001, "loss": 0.368, "step": 1343 }, { "epoch": 0.037083918230512145, "grad_norm": 0.007241697516292334, "learning_rate": 0.001, "loss": 0.3993, "step": 1344 }, { "epoch": 0.03711151043157652, "grad_norm": 0.0032798638567328453, "learning_rate": 0.001, "loss": 0.3932, "step": 1345 }, { "epoch": 0.037139102632640884, "grad_norm": 0.003440143307670951, "learning_rate": 0.001, "loss": 0.4474, "step": 1346 }, { "epoch": 0.03716669483370525, "grad_norm": 0.0044672004878520966, "learning_rate": 0.001, "loss": 0.4135, "step": 1347 }, { "epoch": 0.037194287034769624, "grad_norm": 0.0031135319732129574, "learning_rate": 0.001, "loss": 0.389, "step": 1348 }, { "epoch": 0.03722187923583399, "grad_norm": 0.004170152824372053, "learning_rate": 0.001, "loss": 0.4193, "step": 1349 }, { "epoch": 0.037249471436898364, "grad_norm": 0.0036481074057519436, "learning_rate": 0.001, "loss": 0.4454, "step": 1350 }, { "epoch": 0.03727706363796273, "grad_norm": 0.003243829123675823, "learning_rate": 0.001, "loss": 0.4246, "step": 1351 }, { "epoch": 0.0373046558390271, "grad_norm": 0.0034886065404862165, "learning_rate": 0.001, "loss": 0.4065, "step": 1352 }, { "epoch": 0.03733224804009147, "grad_norm": 0.004647396504878998, "learning_rate": 0.001, "loss": 0.4131, "step": 1353 }, { "epoch": 0.03735984024115584, "grad_norm": 0.004046002868562937, "learning_rate": 0.001, "loss": 0.3903, "step": 1354 }, { "epoch": 0.0373874324422202, "grad_norm": 0.004573929589241743, "learning_rate": 0.001, "loss": 0.3851, "step": 1355 }, { "epoch": 0.037415024643284576, "grad_norm": 0.006424955558031797, "learning_rate": 0.001, "loss": 0.3623, "step": 1356 }, { "epoch": 0.03744261684434894, "grad_norm": 0.0033393288031220436, "learning_rate": 0.001, "loss": 0.4311, "step": 1357 }, { "epoch": 0.037470209045413316, "grad_norm": 0.0031134155578911304, "learning_rate": 0.001, "loss": 0.3876, "step": 1358 }, { "epoch": 0.03749780124647768, "grad_norm": 0.00366019899956882, "learning_rate": 0.001, "loss": 0.4282, "step": 1359 }, { "epoch": 0.03752539344754205, "grad_norm": 0.003400568151846528, "learning_rate": 0.001, "loss": 0.4119, "step": 1360 }, { "epoch": 0.03755298564860642, "grad_norm": 0.002846767893061042, "learning_rate": 0.001, "loss": 0.4286, "step": 1361 }, { "epoch": 0.03758057784967079, "grad_norm": 0.0031303889118134975, "learning_rate": 0.001, "loss": 0.3936, "step": 1362 }, { "epoch": 0.03760817005073516, "grad_norm": 0.0043816519901156425, "learning_rate": 0.001, "loss": 0.4158, "step": 1363 }, { "epoch": 0.03763576225179953, "grad_norm": 0.004520198330283165, "learning_rate": 0.001, "loss": 0.349, "step": 1364 }, { "epoch": 0.037663354452863895, "grad_norm": 0.005284131038933992, "learning_rate": 0.001, "loss": 0.3745, "step": 1365 }, { "epoch": 0.03769094665392827, "grad_norm": 0.0037800793070346117, "learning_rate": 0.001, "loss": 0.3821, "step": 1366 }, { "epoch": 0.037718538854992635, "grad_norm": 0.004985132720321417, "learning_rate": 0.001, "loss": 0.4065, "step": 1367 }, { "epoch": 0.03774613105605701, "grad_norm": 0.0036822801921516657, "learning_rate": 0.001, "loss": 0.3809, "step": 1368 }, { "epoch": 0.037773723257121375, "grad_norm": 0.0032694186083972454, "learning_rate": 0.001, "loss": 0.4116, "step": 1369 }, { "epoch": 0.03780131545818574, "grad_norm": 0.003384978510439396, "learning_rate": 0.001, "loss": 0.3715, "step": 1370 }, { "epoch": 0.037828907659250115, "grad_norm": 0.0035624897573143244, "learning_rate": 0.001, "loss": 0.3843, "step": 1371 }, { "epoch": 0.03785649986031448, "grad_norm": 0.004096219781786203, "learning_rate": 0.001, "loss": 0.3674, "step": 1372 }, { "epoch": 0.037884092061378855, "grad_norm": 0.004491012543439865, "learning_rate": 0.001, "loss": 0.4039, "step": 1373 }, { "epoch": 0.03791168426244322, "grad_norm": 0.0034480481408536434, "learning_rate": 0.001, "loss": 0.422, "step": 1374 }, { "epoch": 0.03793927646350759, "grad_norm": 0.006217781454324722, "learning_rate": 0.001, "loss": 0.3801, "step": 1375 }, { "epoch": 0.03796686866457196, "grad_norm": 0.004664869979023933, "learning_rate": 0.001, "loss": 0.423, "step": 1376 }, { "epoch": 0.03799446086563633, "grad_norm": 0.008887716569006443, "learning_rate": 0.001, "loss": 0.425, "step": 1377 }, { "epoch": 0.038022053066700694, "grad_norm": 0.003177997190505266, "learning_rate": 0.001, "loss": 0.4169, "step": 1378 }, { "epoch": 0.03804964526776507, "grad_norm": 0.0035175783559679985, "learning_rate": 0.001, "loss": 0.4148, "step": 1379 }, { "epoch": 0.038077237468829433, "grad_norm": 0.0047409930266439915, "learning_rate": 0.001, "loss": 0.4128, "step": 1380 }, { "epoch": 0.03810482966989381, "grad_norm": 0.007139190100133419, "learning_rate": 0.001, "loss": 0.3857, "step": 1381 }, { "epoch": 0.03813242187095817, "grad_norm": 0.0083334194496274, "learning_rate": 0.001, "loss": 0.3634, "step": 1382 }, { "epoch": 0.03816001407202254, "grad_norm": 0.0037119935732334852, "learning_rate": 0.001, "loss": 0.4357, "step": 1383 }, { "epoch": 0.03818760627308691, "grad_norm": 0.00669982610270381, "learning_rate": 0.001, "loss": 0.386, "step": 1384 }, { "epoch": 0.03821519847415128, "grad_norm": 0.00357433152385056, "learning_rate": 0.001, "loss": 0.4024, "step": 1385 }, { "epoch": 0.03824279067521565, "grad_norm": 0.004873959813266993, "learning_rate": 0.001, "loss": 0.4139, "step": 1386 }, { "epoch": 0.03827038287628002, "grad_norm": 0.011126353405416012, "learning_rate": 0.001, "loss": 0.3646, "step": 1387 }, { "epoch": 0.038297975077344386, "grad_norm": 0.0038117689546197653, "learning_rate": 0.001, "loss": 0.4382, "step": 1388 }, { "epoch": 0.03832556727840876, "grad_norm": 0.011077326722443104, "learning_rate": 0.001, "loss": 0.3894, "step": 1389 }, { "epoch": 0.038353159479473126, "grad_norm": 0.005909190978854895, "learning_rate": 0.001, "loss": 0.3893, "step": 1390 }, { "epoch": 0.0383807516805375, "grad_norm": 0.0056834593415260315, "learning_rate": 0.001, "loss": 0.4396, "step": 1391 }, { "epoch": 0.038408343881601865, "grad_norm": 0.0038277830462902784, "learning_rate": 0.001, "loss": 0.3663, "step": 1392 }, { "epoch": 0.03843593608266623, "grad_norm": 0.005587390623986721, "learning_rate": 0.001, "loss": 0.4298, "step": 1393 }, { "epoch": 0.038463528283730605, "grad_norm": 0.004197390284389257, "learning_rate": 0.001, "loss": 0.4228, "step": 1394 }, { "epoch": 0.03849112048479497, "grad_norm": 0.0047178626991808414, "learning_rate": 0.001, "loss": 0.4275, "step": 1395 }, { "epoch": 0.038518712685859345, "grad_norm": 0.0031104707159101963, "learning_rate": 0.001, "loss": 0.4309, "step": 1396 }, { "epoch": 0.03854630488692371, "grad_norm": 0.005666974000632763, "learning_rate": 0.001, "loss": 0.378, "step": 1397 }, { "epoch": 0.03857389708798808, "grad_norm": 0.006076369900256395, "learning_rate": 0.001, "loss": 0.388, "step": 1398 }, { "epoch": 0.03860148928905245, "grad_norm": 0.004276493098586798, "learning_rate": 0.001, "loss": 0.3863, "step": 1399 }, { "epoch": 0.03862908149011682, "grad_norm": 0.00408798037096858, "learning_rate": 0.001, "loss": 0.3718, "step": 1400 }, { "epoch": 0.038656673691181184, "grad_norm": 0.003366732969880104, "learning_rate": 0.001, "loss": 0.3514, "step": 1401 }, { "epoch": 0.03868426589224556, "grad_norm": 0.003257429925724864, "learning_rate": 0.001, "loss": 0.414, "step": 1402 }, { "epoch": 0.038711858093309924, "grad_norm": 0.0036397224757820368, "learning_rate": 0.001, "loss": 0.4286, "step": 1403 }, { "epoch": 0.0387394502943743, "grad_norm": 0.00421003857627511, "learning_rate": 0.001, "loss": 0.3951, "step": 1404 }, { "epoch": 0.038767042495438664, "grad_norm": 0.004263239912688732, "learning_rate": 0.001, "loss": 0.3903, "step": 1405 }, { "epoch": 0.03879463469650303, "grad_norm": 0.0025550604332238436, "learning_rate": 0.001, "loss": 0.403, "step": 1406 }, { "epoch": 0.038822226897567404, "grad_norm": 0.003278963966295123, "learning_rate": 0.001, "loss": 0.401, "step": 1407 }, { "epoch": 0.03884981909863177, "grad_norm": 0.002250393619760871, "learning_rate": 0.001, "loss": 0.4521, "step": 1408 }, { "epoch": 0.038877411299696144, "grad_norm": 0.002963767386972904, "learning_rate": 0.001, "loss": 0.4566, "step": 1409 }, { "epoch": 0.03890500350076051, "grad_norm": 0.006573919206857681, "learning_rate": 0.001, "loss": 0.3568, "step": 1410 }, { "epoch": 0.038932595701824876, "grad_norm": 0.005289596039801836, "learning_rate": 0.001, "loss": 0.3729, "step": 1411 }, { "epoch": 0.03896018790288925, "grad_norm": 0.0031945211812853813, "learning_rate": 0.001, "loss": 0.4238, "step": 1412 }, { "epoch": 0.038987780103953616, "grad_norm": 0.002856782404705882, "learning_rate": 0.001, "loss": 0.4219, "step": 1413 }, { "epoch": 0.03901537230501799, "grad_norm": 0.0037729961331933737, "learning_rate": 0.001, "loss": 0.4379, "step": 1414 }, { "epoch": 0.039042964506082356, "grad_norm": 0.00266630039550364, "learning_rate": 0.001, "loss": 0.4188, "step": 1415 }, { "epoch": 0.03907055670714672, "grad_norm": 0.0042518191039562225, "learning_rate": 0.001, "loss": 0.3827, "step": 1416 }, { "epoch": 0.039098148908211096, "grad_norm": 0.0030879939440637827, "learning_rate": 0.001, "loss": 0.3825, "step": 1417 }, { "epoch": 0.03912574110927546, "grad_norm": 0.0036084537860006094, "learning_rate": 0.001, "loss": 0.4009, "step": 1418 }, { "epoch": 0.039153333310339836, "grad_norm": 0.003634381340816617, "learning_rate": 0.001, "loss": 0.4245, "step": 1419 }, { "epoch": 0.0391809255114042, "grad_norm": 0.0030327397398650646, "learning_rate": 0.001, "loss": 0.4399, "step": 1420 }, { "epoch": 0.03920851771246857, "grad_norm": 0.008019665256142616, "learning_rate": 0.001, "loss": 0.4233, "step": 1421 }, { "epoch": 0.03923610991353294, "grad_norm": 0.004183803219348192, "learning_rate": 0.001, "loss": 0.4155, "step": 1422 }, { "epoch": 0.03926370211459731, "grad_norm": 0.005366318393498659, "learning_rate": 0.001, "loss": 0.4228, "step": 1423 }, { "epoch": 0.039291294315661675, "grad_norm": 0.003394525730982423, "learning_rate": 0.001, "loss": 0.4053, "step": 1424 }, { "epoch": 0.03931888651672605, "grad_norm": 0.003373740240931511, "learning_rate": 0.001, "loss": 0.4043, "step": 1425 }, { "epoch": 0.039346478717790415, "grad_norm": 0.002769649960100651, "learning_rate": 0.001, "loss": 0.4116, "step": 1426 }, { "epoch": 0.03937407091885479, "grad_norm": 0.005319521296769381, "learning_rate": 0.001, "loss": 0.3995, "step": 1427 }, { "epoch": 0.039401663119919154, "grad_norm": 0.0031118986662477255, "learning_rate": 0.001, "loss": 0.4008, "step": 1428 }, { "epoch": 0.03942925532098352, "grad_norm": 0.0032665557228028774, "learning_rate": 0.001, "loss": 0.4214, "step": 1429 }, { "epoch": 0.039456847522047894, "grad_norm": 0.0047190007753670216, "learning_rate": 0.001, "loss": 0.3242, "step": 1430 }, { "epoch": 0.03948443972311226, "grad_norm": 0.0038909264840185642, "learning_rate": 0.001, "loss": 0.4022, "step": 1431 }, { "epoch": 0.039512031924176634, "grad_norm": 0.004970925394445658, "learning_rate": 0.001, "loss": 0.4257, "step": 1432 }, { "epoch": 0.039539624125241, "grad_norm": 0.004649787209928036, "learning_rate": 0.001, "loss": 0.4018, "step": 1433 }, { "epoch": 0.03956721632630537, "grad_norm": 0.0030645502265542746, "learning_rate": 0.001, "loss": 0.4151, "step": 1434 }, { "epoch": 0.03959480852736974, "grad_norm": 0.005270305555313826, "learning_rate": 0.001, "loss": 0.4114, "step": 1435 }, { "epoch": 0.03962240072843411, "grad_norm": 0.004368067253381014, "learning_rate": 0.001, "loss": 0.3898, "step": 1436 }, { "epoch": 0.03964999292949848, "grad_norm": 0.0032691999804228544, "learning_rate": 0.001, "loss": 0.4158, "step": 1437 }, { "epoch": 0.03967758513056285, "grad_norm": 0.003509074915200472, "learning_rate": 0.001, "loss": 0.4044, "step": 1438 }, { "epoch": 0.03970517733162721, "grad_norm": 0.00402647303417325, "learning_rate": 0.001, "loss": 0.3958, "step": 1439 }, { "epoch": 0.039732769532691586, "grad_norm": 0.003934496082365513, "learning_rate": 0.001, "loss": 0.4351, "step": 1440 }, { "epoch": 0.03976036173375595, "grad_norm": 0.0035782591439783573, "learning_rate": 0.001, "loss": 0.4143, "step": 1441 }, { "epoch": 0.039787953934820326, "grad_norm": 0.0036837344523519278, "learning_rate": 0.001, "loss": 0.4007, "step": 1442 }, { "epoch": 0.03981554613588469, "grad_norm": 0.003719213418662548, "learning_rate": 0.001, "loss": 0.4019, "step": 1443 }, { "epoch": 0.03984313833694906, "grad_norm": 0.008284253068268299, "learning_rate": 0.001, "loss": 0.3995, "step": 1444 }, { "epoch": 0.03987073053801343, "grad_norm": 0.0037160192150622606, "learning_rate": 0.001, "loss": 0.4298, "step": 1445 }, { "epoch": 0.0398983227390778, "grad_norm": 0.003967109136283398, "learning_rate": 0.001, "loss": 0.4195, "step": 1446 }, { "epoch": 0.039925914940142165, "grad_norm": 0.003119664965197444, "learning_rate": 0.001, "loss": 0.3624, "step": 1447 }, { "epoch": 0.03995350714120654, "grad_norm": 0.0027750935405492783, "learning_rate": 0.001, "loss": 0.403, "step": 1448 }, { "epoch": 0.039981099342270905, "grad_norm": 0.008331545628607273, "learning_rate": 0.001, "loss": 0.4148, "step": 1449 }, { "epoch": 0.04000869154333528, "grad_norm": 0.004883471876382828, "learning_rate": 0.001, "loss": 0.3759, "step": 1450 }, { "epoch": 0.040036283744399645, "grad_norm": 0.0037747775204479694, "learning_rate": 0.001, "loss": 0.4103, "step": 1451 }, { "epoch": 0.04006387594546401, "grad_norm": 0.0035403715446591377, "learning_rate": 0.001, "loss": 0.3694, "step": 1452 }, { "epoch": 0.040091468146528385, "grad_norm": 0.012222831137478352, "learning_rate": 0.001, "loss": 0.3925, "step": 1453 }, { "epoch": 0.04011906034759275, "grad_norm": 0.029686246067285538, "learning_rate": 0.001, "loss": 0.3688, "step": 1454 }, { "epoch": 0.040146652548657125, "grad_norm": 0.007045884151011705, "learning_rate": 0.001, "loss": 0.3971, "step": 1455 }, { "epoch": 0.04017424474972149, "grad_norm": 0.0033973727840930223, "learning_rate": 0.001, "loss": 0.3764, "step": 1456 }, { "epoch": 0.04020183695078586, "grad_norm": 0.004133992828428745, "learning_rate": 0.001, "loss": 0.3862, "step": 1457 }, { "epoch": 0.04022942915185023, "grad_norm": 0.003264515893533826, "learning_rate": 0.001, "loss": 0.384, "step": 1458 }, { "epoch": 0.0402570213529146, "grad_norm": 0.00320844491943717, "learning_rate": 0.001, "loss": 0.4153, "step": 1459 }, { "epoch": 0.04028461355397897, "grad_norm": 0.0038754413835704327, "learning_rate": 0.001, "loss": 0.3661, "step": 1460 }, { "epoch": 0.04031220575504334, "grad_norm": 0.009661386720836163, "learning_rate": 0.001, "loss": 0.4309, "step": 1461 }, { "epoch": 0.040339797956107704, "grad_norm": 0.010238132439553738, "learning_rate": 0.001, "loss": 0.3922, "step": 1462 }, { "epoch": 0.04036739015717208, "grad_norm": 0.04491569846868515, "learning_rate": 0.001, "loss": 0.4166, "step": 1463 }, { "epoch": 0.04039498235823644, "grad_norm": 0.0031146227847784758, "learning_rate": 0.001, "loss": 0.4181, "step": 1464 }, { "epoch": 0.04042257455930082, "grad_norm": 0.0035386565141379833, "learning_rate": 0.001, "loss": 0.3915, "step": 1465 }, { "epoch": 0.04045016676036518, "grad_norm": 0.0033056430984288454, "learning_rate": 0.001, "loss": 0.3864, "step": 1466 }, { "epoch": 0.04047775896142955, "grad_norm": 0.0025265736039727926, "learning_rate": 0.001, "loss": 0.3693, "step": 1467 }, { "epoch": 0.04050535116249392, "grad_norm": 0.004877384752035141, "learning_rate": 0.001, "loss": 0.3686, "step": 1468 }, { "epoch": 0.04053294336355829, "grad_norm": 0.006324070505797863, "learning_rate": 0.001, "loss": 0.3858, "step": 1469 }, { "epoch": 0.04056053556462266, "grad_norm": 0.004497391637414694, "learning_rate": 0.001, "loss": 0.4021, "step": 1470 }, { "epoch": 0.04058812776568703, "grad_norm": 0.003843271406367421, "learning_rate": 0.001, "loss": 0.4008, "step": 1471 }, { "epoch": 0.040615719966751396, "grad_norm": 0.003053538501262665, "learning_rate": 0.001, "loss": 0.4392, "step": 1472 }, { "epoch": 0.04064331216781577, "grad_norm": 0.0038446690887212753, "learning_rate": 0.001, "loss": 0.3751, "step": 1473 }, { "epoch": 0.040670904368880136, "grad_norm": 0.003199394093826413, "learning_rate": 0.001, "loss": 0.3877, "step": 1474 }, { "epoch": 0.0406984965699445, "grad_norm": 0.003342741634696722, "learning_rate": 0.001, "loss": 0.4147, "step": 1475 }, { "epoch": 0.040726088771008875, "grad_norm": 0.004331924952566624, "learning_rate": 0.001, "loss": 0.3963, "step": 1476 }, { "epoch": 0.04075368097207324, "grad_norm": 0.00260713673196733, "learning_rate": 0.001, "loss": 0.4192, "step": 1477 }, { "epoch": 0.040781273173137615, "grad_norm": 0.003122882917523384, "learning_rate": 0.001, "loss": 0.3598, "step": 1478 }, { "epoch": 0.04080886537420198, "grad_norm": 0.0028676025103777647, "learning_rate": 0.001, "loss": 0.4095, "step": 1479 }, { "epoch": 0.04083645757526635, "grad_norm": 0.0038910373114049435, "learning_rate": 0.001, "loss": 0.3938, "step": 1480 }, { "epoch": 0.04086404977633072, "grad_norm": 0.003291686065495014, "learning_rate": 0.001, "loss": 0.4073, "step": 1481 }, { "epoch": 0.04089164197739509, "grad_norm": 0.006192247848957777, "learning_rate": 0.001, "loss": 0.4321, "step": 1482 }, { "epoch": 0.04091923417845946, "grad_norm": 0.002540907124057412, "learning_rate": 0.001, "loss": 0.4048, "step": 1483 }, { "epoch": 0.04094682637952383, "grad_norm": 0.003824718063697219, "learning_rate": 0.001, "loss": 0.3796, "step": 1484 }, { "epoch": 0.040974418580588194, "grad_norm": 0.0036246173549443483, "learning_rate": 0.001, "loss": 0.3983, "step": 1485 }, { "epoch": 0.04100201078165257, "grad_norm": 0.030295656993985176, "learning_rate": 0.001, "loss": 0.3849, "step": 1486 }, { "epoch": 0.041029602982716934, "grad_norm": 0.0050461613573133945, "learning_rate": 0.001, "loss": 0.412, "step": 1487 }, { "epoch": 0.04105719518378131, "grad_norm": 0.0023631087969988585, "learning_rate": 0.001, "loss": 0.4146, "step": 1488 }, { "epoch": 0.041084787384845674, "grad_norm": 0.002529110526666045, "learning_rate": 0.001, "loss": 0.4547, "step": 1489 }, { "epoch": 0.04111237958591004, "grad_norm": 0.00316584762185812, "learning_rate": 0.001, "loss": 0.3747, "step": 1490 }, { "epoch": 0.041139971786974414, "grad_norm": 0.002411734312772751, "learning_rate": 0.001, "loss": 0.4104, "step": 1491 }, { "epoch": 0.04116756398803878, "grad_norm": 0.0029997879173606634, "learning_rate": 0.001, "loss": 0.3994, "step": 1492 }, { "epoch": 0.04119515618910315, "grad_norm": 0.003948535770177841, "learning_rate": 0.001, "loss": 0.3833, "step": 1493 }, { "epoch": 0.04122274839016752, "grad_norm": 0.002781339455395937, "learning_rate": 0.001, "loss": 0.4324, "step": 1494 }, { "epoch": 0.041250340591231886, "grad_norm": 0.015317432582378387, "learning_rate": 0.001, "loss": 0.4044, "step": 1495 }, { "epoch": 0.04127793279229626, "grad_norm": 0.0075756567530334, "learning_rate": 0.001, "loss": 0.4164, "step": 1496 }, { "epoch": 0.041305524993360626, "grad_norm": 0.002881971187889576, "learning_rate": 0.001, "loss": 0.4122, "step": 1497 }, { "epoch": 0.04133311719442499, "grad_norm": 0.004069055896252394, "learning_rate": 0.001, "loss": 0.4491, "step": 1498 }, { "epoch": 0.041360709395489366, "grad_norm": 0.002320400904864073, "learning_rate": 0.001, "loss": 0.4208, "step": 1499 }, { "epoch": 0.04138830159655373, "grad_norm": 0.003089721780270338, "learning_rate": 0.001, "loss": 0.4066, "step": 1500 }, { "epoch": 0.04138830159655373, "eval_runtime": 23.6686, "eval_samples_per_second": 1.352, "eval_steps_per_second": 0.169, "step": 1500 }, { "epoch": 0.041415893797618106, "grad_norm": 0.004915047902613878, "learning_rate": 0.001, "loss": 0.4313, "step": 1501 }, { "epoch": 0.04144348599868247, "grad_norm": 0.00290488894097507, "learning_rate": 0.001, "loss": 0.3915, "step": 1502 }, { "epoch": 0.04147107819974684, "grad_norm": 0.0034425961785018444, "learning_rate": 0.001, "loss": 0.3732, "step": 1503 }, { "epoch": 0.04149867040081121, "grad_norm": 0.003592686727643013, "learning_rate": 0.001, "loss": 0.3947, "step": 1504 }, { "epoch": 0.04152626260187558, "grad_norm": 0.005649790167808533, "learning_rate": 0.001, "loss": 0.3954, "step": 1505 }, { "epoch": 0.04155385480293995, "grad_norm": 0.002451283158734441, "learning_rate": 0.001, "loss": 0.4477, "step": 1506 }, { "epoch": 0.04158144700400432, "grad_norm": 0.0028861695900559425, "learning_rate": 0.001, "loss": 0.3852, "step": 1507 }, { "epoch": 0.041609039205068685, "grad_norm": 0.0033806059509515762, "learning_rate": 0.001, "loss": 0.398, "step": 1508 }, { "epoch": 0.04163663140613306, "grad_norm": 0.003824063576757908, "learning_rate": 0.001, "loss": 0.3906, "step": 1509 }, { "epoch": 0.041664223607197425, "grad_norm": 0.00630558468401432, "learning_rate": 0.001, "loss": 0.4021, "step": 1510 }, { "epoch": 0.0416918158082618, "grad_norm": 0.0037113004364073277, "learning_rate": 0.001, "loss": 0.3717, "step": 1511 }, { "epoch": 0.041719408009326164, "grad_norm": 0.0054063801653683186, "learning_rate": 0.001, "loss": 0.4019, "step": 1512 }, { "epoch": 0.04174700021039053, "grad_norm": 0.003154453821480274, "learning_rate": 0.001, "loss": 0.3825, "step": 1513 }, { "epoch": 0.041774592411454904, "grad_norm": 0.0029439502395689487, "learning_rate": 0.001, "loss": 0.3875, "step": 1514 }, { "epoch": 0.04180218461251927, "grad_norm": 0.003378200577571988, "learning_rate": 0.001, "loss": 0.4199, "step": 1515 }, { "epoch": 0.041829776813583644, "grad_norm": 0.003158586798235774, "learning_rate": 0.001, "loss": 0.4098, "step": 1516 }, { "epoch": 0.04185736901464801, "grad_norm": 0.004686887841671705, "learning_rate": 0.001, "loss": 0.39, "step": 1517 }, { "epoch": 0.04188496121571238, "grad_norm": 0.004565032664686441, "learning_rate": 0.001, "loss": 0.3755, "step": 1518 }, { "epoch": 0.04191255341677675, "grad_norm": 0.005517443176358938, "learning_rate": 0.001, "loss": 0.4254, "step": 1519 }, { "epoch": 0.04194014561784112, "grad_norm": 0.002761922078207135, "learning_rate": 0.001, "loss": 0.3857, "step": 1520 }, { "epoch": 0.04196773781890548, "grad_norm": 0.0039441585540771484, "learning_rate": 0.001, "loss": 0.386, "step": 1521 }, { "epoch": 0.041995330019969856, "grad_norm": 0.00710391066968441, "learning_rate": 0.001, "loss": 0.4264, "step": 1522 }, { "epoch": 0.04202292222103422, "grad_norm": 0.025746062397956848, "learning_rate": 0.001, "loss": 0.4271, "step": 1523 }, { "epoch": 0.042050514422098596, "grad_norm": 0.004072318784892559, "learning_rate": 0.001, "loss": 0.3753, "step": 1524 }, { "epoch": 0.04207810662316296, "grad_norm": 0.0024748845025897026, "learning_rate": 0.001, "loss": 0.4254, "step": 1525 }, { "epoch": 0.04210569882422733, "grad_norm": 0.007111032959073782, "learning_rate": 0.001, "loss": 0.3759, "step": 1526 }, { "epoch": 0.0421332910252917, "grad_norm": 0.005953185725957155, "learning_rate": 0.001, "loss": 0.3995, "step": 1527 }, { "epoch": 0.04216088322635607, "grad_norm": 0.004936009179800749, "learning_rate": 0.001, "loss": 0.3839, "step": 1528 }, { "epoch": 0.04218847542742044, "grad_norm": 0.004421617835760117, "learning_rate": 0.001, "loss": 0.3926, "step": 1529 }, { "epoch": 0.04221606762848481, "grad_norm": 0.007696077227592468, "learning_rate": 0.001, "loss": 0.4035, "step": 1530 }, { "epoch": 0.042243659829549175, "grad_norm": 0.0060005756095051765, "learning_rate": 0.001, "loss": 0.4186, "step": 1531 }, { "epoch": 0.04227125203061355, "grad_norm": 0.006462580990046263, "learning_rate": 0.001, "loss": 0.4137, "step": 1532 }, { "epoch": 0.042298844231677915, "grad_norm": 0.023720385506749153, "learning_rate": 0.001, "loss": 0.3589, "step": 1533 }, { "epoch": 0.04232643643274229, "grad_norm": 0.003752040909603238, "learning_rate": 0.001, "loss": 0.4127, "step": 1534 }, { "epoch": 0.042354028633806655, "grad_norm": 0.005650446284562349, "learning_rate": 0.001, "loss": 0.4025, "step": 1535 }, { "epoch": 0.04238162083487102, "grad_norm": 0.003995851147919893, "learning_rate": 0.001, "loss": 0.4094, "step": 1536 }, { "epoch": 0.042409213035935395, "grad_norm": 0.003202822059392929, "learning_rate": 0.001, "loss": 0.3891, "step": 1537 }, { "epoch": 0.04243680523699976, "grad_norm": 0.004148907493799925, "learning_rate": 0.001, "loss": 0.4121, "step": 1538 }, { "epoch": 0.042464397438064135, "grad_norm": 0.004003866575658321, "learning_rate": 0.001, "loss": 0.4071, "step": 1539 }, { "epoch": 0.0424919896391285, "grad_norm": 0.003789936425164342, "learning_rate": 0.001, "loss": 0.4593, "step": 1540 }, { "epoch": 0.04251958184019287, "grad_norm": 0.004240360110998154, "learning_rate": 0.001, "loss": 0.353, "step": 1541 }, { "epoch": 0.04254717404125724, "grad_norm": 0.002904722234234214, "learning_rate": 0.001, "loss": 0.4451, "step": 1542 }, { "epoch": 0.04257476624232161, "grad_norm": 0.004250886384397745, "learning_rate": 0.001, "loss": 0.4395, "step": 1543 }, { "epoch": 0.042602358443385974, "grad_norm": 0.0044527859427034855, "learning_rate": 0.001, "loss": 0.4137, "step": 1544 }, { "epoch": 0.04262995064445035, "grad_norm": 0.006279831752181053, "learning_rate": 0.001, "loss": 0.354, "step": 1545 }, { "epoch": 0.042657542845514713, "grad_norm": 0.004428897984325886, "learning_rate": 0.001, "loss": 0.407, "step": 1546 }, { "epoch": 0.04268513504657909, "grad_norm": 0.00569180166348815, "learning_rate": 0.001, "loss": 0.4014, "step": 1547 }, { "epoch": 0.04271272724764345, "grad_norm": 0.011190955527126789, "learning_rate": 0.001, "loss": 0.3869, "step": 1548 }, { "epoch": 0.04274031944870782, "grad_norm": 0.015735691413283348, "learning_rate": 0.001, "loss": 0.3924, "step": 1549 }, { "epoch": 0.04276791164977219, "grad_norm": 0.0033663571812212467, "learning_rate": 0.001, "loss": 0.4163, "step": 1550 }, { "epoch": 0.04279550385083656, "grad_norm": 0.005885041318833828, "learning_rate": 0.001, "loss": 0.3891, "step": 1551 }, { "epoch": 0.04282309605190093, "grad_norm": 0.022580578923225403, "learning_rate": 0.001, "loss": 0.3921, "step": 1552 }, { "epoch": 0.0428506882529653, "grad_norm": 0.004381990525871515, "learning_rate": 0.001, "loss": 0.4167, "step": 1553 }, { "epoch": 0.042878280454029666, "grad_norm": 0.0038387307431548834, "learning_rate": 0.001, "loss": 0.38, "step": 1554 }, { "epoch": 0.04290587265509404, "grad_norm": 0.027915263548493385, "learning_rate": 0.001, "loss": 0.4126, "step": 1555 }, { "epoch": 0.042933464856158406, "grad_norm": 0.006606489885598421, "learning_rate": 0.001, "loss": 0.3775, "step": 1556 }, { "epoch": 0.04296105705722278, "grad_norm": 0.013772227801382542, "learning_rate": 0.001, "loss": 0.3991, "step": 1557 }, { "epoch": 0.042988649258287145, "grad_norm": 0.01971166953444481, "learning_rate": 0.001, "loss": 0.3872, "step": 1558 }, { "epoch": 0.04301624145935151, "grad_norm": 0.0028942166827619076, "learning_rate": 0.001, "loss": 0.3951, "step": 1559 }, { "epoch": 0.043043833660415885, "grad_norm": 0.004030006006360054, "learning_rate": 0.001, "loss": 0.3701, "step": 1560 }, { "epoch": 0.04307142586148025, "grad_norm": 0.0030979826115071774, "learning_rate": 0.001, "loss": 0.4082, "step": 1561 }, { "epoch": 0.043099018062544625, "grad_norm": 0.012965509667992592, "learning_rate": 0.001, "loss": 0.3686, "step": 1562 }, { "epoch": 0.04312661026360899, "grad_norm": 0.0034757067915052176, "learning_rate": 0.001, "loss": 0.4043, "step": 1563 }, { "epoch": 0.04315420246467336, "grad_norm": 0.003752148011699319, "learning_rate": 0.001, "loss": 0.4315, "step": 1564 }, { "epoch": 0.04318179466573773, "grad_norm": 0.002841662149876356, "learning_rate": 0.001, "loss": 0.3934, "step": 1565 }, { "epoch": 0.0432093868668021, "grad_norm": 0.0031714646611362696, "learning_rate": 0.001, "loss": 0.3961, "step": 1566 }, { "epoch": 0.043236979067866464, "grad_norm": 0.004814228042960167, "learning_rate": 0.001, "loss": 0.3909, "step": 1567 }, { "epoch": 0.04326457126893084, "grad_norm": 0.004090086091309786, "learning_rate": 0.001, "loss": 0.4061, "step": 1568 }, { "epoch": 0.043292163469995204, "grad_norm": 0.0035240172874182463, "learning_rate": 0.001, "loss": 0.4002, "step": 1569 }, { "epoch": 0.04331975567105958, "grad_norm": 0.0025755963288247585, "learning_rate": 0.001, "loss": 0.4287, "step": 1570 }, { "epoch": 0.043347347872123944, "grad_norm": 0.010493564419448376, "learning_rate": 0.001, "loss": 0.4117, "step": 1571 }, { "epoch": 0.04337494007318831, "grad_norm": 0.003166765673086047, "learning_rate": 0.001, "loss": 0.4402, "step": 1572 }, { "epoch": 0.043402532274252684, "grad_norm": 0.004130365792661905, "learning_rate": 0.001, "loss": 0.395, "step": 1573 }, { "epoch": 0.04343012447531705, "grad_norm": 0.003527469700202346, "learning_rate": 0.001, "loss": 0.4327, "step": 1574 }, { "epoch": 0.043457716676381423, "grad_norm": 0.0034512521233409643, "learning_rate": 0.001, "loss": 0.3567, "step": 1575 }, { "epoch": 0.04348530887744579, "grad_norm": 0.002996640047058463, "learning_rate": 0.001, "loss": 0.3649, "step": 1576 }, { "epoch": 0.043512901078510156, "grad_norm": 0.0035705615300685167, "learning_rate": 0.001, "loss": 0.3878, "step": 1577 }, { "epoch": 0.04354049327957453, "grad_norm": 0.004510779399424791, "learning_rate": 0.001, "loss": 0.3945, "step": 1578 }, { "epoch": 0.043568085480638896, "grad_norm": 0.005176417529582977, "learning_rate": 0.001, "loss": 0.3704, "step": 1579 }, { "epoch": 0.04359567768170327, "grad_norm": 0.0046493723057210445, "learning_rate": 0.001, "loss": 0.4271, "step": 1580 }, { "epoch": 0.043623269882767636, "grad_norm": 0.004336629528552294, "learning_rate": 0.001, "loss": 0.3974, "step": 1581 }, { "epoch": 0.043650862083832, "grad_norm": 0.0031178700737655163, "learning_rate": 0.001, "loss": 0.4275, "step": 1582 }, { "epoch": 0.043678454284896376, "grad_norm": 0.006402260158210993, "learning_rate": 0.001, "loss": 0.4198, "step": 1583 }, { "epoch": 0.04370604648596074, "grad_norm": 0.004152487497776747, "learning_rate": 0.001, "loss": 0.3969, "step": 1584 }, { "epoch": 0.043733638687025116, "grad_norm": 0.00424406910315156, "learning_rate": 0.001, "loss": 0.3865, "step": 1585 }, { "epoch": 0.04376123088808948, "grad_norm": 0.005350259132683277, "learning_rate": 0.001, "loss": 0.3781, "step": 1586 }, { "epoch": 0.04378882308915385, "grad_norm": 0.0027786684222519398, "learning_rate": 0.001, "loss": 0.4478, "step": 1587 }, { "epoch": 0.04381641529021822, "grad_norm": 0.004228509031236172, "learning_rate": 0.001, "loss": 0.4055, "step": 1588 }, { "epoch": 0.04384400749128259, "grad_norm": 0.0037349634803831577, "learning_rate": 0.001, "loss": 0.3892, "step": 1589 }, { "epoch": 0.043871599692346955, "grad_norm": 0.0034225585404783487, "learning_rate": 0.001, "loss": 0.4, "step": 1590 }, { "epoch": 0.04389919189341133, "grad_norm": 0.00584405055269599, "learning_rate": 0.001, "loss": 0.386, "step": 1591 }, { "epoch": 0.043926784094475695, "grad_norm": 0.004452804569154978, "learning_rate": 0.001, "loss": 0.4242, "step": 1592 }, { "epoch": 0.04395437629554007, "grad_norm": 0.0026068915612995625, "learning_rate": 0.001, "loss": 0.3954, "step": 1593 }, { "epoch": 0.043981968496604434, "grad_norm": 0.003229719353839755, "learning_rate": 0.001, "loss": 0.4059, "step": 1594 }, { "epoch": 0.0440095606976688, "grad_norm": 0.005484900437295437, "learning_rate": 0.001, "loss": 0.3981, "step": 1595 }, { "epoch": 0.044037152898733174, "grad_norm": 0.007316559553146362, "learning_rate": 0.001, "loss": 0.3879, "step": 1596 }, { "epoch": 0.04406474509979754, "grad_norm": 0.009250715374946594, "learning_rate": 0.001, "loss": 0.3797, "step": 1597 }, { "epoch": 0.044092337300861914, "grad_norm": 0.004528039135038853, "learning_rate": 0.001, "loss": 0.371, "step": 1598 }, { "epoch": 0.04411992950192628, "grad_norm": 0.005715006496757269, "learning_rate": 0.001, "loss": 0.4012, "step": 1599 }, { "epoch": 0.04414752170299065, "grad_norm": 0.0036250154953449965, "learning_rate": 0.001, "loss": 0.4285, "step": 1600 }, { "epoch": 0.04417511390405502, "grad_norm": 0.007519236300140619, "learning_rate": 0.001, "loss": 0.4164, "step": 1601 }, { "epoch": 0.04420270610511939, "grad_norm": 0.005943160969763994, "learning_rate": 0.001, "loss": 0.3953, "step": 1602 }, { "epoch": 0.04423029830618376, "grad_norm": 0.00410908367484808, "learning_rate": 0.001, "loss": 0.3608, "step": 1603 }, { "epoch": 0.04425789050724813, "grad_norm": 0.004322184715420008, "learning_rate": 0.001, "loss": 0.4226, "step": 1604 }, { "epoch": 0.04428548270831249, "grad_norm": 0.0035136695951223373, "learning_rate": 0.001, "loss": 0.38, "step": 1605 }, { "epoch": 0.044313074909376866, "grad_norm": 0.020684808492660522, "learning_rate": 0.001, "loss": 0.3983, "step": 1606 }, { "epoch": 0.04434066711044123, "grad_norm": 0.00836837850511074, "learning_rate": 0.001, "loss": 0.4281, "step": 1607 }, { "epoch": 0.044368259311505606, "grad_norm": 0.004143499303609133, "learning_rate": 0.001, "loss": 0.4234, "step": 1608 }, { "epoch": 0.04439585151256997, "grad_norm": 0.005604143720120192, "learning_rate": 0.001, "loss": 0.3957, "step": 1609 }, { "epoch": 0.04442344371363434, "grad_norm": 0.0037680943496525288, "learning_rate": 0.001, "loss": 0.4055, "step": 1610 }, { "epoch": 0.04445103591469871, "grad_norm": 0.004539195913821459, "learning_rate": 0.001, "loss": 0.4032, "step": 1611 }, { "epoch": 0.04447862811576308, "grad_norm": 0.003528768662363291, "learning_rate": 0.001, "loss": 0.3904, "step": 1612 }, { "epoch": 0.044506220316827445, "grad_norm": 0.002794221742078662, "learning_rate": 0.001, "loss": 0.4185, "step": 1613 }, { "epoch": 0.04453381251789182, "grad_norm": 0.003042758908122778, "learning_rate": 0.001, "loss": 0.4114, "step": 1614 }, { "epoch": 0.044561404718956185, "grad_norm": 0.0024623468052595854, "learning_rate": 0.001, "loss": 0.4099, "step": 1615 }, { "epoch": 0.04458899692002056, "grad_norm": 0.0035985438153147697, "learning_rate": 0.001, "loss": 0.4159, "step": 1616 }, { "epoch": 0.044616589121084925, "grad_norm": 0.007022172212600708, "learning_rate": 0.001, "loss": 0.3593, "step": 1617 }, { "epoch": 0.04464418132214929, "grad_norm": 0.003542872378602624, "learning_rate": 0.001, "loss": 0.4131, "step": 1618 }, { "epoch": 0.044671773523213665, "grad_norm": 0.0038910373114049435, "learning_rate": 0.001, "loss": 0.423, "step": 1619 }, { "epoch": 0.04469936572427803, "grad_norm": 0.0027147457003593445, "learning_rate": 0.001, "loss": 0.4051, "step": 1620 }, { "epoch": 0.044726957925342405, "grad_norm": 0.002940715989097953, "learning_rate": 0.001, "loss": 0.4256, "step": 1621 }, { "epoch": 0.04475455012640677, "grad_norm": 0.0030967697966843843, "learning_rate": 0.001, "loss": 0.3798, "step": 1622 }, { "epoch": 0.04478214232747114, "grad_norm": 0.002238066168501973, "learning_rate": 0.001, "loss": 0.4067, "step": 1623 }, { "epoch": 0.04480973452853551, "grad_norm": 0.003412696998566389, "learning_rate": 0.001, "loss": 0.413, "step": 1624 }, { "epoch": 0.04483732672959988, "grad_norm": 0.0026292402762919664, "learning_rate": 0.001, "loss": 0.4015, "step": 1625 }, { "epoch": 0.04486491893066425, "grad_norm": 0.004190067294985056, "learning_rate": 0.001, "loss": 0.3692, "step": 1626 }, { "epoch": 0.04489251113172862, "grad_norm": 0.0036978810094296932, "learning_rate": 0.001, "loss": 0.3922, "step": 1627 }, { "epoch": 0.044920103332792984, "grad_norm": 0.004438107367604971, "learning_rate": 0.001, "loss": 0.3819, "step": 1628 }, { "epoch": 0.04494769553385736, "grad_norm": 0.004876590799540281, "learning_rate": 0.001, "loss": 0.3844, "step": 1629 }, { "epoch": 0.04497528773492172, "grad_norm": 0.004272471182048321, "learning_rate": 0.001, "loss": 0.3869, "step": 1630 }, { "epoch": 0.0450028799359861, "grad_norm": 0.006823899690061808, "learning_rate": 0.001, "loss": 0.4183, "step": 1631 }, { "epoch": 0.04503047213705046, "grad_norm": 0.008064229972660542, "learning_rate": 0.001, "loss": 0.417, "step": 1632 }, { "epoch": 0.04505806433811483, "grad_norm": 0.004609786439687014, "learning_rate": 0.001, "loss": 0.4324, "step": 1633 }, { "epoch": 0.0450856565391792, "grad_norm": 0.0027909104246646166, "learning_rate": 0.001, "loss": 0.4091, "step": 1634 }, { "epoch": 0.04511324874024357, "grad_norm": 0.0036747294943779707, "learning_rate": 0.001, "loss": 0.3533, "step": 1635 }, { "epoch": 0.045140840941307936, "grad_norm": 0.0037599606439471245, "learning_rate": 0.001, "loss": 0.4282, "step": 1636 }, { "epoch": 0.04516843314237231, "grad_norm": 0.0029045911505818367, "learning_rate": 0.001, "loss": 0.3837, "step": 1637 }, { "epoch": 0.045196025343436676, "grad_norm": 0.0038696962874382734, "learning_rate": 0.001, "loss": 0.3883, "step": 1638 }, { "epoch": 0.04522361754450105, "grad_norm": 0.004320462234318256, "learning_rate": 0.001, "loss": 0.3783, "step": 1639 }, { "epoch": 0.045251209745565416, "grad_norm": 0.002876073122024536, "learning_rate": 0.001, "loss": 0.406, "step": 1640 }, { "epoch": 0.04527880194662978, "grad_norm": 0.002509112237021327, "learning_rate": 0.001, "loss": 0.4383, "step": 1641 }, { "epoch": 0.045306394147694155, "grad_norm": 0.012423038482666016, "learning_rate": 0.001, "loss": 0.3616, "step": 1642 }, { "epoch": 0.04533398634875852, "grad_norm": 0.00442110188305378, "learning_rate": 0.001, "loss": 0.3814, "step": 1643 }, { "epoch": 0.045361578549822895, "grad_norm": 0.003638372290879488, "learning_rate": 0.001, "loss": 0.4611, "step": 1644 }, { "epoch": 0.04538917075088726, "grad_norm": 0.0037818588316440582, "learning_rate": 0.001, "loss": 0.3893, "step": 1645 }, { "epoch": 0.04541676295195163, "grad_norm": 0.0035038308706134558, "learning_rate": 0.001, "loss": 0.3853, "step": 1646 }, { "epoch": 0.045444355153016, "grad_norm": 0.003616967238485813, "learning_rate": 0.001, "loss": 0.3903, "step": 1647 }, { "epoch": 0.04547194735408037, "grad_norm": 0.002239943016320467, "learning_rate": 0.001, "loss": 0.3769, "step": 1648 }, { "epoch": 0.04549953955514474, "grad_norm": 0.0036853745114058256, "learning_rate": 0.001, "loss": 0.3835, "step": 1649 }, { "epoch": 0.04552713175620911, "grad_norm": 0.00262830825522542, "learning_rate": 0.001, "loss": 0.4076, "step": 1650 }, { "epoch": 0.045554723957273474, "grad_norm": 0.010400556027889252, "learning_rate": 0.001, "loss": 0.4051, "step": 1651 }, { "epoch": 0.04558231615833785, "grad_norm": 0.0026481510140001774, "learning_rate": 0.001, "loss": 0.3712, "step": 1652 }, { "epoch": 0.045609908359402214, "grad_norm": 0.0029154308140277863, "learning_rate": 0.001, "loss": 0.3951, "step": 1653 }, { "epoch": 0.04563750056046659, "grad_norm": 0.0028925796505063772, "learning_rate": 0.001, "loss": 0.3907, "step": 1654 }, { "epoch": 0.045665092761530954, "grad_norm": 0.0028384907636791468, "learning_rate": 0.001, "loss": 0.411, "step": 1655 }, { "epoch": 0.04569268496259532, "grad_norm": 0.0039413925260305405, "learning_rate": 0.001, "loss": 0.3847, "step": 1656 }, { "epoch": 0.045720277163659694, "grad_norm": 0.004098753910511732, "learning_rate": 0.001, "loss": 0.4258, "step": 1657 }, { "epoch": 0.04574786936472406, "grad_norm": 0.0032568899914622307, "learning_rate": 0.001, "loss": 0.3791, "step": 1658 }, { "epoch": 0.045775461565788426, "grad_norm": 0.0038113740738481283, "learning_rate": 0.001, "loss": 0.4379, "step": 1659 }, { "epoch": 0.0458030537668528, "grad_norm": 0.004003043286502361, "learning_rate": 0.001, "loss": 0.3718, "step": 1660 }, { "epoch": 0.045830645967917166, "grad_norm": 0.003037869231775403, "learning_rate": 0.001, "loss": 0.3917, "step": 1661 }, { "epoch": 0.04585823816898154, "grad_norm": 0.004264459945261478, "learning_rate": 0.001, "loss": 0.4109, "step": 1662 }, { "epoch": 0.045885830370045906, "grad_norm": 0.0026220069266855717, "learning_rate": 0.001, "loss": 0.4086, "step": 1663 }, { "epoch": 0.04591342257111027, "grad_norm": 0.003170250216498971, "learning_rate": 0.001, "loss": 0.3909, "step": 1664 }, { "epoch": 0.045941014772174646, "grad_norm": 0.004167741164565086, "learning_rate": 0.001, "loss": 0.4141, "step": 1665 }, { "epoch": 0.04596860697323901, "grad_norm": 0.0035946646239608526, "learning_rate": 0.001, "loss": 0.4111, "step": 1666 }, { "epoch": 0.045996199174303386, "grad_norm": 0.003959175664931536, "learning_rate": 0.001, "loss": 0.407, "step": 1667 }, { "epoch": 0.04602379137536775, "grad_norm": 0.004025304224342108, "learning_rate": 0.001, "loss": 0.4136, "step": 1668 }, { "epoch": 0.04605138357643212, "grad_norm": 0.004997665528208017, "learning_rate": 0.001, "loss": 0.3931, "step": 1669 }, { "epoch": 0.04607897577749649, "grad_norm": 0.003882192773744464, "learning_rate": 0.001, "loss": 0.397, "step": 1670 }, { "epoch": 0.04610656797856086, "grad_norm": 0.006935080513358116, "learning_rate": 0.001, "loss": 0.412, "step": 1671 }, { "epoch": 0.04613416017962523, "grad_norm": 0.0038688175845891237, "learning_rate": 0.001, "loss": 0.3767, "step": 1672 }, { "epoch": 0.0461617523806896, "grad_norm": 0.0037809666246175766, "learning_rate": 0.001, "loss": 0.4086, "step": 1673 }, { "epoch": 0.046189344581753965, "grad_norm": 0.009138336405158043, "learning_rate": 0.001, "loss": 0.4027, "step": 1674 }, { "epoch": 0.04621693678281834, "grad_norm": 0.004577755928039551, "learning_rate": 0.001, "loss": 0.3929, "step": 1675 }, { "epoch": 0.046244528983882704, "grad_norm": 0.0038164344150573015, "learning_rate": 0.001, "loss": 0.441, "step": 1676 }, { "epoch": 0.04627212118494708, "grad_norm": 0.004704809281975031, "learning_rate": 0.001, "loss": 0.3689, "step": 1677 }, { "epoch": 0.046299713386011444, "grad_norm": 0.008598784916102886, "learning_rate": 0.001, "loss": 0.4019, "step": 1678 }, { "epoch": 0.04632730558707581, "grad_norm": 0.004788943100720644, "learning_rate": 0.001, "loss": 0.4133, "step": 1679 }, { "epoch": 0.046354897788140184, "grad_norm": 0.0038863064255565405, "learning_rate": 0.001, "loss": 0.3827, "step": 1680 }, { "epoch": 0.04638248998920455, "grad_norm": 0.0029795279260724783, "learning_rate": 0.001, "loss": 0.4245, "step": 1681 }, { "epoch": 0.046410082190268924, "grad_norm": 0.003151806304231286, "learning_rate": 0.001, "loss": 0.4215, "step": 1682 }, { "epoch": 0.04643767439133329, "grad_norm": 0.006868270225822926, "learning_rate": 0.001, "loss": 0.4285, "step": 1683 }, { "epoch": 0.04646526659239766, "grad_norm": 0.00662572355940938, "learning_rate": 0.001, "loss": 0.3975, "step": 1684 }, { "epoch": 0.04649285879346203, "grad_norm": 0.01428454089909792, "learning_rate": 0.001, "loss": 0.3854, "step": 1685 }, { "epoch": 0.0465204509945264, "grad_norm": 0.006754969246685505, "learning_rate": 0.001, "loss": 0.3699, "step": 1686 }, { "epoch": 0.04654804319559076, "grad_norm": 0.00340940966270864, "learning_rate": 0.001, "loss": 0.3674, "step": 1687 }, { "epoch": 0.046575635396655136, "grad_norm": 0.003416246036067605, "learning_rate": 0.001, "loss": 0.3975, "step": 1688 }, { "epoch": 0.0466032275977195, "grad_norm": 0.0039048672188073397, "learning_rate": 0.001, "loss": 0.3848, "step": 1689 }, { "epoch": 0.046630819798783876, "grad_norm": 0.003685768460854888, "learning_rate": 0.001, "loss": 0.4198, "step": 1690 }, { "epoch": 0.04665841199984824, "grad_norm": 0.006730150897055864, "learning_rate": 0.001, "loss": 0.3419, "step": 1691 }, { "epoch": 0.04668600420091261, "grad_norm": 0.004073324613273144, "learning_rate": 0.001, "loss": 0.4148, "step": 1692 }, { "epoch": 0.04671359640197698, "grad_norm": 0.0040067946538329124, "learning_rate": 0.001, "loss": 0.3957, "step": 1693 }, { "epoch": 0.04674118860304135, "grad_norm": 0.006488442420959473, "learning_rate": 0.001, "loss": 0.4193, "step": 1694 }, { "epoch": 0.04676878080410572, "grad_norm": 0.006833325605839491, "learning_rate": 0.001, "loss": 0.3841, "step": 1695 }, { "epoch": 0.04679637300517009, "grad_norm": 0.0035828256513923407, "learning_rate": 0.001, "loss": 0.3844, "step": 1696 }, { "epoch": 0.046823965206234455, "grad_norm": 0.0032494564075022936, "learning_rate": 0.001, "loss": 0.3772, "step": 1697 }, { "epoch": 0.04685155740729883, "grad_norm": 0.0033526027109473944, "learning_rate": 0.001, "loss": 0.4183, "step": 1698 }, { "epoch": 0.046879149608363195, "grad_norm": 0.004358900245279074, "learning_rate": 0.001, "loss": 0.4057, "step": 1699 }, { "epoch": 0.04690674180942757, "grad_norm": 0.0045670876279473305, "learning_rate": 0.001, "loss": 0.3853, "step": 1700 }, { "epoch": 0.046934334010491935, "grad_norm": 0.004190321080386639, "learning_rate": 0.001, "loss": 0.4115, "step": 1701 }, { "epoch": 0.0469619262115563, "grad_norm": 0.0030443707946687937, "learning_rate": 0.001, "loss": 0.4293, "step": 1702 }, { "epoch": 0.046989518412620675, "grad_norm": 0.003604897065088153, "learning_rate": 0.001, "loss": 0.4429, "step": 1703 }, { "epoch": 0.04701711061368504, "grad_norm": 0.003690918907523155, "learning_rate": 0.001, "loss": 0.3883, "step": 1704 }, { "epoch": 0.047044702814749415, "grad_norm": 0.0034342606086283922, "learning_rate": 0.001, "loss": 0.4184, "step": 1705 }, { "epoch": 0.04707229501581378, "grad_norm": 0.003786922199651599, "learning_rate": 0.001, "loss": 0.4286, "step": 1706 }, { "epoch": 0.04709988721687815, "grad_norm": 0.0021744174882769585, "learning_rate": 0.001, "loss": 0.3615, "step": 1707 }, { "epoch": 0.04712747941794252, "grad_norm": 0.004297124687582254, "learning_rate": 0.001, "loss": 0.3889, "step": 1708 }, { "epoch": 0.04715507161900689, "grad_norm": 0.003097895532846451, "learning_rate": 0.001, "loss": 0.4263, "step": 1709 }, { "epoch": 0.047182663820071254, "grad_norm": 0.0027625923976302147, "learning_rate": 0.001, "loss": 0.4183, "step": 1710 }, { "epoch": 0.04721025602113563, "grad_norm": 0.024495547637343407, "learning_rate": 0.001, "loss": 0.3727, "step": 1711 }, { "epoch": 0.04723784822219999, "grad_norm": 0.006063805893063545, "learning_rate": 0.001, "loss": 0.4558, "step": 1712 }, { "epoch": 0.04726544042326437, "grad_norm": 0.002210379345342517, "learning_rate": 0.001, "loss": 0.4529, "step": 1713 }, { "epoch": 0.04729303262432873, "grad_norm": 0.002300212625414133, "learning_rate": 0.001, "loss": 0.42, "step": 1714 }, { "epoch": 0.0473206248253931, "grad_norm": 0.002808920806273818, "learning_rate": 0.001, "loss": 0.4337, "step": 1715 }, { "epoch": 0.04734821702645747, "grad_norm": 0.0031099985353648663, "learning_rate": 0.001, "loss": 0.3939, "step": 1716 }, { "epoch": 0.04737580922752184, "grad_norm": 0.003029707819223404, "learning_rate": 0.001, "loss": 0.3879, "step": 1717 }, { "epoch": 0.04740340142858621, "grad_norm": 0.005905452184379101, "learning_rate": 0.001, "loss": 0.4024, "step": 1718 }, { "epoch": 0.04743099362965058, "grad_norm": 0.002499626949429512, "learning_rate": 0.001, "loss": 0.4131, "step": 1719 }, { "epoch": 0.047458585830714946, "grad_norm": 0.0029100440442562103, "learning_rate": 0.001, "loss": 0.3769, "step": 1720 }, { "epoch": 0.04748617803177932, "grad_norm": 0.0029877678025513887, "learning_rate": 0.001, "loss": 0.3793, "step": 1721 }, { "epoch": 0.047513770232843686, "grad_norm": 0.0023172239307314157, "learning_rate": 0.001, "loss": 0.4134, "step": 1722 }, { "epoch": 0.04754136243390806, "grad_norm": 0.0031008669175207615, "learning_rate": 0.001, "loss": 0.3727, "step": 1723 }, { "epoch": 0.047568954634972425, "grad_norm": 0.003588200779631734, "learning_rate": 0.001, "loss": 0.4225, "step": 1724 }, { "epoch": 0.04759654683603679, "grad_norm": 0.0027301576919853687, "learning_rate": 0.001, "loss": 0.4042, "step": 1725 }, { "epoch": 0.047624139037101165, "grad_norm": 0.003348779398947954, "learning_rate": 0.001, "loss": 0.3554, "step": 1726 }, { "epoch": 0.04765173123816553, "grad_norm": 0.003580378834158182, "learning_rate": 0.001, "loss": 0.3819, "step": 1727 }, { "epoch": 0.047679323439229905, "grad_norm": 0.004255024716258049, "learning_rate": 0.001, "loss": 0.4169, "step": 1728 }, { "epoch": 0.04770691564029427, "grad_norm": 0.0033867263700813055, "learning_rate": 0.001, "loss": 0.3654, "step": 1729 }, { "epoch": 0.04773450784135864, "grad_norm": 0.005044759716838598, "learning_rate": 0.001, "loss": 0.416, "step": 1730 }, { "epoch": 0.04776210004242301, "grad_norm": 0.0034105442464351654, "learning_rate": 0.001, "loss": 0.3911, "step": 1731 }, { "epoch": 0.04778969224348738, "grad_norm": 0.004883192479610443, "learning_rate": 0.001, "loss": 0.4679, "step": 1732 }, { "epoch": 0.047817284444551744, "grad_norm": 0.003439564723521471, "learning_rate": 0.001, "loss": 0.3782, "step": 1733 }, { "epoch": 0.04784487664561612, "grad_norm": 0.006362561602145433, "learning_rate": 0.001, "loss": 0.4092, "step": 1734 }, { "epoch": 0.047872468846680484, "grad_norm": 0.004292085766792297, "learning_rate": 0.001, "loss": 0.4047, "step": 1735 }, { "epoch": 0.04790006104774486, "grad_norm": 0.00374743458814919, "learning_rate": 0.001, "loss": 0.3785, "step": 1736 }, { "epoch": 0.047927653248809224, "grad_norm": 0.004549616016447544, "learning_rate": 0.001, "loss": 0.3976, "step": 1737 }, { "epoch": 0.04795524544987359, "grad_norm": 0.008672794327139854, "learning_rate": 0.001, "loss": 0.3664, "step": 1738 }, { "epoch": 0.047982837650937964, "grad_norm": 0.007166683673858643, "learning_rate": 0.001, "loss": 0.416, "step": 1739 }, { "epoch": 0.04801042985200233, "grad_norm": 0.0030777885112911463, "learning_rate": 0.001, "loss": 0.4281, "step": 1740 }, { "epoch": 0.048038022053066703, "grad_norm": 0.0049812328070402145, "learning_rate": 0.001, "loss": 0.424, "step": 1741 }, { "epoch": 0.04806561425413107, "grad_norm": 0.005072145257145166, "learning_rate": 0.001, "loss": 0.3912, "step": 1742 }, { "epoch": 0.048093206455195436, "grad_norm": 0.004582617431879044, "learning_rate": 0.001, "loss": 0.3764, "step": 1743 }, { "epoch": 0.04812079865625981, "grad_norm": 0.004273936152458191, "learning_rate": 0.001, "loss": 0.3881, "step": 1744 }, { "epoch": 0.048148390857324176, "grad_norm": 0.003426861949265003, "learning_rate": 0.001, "loss": 0.3663, "step": 1745 }, { "epoch": 0.04817598305838855, "grad_norm": 0.0036460154224187136, "learning_rate": 0.001, "loss": 0.3976, "step": 1746 }, { "epoch": 0.048203575259452916, "grad_norm": 0.003482312895357609, "learning_rate": 0.001, "loss": 0.4189, "step": 1747 }, { "epoch": 0.04823116746051728, "grad_norm": 0.0033532571978867054, "learning_rate": 0.001, "loss": 0.4093, "step": 1748 }, { "epoch": 0.048258759661581656, "grad_norm": 0.0038984876591712236, "learning_rate": 0.001, "loss": 0.4015, "step": 1749 }, { "epoch": 0.04828635186264602, "grad_norm": 0.005925590638071299, "learning_rate": 0.001, "loss": 0.3672, "step": 1750 }, { "epoch": 0.048313944063710396, "grad_norm": 0.005235752090811729, "learning_rate": 0.001, "loss": 0.3777, "step": 1751 }, { "epoch": 0.04834153626477476, "grad_norm": 0.00452176108956337, "learning_rate": 0.001, "loss": 0.4007, "step": 1752 }, { "epoch": 0.04836912846583913, "grad_norm": 0.004116971977055073, "learning_rate": 0.001, "loss": 0.3719, "step": 1753 }, { "epoch": 0.0483967206669035, "grad_norm": 0.0037535110022872686, "learning_rate": 0.001, "loss": 0.3643, "step": 1754 }, { "epoch": 0.04842431286796787, "grad_norm": 0.0049346694722771645, "learning_rate": 0.001, "loss": 0.3676, "step": 1755 }, { "epoch": 0.048451905069032235, "grad_norm": 0.006009018048644066, "learning_rate": 0.001, "loss": 0.3844, "step": 1756 }, { "epoch": 0.04847949727009661, "grad_norm": 0.003077111905440688, "learning_rate": 0.001, "loss": 0.3965, "step": 1757 }, { "epoch": 0.048507089471160975, "grad_norm": 0.004866272211074829, "learning_rate": 0.001, "loss": 0.3991, "step": 1758 }, { "epoch": 0.04853468167222535, "grad_norm": 0.004328357521444559, "learning_rate": 0.001, "loss": 0.4211, "step": 1759 }, { "epoch": 0.048562273873289714, "grad_norm": 0.003560771932825446, "learning_rate": 0.001, "loss": 0.4317, "step": 1760 }, { "epoch": 0.04858986607435408, "grad_norm": 0.00306741357780993, "learning_rate": 0.001, "loss": 0.4185, "step": 1761 }, { "epoch": 0.048617458275418454, "grad_norm": 0.010002429597079754, "learning_rate": 0.001, "loss": 0.3931, "step": 1762 }, { "epoch": 0.04864505047648282, "grad_norm": 0.006219691131263971, "learning_rate": 0.001, "loss": 0.3574, "step": 1763 }, { "epoch": 0.048672642677547194, "grad_norm": 0.0033367029391229153, "learning_rate": 0.001, "loss": 0.4001, "step": 1764 }, { "epoch": 0.04870023487861156, "grad_norm": 0.002981371246278286, "learning_rate": 0.001, "loss": 0.3876, "step": 1765 }, { "epoch": 0.04872782707967593, "grad_norm": 0.004406462889164686, "learning_rate": 0.001, "loss": 0.4005, "step": 1766 }, { "epoch": 0.0487554192807403, "grad_norm": 0.0028726779855787754, "learning_rate": 0.001, "loss": 0.3537, "step": 1767 }, { "epoch": 0.04878301148180467, "grad_norm": 0.0029508452862501144, "learning_rate": 0.001, "loss": 0.4005, "step": 1768 }, { "epoch": 0.04881060368286904, "grad_norm": 0.004091752227395773, "learning_rate": 0.001, "loss": 0.3988, "step": 1769 }, { "epoch": 0.04883819588393341, "grad_norm": 0.0027895302046090364, "learning_rate": 0.001, "loss": 0.3969, "step": 1770 }, { "epoch": 0.04886578808499777, "grad_norm": 0.003976823296397924, "learning_rate": 0.001, "loss": 0.4086, "step": 1771 }, { "epoch": 0.048893380286062146, "grad_norm": 0.0030748520512133837, "learning_rate": 0.001, "loss": 0.4191, "step": 1772 }, { "epoch": 0.04892097248712651, "grad_norm": 0.005313929636031389, "learning_rate": 0.001, "loss": 0.4141, "step": 1773 }, { "epoch": 0.048948564688190886, "grad_norm": 0.003955441992729902, "learning_rate": 0.001, "loss": 0.4196, "step": 1774 }, { "epoch": 0.04897615688925525, "grad_norm": 0.007145262788981199, "learning_rate": 0.001, "loss": 0.4179, "step": 1775 }, { "epoch": 0.04900374909031962, "grad_norm": 0.003774230368435383, "learning_rate": 0.001, "loss": 0.3864, "step": 1776 }, { "epoch": 0.04903134129138399, "grad_norm": 0.002694410039111972, "learning_rate": 0.001, "loss": 0.389, "step": 1777 }, { "epoch": 0.04905893349244836, "grad_norm": 0.003494556527584791, "learning_rate": 0.001, "loss": 0.3966, "step": 1778 }, { "epoch": 0.049086525693512725, "grad_norm": 0.0033225903753191233, "learning_rate": 0.001, "loss": 0.3659, "step": 1779 }, { "epoch": 0.0491141178945771, "grad_norm": 0.0031309863552451134, "learning_rate": 0.001, "loss": 0.3728, "step": 1780 }, { "epoch": 0.049141710095641465, "grad_norm": 0.0028971245046705008, "learning_rate": 0.001, "loss": 0.3719, "step": 1781 }, { "epoch": 0.04916930229670584, "grad_norm": 0.006014004349708557, "learning_rate": 0.001, "loss": 0.3521, "step": 1782 }, { "epoch": 0.049196894497770205, "grad_norm": 0.002988451160490513, "learning_rate": 0.001, "loss": 0.4184, "step": 1783 }, { "epoch": 0.04922448669883457, "grad_norm": 0.0036536797415465117, "learning_rate": 0.001, "loss": 0.4109, "step": 1784 }, { "epoch": 0.049252078899898945, "grad_norm": 0.00857572816312313, "learning_rate": 0.001, "loss": 0.4012, "step": 1785 }, { "epoch": 0.04927967110096331, "grad_norm": 0.003724129404872656, "learning_rate": 0.001, "loss": 0.4247, "step": 1786 }, { "epoch": 0.049307263302027685, "grad_norm": 0.004292929545044899, "learning_rate": 0.001, "loss": 0.4128, "step": 1787 }, { "epoch": 0.04933485550309205, "grad_norm": 0.004144448321312666, "learning_rate": 0.001, "loss": 0.3601, "step": 1788 }, { "epoch": 0.04936244770415642, "grad_norm": 0.002937472425401211, "learning_rate": 0.001, "loss": 0.3888, "step": 1789 }, { "epoch": 0.04939003990522079, "grad_norm": 0.0032268317881971598, "learning_rate": 0.001, "loss": 0.3306, "step": 1790 }, { "epoch": 0.04941763210628516, "grad_norm": 0.0034790022764354944, "learning_rate": 0.001, "loss": 0.4137, "step": 1791 }, { "epoch": 0.04944522430734953, "grad_norm": 0.004379100166261196, "learning_rate": 0.001, "loss": 0.387, "step": 1792 }, { "epoch": 0.0494728165084139, "grad_norm": 0.00435485178604722, "learning_rate": 0.001, "loss": 0.361, "step": 1793 }, { "epoch": 0.049500408709478264, "grad_norm": 0.0028884608764201403, "learning_rate": 0.001, "loss": 0.3888, "step": 1794 }, { "epoch": 0.04952800091054264, "grad_norm": 0.0035733478143811226, "learning_rate": 0.001, "loss": 0.3798, "step": 1795 }, { "epoch": 0.049555593111607, "grad_norm": 0.003197557758539915, "learning_rate": 0.001, "loss": 0.4283, "step": 1796 }, { "epoch": 0.04958318531267138, "grad_norm": 0.012362437322735786, "learning_rate": 0.001, "loss": 0.4401, "step": 1797 }, { "epoch": 0.04961077751373574, "grad_norm": 0.005861248355358839, "learning_rate": 0.001, "loss": 0.4287, "step": 1798 }, { "epoch": 0.04963836971480011, "grad_norm": 0.0055047376081347466, "learning_rate": 0.001, "loss": 0.3827, "step": 1799 }, { "epoch": 0.04966596191586448, "grad_norm": 0.005381823051720858, "learning_rate": 0.001, "loss": 0.4094, "step": 1800 }, { "epoch": 0.04969355411692885, "grad_norm": 0.003702189540490508, "learning_rate": 0.001, "loss": 0.416, "step": 1801 }, { "epoch": 0.049721146317993216, "grad_norm": 0.005075744818896055, "learning_rate": 0.001, "loss": 0.3941, "step": 1802 }, { "epoch": 0.04974873851905759, "grad_norm": 0.0033848159946501255, "learning_rate": 0.001, "loss": 0.4107, "step": 1803 }, { "epoch": 0.049776330720121956, "grad_norm": 0.007472567725926638, "learning_rate": 0.001, "loss": 0.3677, "step": 1804 }, { "epoch": 0.04980392292118633, "grad_norm": 0.005941023584455252, "learning_rate": 0.001, "loss": 0.3957, "step": 1805 }, { "epoch": 0.049831515122250696, "grad_norm": 0.00963501911610365, "learning_rate": 0.001, "loss": 0.3943, "step": 1806 }, { "epoch": 0.04985910732331506, "grad_norm": 0.017620790749788284, "learning_rate": 0.001, "loss": 0.4225, "step": 1807 }, { "epoch": 0.049886699524379435, "grad_norm": 0.0027190614491701126, "learning_rate": 0.001, "loss": 0.3788, "step": 1808 }, { "epoch": 0.0499142917254438, "grad_norm": 0.003102682065218687, "learning_rate": 0.001, "loss": 0.4197, "step": 1809 }, { "epoch": 0.049941883926508175, "grad_norm": 0.0033104156609624624, "learning_rate": 0.001, "loss": 0.3862, "step": 1810 }, { "epoch": 0.04996947612757254, "grad_norm": 0.0038089246954768896, "learning_rate": 0.001, "loss": 0.4085, "step": 1811 }, { "epoch": 0.04999706832863691, "grad_norm": 0.005627461709082127, "learning_rate": 0.001, "loss": 0.3882, "step": 1812 }, { "epoch": 0.05002466052970128, "grad_norm": 0.0027909004129469395, "learning_rate": 0.001, "loss": 0.4023, "step": 1813 }, { "epoch": 0.05005225273076565, "grad_norm": 0.0031870375387370586, "learning_rate": 0.001, "loss": 0.4156, "step": 1814 }, { "epoch": 0.05007984493183002, "grad_norm": 0.0030249350238591433, "learning_rate": 0.001, "loss": 0.3822, "step": 1815 }, { "epoch": 0.05010743713289439, "grad_norm": 0.0051695844158530235, "learning_rate": 0.001, "loss": 0.3823, "step": 1816 }, { "epoch": 0.050135029333958754, "grad_norm": 0.004010764416307211, "learning_rate": 0.001, "loss": 0.3718, "step": 1817 }, { "epoch": 0.05016262153502313, "grad_norm": 0.004209370352327824, "learning_rate": 0.001, "loss": 0.3638, "step": 1818 }, { "epoch": 0.050190213736087494, "grad_norm": 0.004149060230702162, "learning_rate": 0.001, "loss": 0.3765, "step": 1819 }, { "epoch": 0.05021780593715187, "grad_norm": 0.02054491639137268, "learning_rate": 0.001, "loss": 0.4019, "step": 1820 }, { "epoch": 0.050245398138216234, "grad_norm": 0.017421457916498184, "learning_rate": 0.001, "loss": 0.4034, "step": 1821 }, { "epoch": 0.0502729903392806, "grad_norm": 0.002541644498705864, "learning_rate": 0.001, "loss": 0.4215, "step": 1822 }, { "epoch": 0.050300582540344974, "grad_norm": 0.0031819387804716825, "learning_rate": 0.001, "loss": 0.4075, "step": 1823 }, { "epoch": 0.05032817474140934, "grad_norm": 0.004389981739223003, "learning_rate": 0.001, "loss": 0.414, "step": 1824 }, { "epoch": 0.050355766942473706, "grad_norm": 0.0037767868489027023, "learning_rate": 0.001, "loss": 0.4166, "step": 1825 }, { "epoch": 0.05038335914353808, "grad_norm": 0.0033333280589431524, "learning_rate": 0.001, "loss": 0.4304, "step": 1826 }, { "epoch": 0.050410951344602446, "grad_norm": 0.0033245261292904615, "learning_rate": 0.001, "loss": 0.4512, "step": 1827 }, { "epoch": 0.05043854354566682, "grad_norm": 0.005083107389509678, "learning_rate": 0.001, "loss": 0.4058, "step": 1828 }, { "epoch": 0.050466135746731186, "grad_norm": 0.002904941327869892, "learning_rate": 0.001, "loss": 0.4123, "step": 1829 }, { "epoch": 0.05049372794779555, "grad_norm": 0.0927748754620552, "learning_rate": 0.001, "loss": 0.381, "step": 1830 }, { "epoch": 0.050521320148859926, "grad_norm": 0.0028325989842414856, "learning_rate": 0.001, "loss": 0.4329, "step": 1831 }, { "epoch": 0.05054891234992429, "grad_norm": 0.005994097795337439, "learning_rate": 0.001, "loss": 0.3843, "step": 1832 }, { "epoch": 0.050576504550988666, "grad_norm": 0.0032757038716226816, "learning_rate": 0.001, "loss": 0.4053, "step": 1833 }, { "epoch": 0.05060409675205303, "grad_norm": 0.0055892630480229855, "learning_rate": 0.001, "loss": 0.3838, "step": 1834 }, { "epoch": 0.0506316889531174, "grad_norm": 0.007300201803445816, "learning_rate": 0.001, "loss": 0.4304, "step": 1835 }, { "epoch": 0.05065928115418177, "grad_norm": 0.004705760162323713, "learning_rate": 0.001, "loss": 0.4032, "step": 1836 }, { "epoch": 0.05068687335524614, "grad_norm": 0.0038876638282090425, "learning_rate": 0.001, "loss": 0.4482, "step": 1837 }, { "epoch": 0.05071446555631051, "grad_norm": 0.0049439528957009315, "learning_rate": 0.001, "loss": 0.3771, "step": 1838 }, { "epoch": 0.05074205775737488, "grad_norm": 0.00367721077054739, "learning_rate": 0.001, "loss": 0.3874, "step": 1839 }, { "epoch": 0.050769649958439245, "grad_norm": 0.0021761716343462467, "learning_rate": 0.001, "loss": 0.3918, "step": 1840 }, { "epoch": 0.05079724215950362, "grad_norm": 0.003433308796957135, "learning_rate": 0.001, "loss": 0.3675, "step": 1841 }, { "epoch": 0.050824834360567984, "grad_norm": 0.0041929068975150585, "learning_rate": 0.001, "loss": 0.4287, "step": 1842 }, { "epoch": 0.05085242656163236, "grad_norm": 0.003800287377089262, "learning_rate": 0.001, "loss": 0.3831, "step": 1843 }, { "epoch": 0.050880018762696724, "grad_norm": 0.00222361390478909, "learning_rate": 0.001, "loss": 0.3766, "step": 1844 }, { "epoch": 0.05090761096376109, "grad_norm": 0.010053042322397232, "learning_rate": 0.001, "loss": 0.3897, "step": 1845 }, { "epoch": 0.050935203164825464, "grad_norm": 0.002249652985483408, "learning_rate": 0.001, "loss": 0.4388, "step": 1846 }, { "epoch": 0.05096279536588983, "grad_norm": 0.0034179852809756994, "learning_rate": 0.001, "loss": 0.4085, "step": 1847 }, { "epoch": 0.0509903875669542, "grad_norm": 0.0027194637805223465, "learning_rate": 0.001, "loss": 0.3943, "step": 1848 }, { "epoch": 0.05101797976801857, "grad_norm": 0.00189922412391752, "learning_rate": 0.001, "loss": 0.45, "step": 1849 }, { "epoch": 0.05104557196908294, "grad_norm": 0.0029718675650656223, "learning_rate": 0.001, "loss": 0.408, "step": 1850 }, { "epoch": 0.05107316417014731, "grad_norm": 0.002118749311193824, "learning_rate": 0.001, "loss": 0.4314, "step": 1851 }, { "epoch": 0.05110075637121168, "grad_norm": 0.001921525807119906, "learning_rate": 0.001, "loss": 0.401, "step": 1852 }, { "epoch": 0.05112834857227604, "grad_norm": 0.002200416987761855, "learning_rate": 0.001, "loss": 0.4341, "step": 1853 }, { "epoch": 0.051155940773340416, "grad_norm": 0.004263362381607294, "learning_rate": 0.001, "loss": 0.3937, "step": 1854 }, { "epoch": 0.05118353297440478, "grad_norm": 0.002614164724946022, "learning_rate": 0.001, "loss": 0.4168, "step": 1855 }, { "epoch": 0.051211125175469156, "grad_norm": 0.002143037738278508, "learning_rate": 0.001, "loss": 0.4199, "step": 1856 }, { "epoch": 0.05123871737653352, "grad_norm": 0.0025298171676695347, "learning_rate": 0.001, "loss": 0.4177, "step": 1857 }, { "epoch": 0.05126630957759789, "grad_norm": 0.0026806145906448364, "learning_rate": 0.001, "loss": 0.3976, "step": 1858 }, { "epoch": 0.05129390177866226, "grad_norm": 0.0022738908883184195, "learning_rate": 0.001, "loss": 0.4439, "step": 1859 }, { "epoch": 0.05132149397972663, "grad_norm": 0.0023423023521900177, "learning_rate": 0.001, "loss": 0.3977, "step": 1860 }, { "epoch": 0.051349086180791, "grad_norm": 0.0033382964320480824, "learning_rate": 0.001, "loss": 0.4005, "step": 1861 }, { "epoch": 0.05137667838185537, "grad_norm": 0.002349057700484991, "learning_rate": 0.001, "loss": 0.4096, "step": 1862 }, { "epoch": 0.051404270582919735, "grad_norm": 0.0029646153561770916, "learning_rate": 0.001, "loss": 0.3665, "step": 1863 }, { "epoch": 0.05143186278398411, "grad_norm": 0.003112231148406863, "learning_rate": 0.001, "loss": 0.3745, "step": 1864 }, { "epoch": 0.051459454985048475, "grad_norm": 0.006748152896761894, "learning_rate": 0.001, "loss": 0.3885, "step": 1865 }, { "epoch": 0.05148704718611285, "grad_norm": 0.003962017595767975, "learning_rate": 0.001, "loss": 0.3669, "step": 1866 }, { "epoch": 0.051514639387177215, "grad_norm": 0.0032168615143746138, "learning_rate": 0.001, "loss": 0.4077, "step": 1867 }, { "epoch": 0.05154223158824158, "grad_norm": 0.00549092423170805, "learning_rate": 0.001, "loss": 0.3736, "step": 1868 }, { "epoch": 0.051569823789305955, "grad_norm": 0.005641186144202948, "learning_rate": 0.001, "loss": 0.373, "step": 1869 }, { "epoch": 0.05159741599037032, "grad_norm": 0.0029503426048904657, "learning_rate": 0.001, "loss": 0.3835, "step": 1870 }, { "epoch": 0.05162500819143469, "grad_norm": 0.002859857166185975, "learning_rate": 0.001, "loss": 0.4275, "step": 1871 }, { "epoch": 0.05165260039249906, "grad_norm": 0.0032987252343446016, "learning_rate": 0.001, "loss": 0.377, "step": 1872 }, { "epoch": 0.05168019259356343, "grad_norm": 0.002959759905934334, "learning_rate": 0.001, "loss": 0.361, "step": 1873 }, { "epoch": 0.0517077847946278, "grad_norm": 0.00282686366699636, "learning_rate": 0.001, "loss": 0.3661, "step": 1874 }, { "epoch": 0.05173537699569217, "grad_norm": 0.003676011925563216, "learning_rate": 0.001, "loss": 0.418, "step": 1875 }, { "epoch": 0.051762969196756534, "grad_norm": 0.0026158930268138647, "learning_rate": 0.001, "loss": 0.4015, "step": 1876 }, { "epoch": 0.05179056139782091, "grad_norm": 0.0024638089817017317, "learning_rate": 0.001, "loss": 0.4249, "step": 1877 }, { "epoch": 0.05181815359888527, "grad_norm": 0.003338116453960538, "learning_rate": 0.001, "loss": 0.392, "step": 1878 }, { "epoch": 0.05184574579994965, "grad_norm": 0.004489647690206766, "learning_rate": 0.001, "loss": 0.3799, "step": 1879 }, { "epoch": 0.05187333800101401, "grad_norm": 0.002798578003421426, "learning_rate": 0.001, "loss": 0.3785, "step": 1880 }, { "epoch": 0.05190093020207838, "grad_norm": 0.0029343098867684603, "learning_rate": 0.001, "loss": 0.3848, "step": 1881 }, { "epoch": 0.05192852240314275, "grad_norm": 0.003431658959016204, "learning_rate": 0.001, "loss": 0.4085, "step": 1882 }, { "epoch": 0.05195611460420712, "grad_norm": 0.002329075476154685, "learning_rate": 0.001, "loss": 0.404, "step": 1883 }, { "epoch": 0.05198370680527149, "grad_norm": 0.002889421069994569, "learning_rate": 0.001, "loss": 0.394, "step": 1884 }, { "epoch": 0.05201129900633586, "grad_norm": 0.003102013608440757, "learning_rate": 0.001, "loss": 0.4429, "step": 1885 }, { "epoch": 0.052038891207400226, "grad_norm": 0.0030128401704132557, "learning_rate": 0.001, "loss": 0.402, "step": 1886 }, { "epoch": 0.0520664834084646, "grad_norm": 0.002882935106754303, "learning_rate": 0.001, "loss": 0.4126, "step": 1887 }, { "epoch": 0.052094075609528966, "grad_norm": 0.0033189402893185616, "learning_rate": 0.001, "loss": 0.382, "step": 1888 }, { "epoch": 0.05212166781059334, "grad_norm": 0.003128107637166977, "learning_rate": 0.001, "loss": 0.3822, "step": 1889 }, { "epoch": 0.052149260011657705, "grad_norm": 0.0028161504305899143, "learning_rate": 0.001, "loss": 0.4362, "step": 1890 }, { "epoch": 0.05217685221272207, "grad_norm": 0.00556217972189188, "learning_rate": 0.001, "loss": 0.426, "step": 1891 }, { "epoch": 0.052204444413786445, "grad_norm": 0.006903808563947678, "learning_rate": 0.001, "loss": 0.4056, "step": 1892 }, { "epoch": 0.05223203661485081, "grad_norm": 0.008768963627517223, "learning_rate": 0.001, "loss": 0.407, "step": 1893 }, { "epoch": 0.05225962881591518, "grad_norm": 0.0038300056476145983, "learning_rate": 0.001, "loss": 0.4084, "step": 1894 }, { "epoch": 0.05228722101697955, "grad_norm": 0.005089603364467621, "learning_rate": 0.001, "loss": 0.387, "step": 1895 }, { "epoch": 0.05231481321804392, "grad_norm": 0.004371874965727329, "learning_rate": 0.001, "loss": 0.3989, "step": 1896 }, { "epoch": 0.05234240541910829, "grad_norm": 0.0037384675815701485, "learning_rate": 0.001, "loss": 0.4168, "step": 1897 }, { "epoch": 0.05236999762017266, "grad_norm": 0.012704477645456791, "learning_rate": 0.001, "loss": 0.4017, "step": 1898 }, { "epoch": 0.052397589821237024, "grad_norm": 0.004448601044714451, "learning_rate": 0.001, "loss": 0.3781, "step": 1899 }, { "epoch": 0.0524251820223014, "grad_norm": 0.004198121372610331, "learning_rate": 0.001, "loss": 0.3724, "step": 1900 }, { "epoch": 0.052452774223365764, "grad_norm": 0.008598609827458858, "learning_rate": 0.001, "loss": 0.4058, "step": 1901 }, { "epoch": 0.05248036642443014, "grad_norm": 0.005187192931771278, "learning_rate": 0.001, "loss": 0.388, "step": 1902 }, { "epoch": 0.052507958625494504, "grad_norm": 0.0033852029591798782, "learning_rate": 0.001, "loss": 0.418, "step": 1903 }, { "epoch": 0.05253555082655887, "grad_norm": 0.004068314563483, "learning_rate": 0.001, "loss": 0.4299, "step": 1904 }, { "epoch": 0.052563143027623244, "grad_norm": 0.004143074620515108, "learning_rate": 0.001, "loss": 0.4146, "step": 1905 }, { "epoch": 0.05259073522868761, "grad_norm": 0.004285483155399561, "learning_rate": 0.001, "loss": 0.3817, "step": 1906 }, { "epoch": 0.05261832742975198, "grad_norm": 0.004647474270313978, "learning_rate": 0.001, "loss": 0.3652, "step": 1907 }, { "epoch": 0.05264591963081635, "grad_norm": 0.002305620349943638, "learning_rate": 0.001, "loss": 0.3944, "step": 1908 }, { "epoch": 0.052673511831880716, "grad_norm": 0.004161974415183067, "learning_rate": 0.001, "loss": 0.3945, "step": 1909 }, { "epoch": 0.05270110403294509, "grad_norm": 0.004659401252865791, "learning_rate": 0.001, "loss": 0.4125, "step": 1910 }, { "epoch": 0.052728696234009456, "grad_norm": 0.003204295178875327, "learning_rate": 0.001, "loss": 0.3579, "step": 1911 }, { "epoch": 0.05275628843507383, "grad_norm": 0.002777168760076165, "learning_rate": 0.001, "loss": 0.3996, "step": 1912 }, { "epoch": 0.052783880636138196, "grad_norm": 0.003622923046350479, "learning_rate": 0.001, "loss": 0.4119, "step": 1913 }, { "epoch": 0.05281147283720256, "grad_norm": 0.003526994027197361, "learning_rate": 0.001, "loss": 0.3893, "step": 1914 }, { "epoch": 0.052839065038266936, "grad_norm": 0.0032004155218601227, "learning_rate": 0.001, "loss": 0.3905, "step": 1915 }, { "epoch": 0.0528666572393313, "grad_norm": 0.003097831504419446, "learning_rate": 0.001, "loss": 0.3859, "step": 1916 }, { "epoch": 0.052894249440395676, "grad_norm": 0.0046899812296032906, "learning_rate": 0.001, "loss": 0.3689, "step": 1917 }, { "epoch": 0.05292184164146004, "grad_norm": 0.00434432877227664, "learning_rate": 0.001, "loss": 0.4004, "step": 1918 }, { "epoch": 0.05294943384252441, "grad_norm": 0.0034439517185091972, "learning_rate": 0.001, "loss": 0.3907, "step": 1919 }, { "epoch": 0.05297702604358878, "grad_norm": 0.0031842549797147512, "learning_rate": 0.001, "loss": 0.3825, "step": 1920 }, { "epoch": 0.05300461824465315, "grad_norm": 0.002607315080240369, "learning_rate": 0.001, "loss": 0.4513, "step": 1921 }, { "epoch": 0.053032210445717515, "grad_norm": 0.003620088566094637, "learning_rate": 0.001, "loss": 0.3873, "step": 1922 }, { "epoch": 0.05305980264678189, "grad_norm": 0.0034968543332070112, "learning_rate": 0.001, "loss": 0.3656, "step": 1923 }, { "epoch": 0.053087394847846255, "grad_norm": 0.0023364322260022163, "learning_rate": 0.001, "loss": 0.4037, "step": 1924 }, { "epoch": 0.05311498704891063, "grad_norm": 0.002855852944776416, "learning_rate": 0.001, "loss": 0.4145, "step": 1925 }, { "epoch": 0.053142579249974994, "grad_norm": 0.0027531140949577093, "learning_rate": 0.001, "loss": 0.3575, "step": 1926 }, { "epoch": 0.05317017145103936, "grad_norm": 0.0032164284493774176, "learning_rate": 0.001, "loss": 0.3979, "step": 1927 }, { "epoch": 0.053197763652103734, "grad_norm": 0.002508921315893531, "learning_rate": 0.001, "loss": 0.4085, "step": 1928 }, { "epoch": 0.0532253558531681, "grad_norm": 0.0026788045652210712, "learning_rate": 0.001, "loss": 0.3872, "step": 1929 }, { "epoch": 0.053252948054232474, "grad_norm": 0.0051200552843511105, "learning_rate": 0.001, "loss": 0.4025, "step": 1930 }, { "epoch": 0.05328054025529684, "grad_norm": 0.004671086091548204, "learning_rate": 0.001, "loss": 0.4054, "step": 1931 }, { "epoch": 0.05330813245636121, "grad_norm": 0.0028699622489511967, "learning_rate": 0.001, "loss": 0.4078, "step": 1932 }, { "epoch": 0.05333572465742558, "grad_norm": 0.003617132781073451, "learning_rate": 0.001, "loss": 0.3793, "step": 1933 }, { "epoch": 0.05336331685848995, "grad_norm": 0.0036757884081453085, "learning_rate": 0.001, "loss": 0.3995, "step": 1934 }, { "epoch": 0.05339090905955432, "grad_norm": 0.002974196569994092, "learning_rate": 0.001, "loss": 0.4085, "step": 1935 }, { "epoch": 0.05341850126061869, "grad_norm": 0.0034452476538717747, "learning_rate": 0.001, "loss": 0.4123, "step": 1936 }, { "epoch": 0.05344609346168305, "grad_norm": 0.010058878920972347, "learning_rate": 0.001, "loss": 0.4216, "step": 1937 }, { "epoch": 0.053473685662747426, "grad_norm": 0.007609996944665909, "learning_rate": 0.001, "loss": 0.3969, "step": 1938 }, { "epoch": 0.05350127786381179, "grad_norm": 0.006538870744407177, "learning_rate": 0.001, "loss": 0.4252, "step": 1939 }, { "epoch": 0.053528870064876166, "grad_norm": 0.003294197143986821, "learning_rate": 0.001, "loss": 0.4012, "step": 1940 }, { "epoch": 0.05355646226594053, "grad_norm": 0.0027565581258386374, "learning_rate": 0.001, "loss": 0.4105, "step": 1941 }, { "epoch": 0.0535840544670049, "grad_norm": 0.0027171720284968615, "learning_rate": 0.001, "loss": 0.3947, "step": 1942 }, { "epoch": 0.05361164666806927, "grad_norm": 0.00246808142401278, "learning_rate": 0.001, "loss": 0.3959, "step": 1943 }, { "epoch": 0.05363923886913364, "grad_norm": 0.0030862221028655767, "learning_rate": 0.001, "loss": 0.3797, "step": 1944 }, { "epoch": 0.053666831070198005, "grad_norm": 0.002278526546433568, "learning_rate": 0.001, "loss": 0.4429, "step": 1945 }, { "epoch": 0.05369442327126238, "grad_norm": 0.002600095234811306, "learning_rate": 0.001, "loss": 0.4105, "step": 1946 }, { "epoch": 0.053722015472326745, "grad_norm": 0.0028381014708429575, "learning_rate": 0.001, "loss": 0.4161, "step": 1947 }, { "epoch": 0.05374960767339112, "grad_norm": 0.0026186273898929358, "learning_rate": 0.001, "loss": 0.3953, "step": 1948 }, { "epoch": 0.053777199874455485, "grad_norm": 0.0026584621518850327, "learning_rate": 0.001, "loss": 0.445, "step": 1949 }, { "epoch": 0.05380479207551985, "grad_norm": 0.002734116045758128, "learning_rate": 0.001, "loss": 0.396, "step": 1950 }, { "epoch": 0.053832384276584225, "grad_norm": 0.0053114392794668674, "learning_rate": 0.001, "loss": 0.4241, "step": 1951 }, { "epoch": 0.05385997647764859, "grad_norm": 0.005844905972480774, "learning_rate": 0.001, "loss": 0.3789, "step": 1952 }, { "epoch": 0.053887568678712965, "grad_norm": 0.002963895909488201, "learning_rate": 0.001, "loss": 0.406, "step": 1953 }, { "epoch": 0.05391516087977733, "grad_norm": 0.004491107538342476, "learning_rate": 0.001, "loss": 0.4068, "step": 1954 }, { "epoch": 0.0539427530808417, "grad_norm": 0.003358663059771061, "learning_rate": 0.001, "loss": 0.4336, "step": 1955 }, { "epoch": 0.05397034528190607, "grad_norm": 0.003177732229232788, "learning_rate": 0.001, "loss": 0.4212, "step": 1956 }, { "epoch": 0.05399793748297044, "grad_norm": 0.0027129724621772766, "learning_rate": 0.001, "loss": 0.4155, "step": 1957 }, { "epoch": 0.05402552968403481, "grad_norm": 0.00669928640127182, "learning_rate": 0.001, "loss": 0.3979, "step": 1958 }, { "epoch": 0.05405312188509918, "grad_norm": 0.0033415532670915127, "learning_rate": 0.001, "loss": 0.4139, "step": 1959 }, { "epoch": 0.054080714086163544, "grad_norm": 0.0033846443984657526, "learning_rate": 0.001, "loss": 0.4469, "step": 1960 }, { "epoch": 0.05410830628722792, "grad_norm": 0.0034599697683006525, "learning_rate": 0.001, "loss": 0.3879, "step": 1961 }, { "epoch": 0.05413589848829228, "grad_norm": 0.004474925808608532, "learning_rate": 0.001, "loss": 0.367, "step": 1962 }, { "epoch": 0.05416349068935666, "grad_norm": 0.005082536954432726, "learning_rate": 0.001, "loss": 0.3704, "step": 1963 }, { "epoch": 0.05419108289042102, "grad_norm": 0.008428883738815784, "learning_rate": 0.001, "loss": 0.3963, "step": 1964 }, { "epoch": 0.05421867509148539, "grad_norm": 0.0027383146807551384, "learning_rate": 0.001, "loss": 0.4093, "step": 1965 }, { "epoch": 0.05424626729254976, "grad_norm": 0.0036172361578792334, "learning_rate": 0.001, "loss": 0.424, "step": 1966 }, { "epoch": 0.05427385949361413, "grad_norm": 0.0034358236007392406, "learning_rate": 0.001, "loss": 0.41, "step": 1967 }, { "epoch": 0.054301451694678496, "grad_norm": 0.0043413047678768635, "learning_rate": 0.001, "loss": 0.3677, "step": 1968 }, { "epoch": 0.05432904389574287, "grad_norm": 0.0025661587715148926, "learning_rate": 0.001, "loss": 0.435, "step": 1969 }, { "epoch": 0.054356636096807236, "grad_norm": 0.005917475093156099, "learning_rate": 0.001, "loss": 0.4118, "step": 1970 }, { "epoch": 0.05438422829787161, "grad_norm": 0.003292621113359928, "learning_rate": 0.001, "loss": 0.455, "step": 1971 }, { "epoch": 0.054411820498935975, "grad_norm": 0.0028464931529015303, "learning_rate": 0.001, "loss": 0.4019, "step": 1972 }, { "epoch": 0.05443941270000034, "grad_norm": 0.003331197891384363, "learning_rate": 0.001, "loss": 0.3802, "step": 1973 }, { "epoch": 0.054467004901064715, "grad_norm": 0.005236343014985323, "learning_rate": 0.001, "loss": 0.3887, "step": 1974 }, { "epoch": 0.05449459710212908, "grad_norm": 0.003114610444754362, "learning_rate": 0.001, "loss": 0.4028, "step": 1975 }, { "epoch": 0.054522189303193455, "grad_norm": 0.0027003728318959475, "learning_rate": 0.001, "loss": 0.4072, "step": 1976 }, { "epoch": 0.05454978150425782, "grad_norm": 0.00410441542044282, "learning_rate": 0.001, "loss": 0.4108, "step": 1977 }, { "epoch": 0.05457737370532219, "grad_norm": 0.006601103115826845, "learning_rate": 0.001, "loss": 0.3811, "step": 1978 }, { "epoch": 0.05460496590638656, "grad_norm": 0.003532871138304472, "learning_rate": 0.001, "loss": 0.4071, "step": 1979 }, { "epoch": 0.05463255810745093, "grad_norm": 0.0024505204055458307, "learning_rate": 0.001, "loss": 0.4183, "step": 1980 }, { "epoch": 0.0546601503085153, "grad_norm": 0.0038412862922996283, "learning_rate": 0.001, "loss": 0.3656, "step": 1981 }, { "epoch": 0.05468774250957967, "grad_norm": 0.00442805141210556, "learning_rate": 0.001, "loss": 0.3929, "step": 1982 }, { "epoch": 0.054715334710644034, "grad_norm": 0.004178628791123629, "learning_rate": 0.001, "loss": 0.4056, "step": 1983 }, { "epoch": 0.05474292691170841, "grad_norm": 0.0023596303071826696, "learning_rate": 0.001, "loss": 0.3791, "step": 1984 }, { "epoch": 0.054770519112772774, "grad_norm": 0.0026005019899457693, "learning_rate": 0.001, "loss": 0.4101, "step": 1985 }, { "epoch": 0.05479811131383715, "grad_norm": 0.0043563637882471085, "learning_rate": 0.001, "loss": 0.3924, "step": 1986 }, { "epoch": 0.054825703514901514, "grad_norm": 0.004494437016546726, "learning_rate": 0.001, "loss": 0.3904, "step": 1987 }, { "epoch": 0.05485329571596588, "grad_norm": 0.0030924968887120485, "learning_rate": 0.001, "loss": 0.3947, "step": 1988 }, { "epoch": 0.054880887917030254, "grad_norm": 0.0029607934411615133, "learning_rate": 0.001, "loss": 0.402, "step": 1989 }, { "epoch": 0.05490848011809462, "grad_norm": 0.004574719350785017, "learning_rate": 0.001, "loss": 0.4282, "step": 1990 }, { "epoch": 0.054936072319158986, "grad_norm": 0.0027861008420586586, "learning_rate": 0.001, "loss": 0.4374, "step": 1991 }, { "epoch": 0.05496366452022336, "grad_norm": 0.003089543664827943, "learning_rate": 0.001, "loss": 0.452, "step": 1992 }, { "epoch": 0.054991256721287726, "grad_norm": 0.0027578859589993954, "learning_rate": 0.001, "loss": 0.4185, "step": 1993 }, { "epoch": 0.0550188489223521, "grad_norm": 0.003672545775771141, "learning_rate": 0.001, "loss": 0.4158, "step": 1994 }, { "epoch": 0.055046441123416466, "grad_norm": 0.0028012071270495653, "learning_rate": 0.001, "loss": 0.3829, "step": 1995 }, { "epoch": 0.05507403332448083, "grad_norm": 0.0035645493771880865, "learning_rate": 0.001, "loss": 0.3798, "step": 1996 }, { "epoch": 0.055101625525545206, "grad_norm": 0.004443664103746414, "learning_rate": 0.001, "loss": 0.4631, "step": 1997 }, { "epoch": 0.05512921772660957, "grad_norm": 0.002620038343593478, "learning_rate": 0.001, "loss": 0.4562, "step": 1998 }, { "epoch": 0.055156809927673946, "grad_norm": 0.004475294146686792, "learning_rate": 0.001, "loss": 0.3954, "step": 1999 }, { "epoch": 0.05518440212873831, "grad_norm": 0.003935279790312052, "learning_rate": 0.001, "loss": 0.4801, "step": 2000 }, { "epoch": 0.05518440212873831, "eval_runtime": 24.2254, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.165, "step": 2000 }, { "epoch": 0.05521199432980268, "grad_norm": 0.01289259921759367, "learning_rate": 0.001, "loss": 0.4222, "step": 2001 }, { "epoch": 0.05523958653086705, "grad_norm": 0.0044218553230166435, "learning_rate": 0.001, "loss": 0.4219, "step": 2002 }, { "epoch": 0.05526717873193142, "grad_norm": 0.0024792973417788744, "learning_rate": 0.001, "loss": 0.3932, "step": 2003 }, { "epoch": 0.05529477093299579, "grad_norm": 0.0030391570180654526, "learning_rate": 0.001, "loss": 0.3952, "step": 2004 }, { "epoch": 0.05532236313406016, "grad_norm": 0.0027939057908952236, "learning_rate": 0.001, "loss": 0.4062, "step": 2005 }, { "epoch": 0.055349955335124525, "grad_norm": 0.003908068872988224, "learning_rate": 0.001, "loss": 0.3825, "step": 2006 }, { "epoch": 0.0553775475361889, "grad_norm": 0.006802330259233713, "learning_rate": 0.001, "loss": 0.3977, "step": 2007 }, { "epoch": 0.055405139737253264, "grad_norm": 0.005997187457978725, "learning_rate": 0.001, "loss": 0.4176, "step": 2008 }, { "epoch": 0.05543273193831764, "grad_norm": 0.004595068749040365, "learning_rate": 0.001, "loss": 0.4324, "step": 2009 }, { "epoch": 0.055460324139382004, "grad_norm": 0.0029883128590881824, "learning_rate": 0.001, "loss": 0.3992, "step": 2010 }, { "epoch": 0.05548791634044637, "grad_norm": 0.0025319228880107403, "learning_rate": 0.001, "loss": 0.3903, "step": 2011 }, { "epoch": 0.055515508541510744, "grad_norm": 0.004491596017032862, "learning_rate": 0.001, "loss": 0.3786, "step": 2012 }, { "epoch": 0.05554310074257511, "grad_norm": 0.01933477073907852, "learning_rate": 0.001, "loss": 0.3898, "step": 2013 }, { "epoch": 0.05557069294363948, "grad_norm": 0.0026234816759824753, "learning_rate": 0.001, "loss": 0.3984, "step": 2014 }, { "epoch": 0.05559828514470385, "grad_norm": 0.0022836877033114433, "learning_rate": 0.001, "loss": 0.4291, "step": 2015 }, { "epoch": 0.05562587734576822, "grad_norm": 0.003522195853292942, "learning_rate": 0.001, "loss": 0.3994, "step": 2016 }, { "epoch": 0.05565346954683259, "grad_norm": 0.0039251442067325115, "learning_rate": 0.001, "loss": 0.3804, "step": 2017 }, { "epoch": 0.05568106174789696, "grad_norm": 0.011906804516911507, "learning_rate": 0.001, "loss": 0.3988, "step": 2018 }, { "epoch": 0.05570865394896132, "grad_norm": 0.002788014244288206, "learning_rate": 0.001, "loss": 0.4111, "step": 2019 }, { "epoch": 0.055736246150025696, "grad_norm": 0.006921442225575447, "learning_rate": 0.001, "loss": 0.3862, "step": 2020 }, { "epoch": 0.05576383835109006, "grad_norm": 0.004114898853003979, "learning_rate": 0.001, "loss": 0.4144, "step": 2021 }, { "epoch": 0.055791430552154436, "grad_norm": 0.0033005515579134226, "learning_rate": 0.001, "loss": 0.4108, "step": 2022 }, { "epoch": 0.0558190227532188, "grad_norm": 0.00554333720356226, "learning_rate": 0.001, "loss": 0.4142, "step": 2023 }, { "epoch": 0.05584661495428317, "grad_norm": 0.009507423266768456, "learning_rate": 0.001, "loss": 0.3891, "step": 2024 }, { "epoch": 0.05587420715534754, "grad_norm": 0.0032099178060889244, "learning_rate": 0.001, "loss": 0.3636, "step": 2025 }, { "epoch": 0.05590179935641191, "grad_norm": 0.0028057810850441456, "learning_rate": 0.001, "loss": 0.4091, "step": 2026 }, { "epoch": 0.05592939155747628, "grad_norm": 0.00344046950340271, "learning_rate": 0.001, "loss": 0.4409, "step": 2027 }, { "epoch": 0.05595698375854065, "grad_norm": 0.004159116186201572, "learning_rate": 0.001, "loss": 0.3919, "step": 2028 }, { "epoch": 0.055984575959605015, "grad_norm": 0.017573168501257896, "learning_rate": 0.001, "loss": 0.4202, "step": 2029 }, { "epoch": 0.05601216816066939, "grad_norm": 0.0059144701808691025, "learning_rate": 0.001, "loss": 0.38, "step": 2030 }, { "epoch": 0.056039760361733755, "grad_norm": 0.003630187129601836, "learning_rate": 0.001, "loss": 0.3843, "step": 2031 }, { "epoch": 0.05606735256279813, "grad_norm": 0.003033621236681938, "learning_rate": 0.001, "loss": 0.393, "step": 2032 }, { "epoch": 0.056094944763862495, "grad_norm": 0.003519507823511958, "learning_rate": 0.001, "loss": 0.4114, "step": 2033 }, { "epoch": 0.05612253696492686, "grad_norm": 0.004492396954447031, "learning_rate": 0.001, "loss": 0.4137, "step": 2034 }, { "epoch": 0.056150129165991235, "grad_norm": 0.004509568680077791, "learning_rate": 0.001, "loss": 0.4155, "step": 2035 }, { "epoch": 0.0561777213670556, "grad_norm": 0.0026023637037724257, "learning_rate": 0.001, "loss": 0.4057, "step": 2036 }, { "epoch": 0.05620531356811997, "grad_norm": 0.0026178304105997086, "learning_rate": 0.001, "loss": 0.3801, "step": 2037 }, { "epoch": 0.05623290576918434, "grad_norm": 0.0024824494030326605, "learning_rate": 0.001, "loss": 0.3754, "step": 2038 }, { "epoch": 0.05626049797024871, "grad_norm": 0.0025145653635263443, "learning_rate": 0.001, "loss": 0.4485, "step": 2039 }, { "epoch": 0.05628809017131308, "grad_norm": 0.005017207004129887, "learning_rate": 0.001, "loss": 0.4104, "step": 2040 }, { "epoch": 0.05631568237237745, "grad_norm": 0.005889947526156902, "learning_rate": 0.001, "loss": 0.4284, "step": 2041 }, { "epoch": 0.056343274573441814, "grad_norm": 0.003475229488685727, "learning_rate": 0.001, "loss": 0.3887, "step": 2042 }, { "epoch": 0.05637086677450619, "grad_norm": 0.00831044651567936, "learning_rate": 0.001, "loss": 0.4023, "step": 2043 }, { "epoch": 0.05639845897557055, "grad_norm": 0.003840766381472349, "learning_rate": 0.001, "loss": 0.3942, "step": 2044 }, { "epoch": 0.05642605117663493, "grad_norm": 0.004076477140188217, "learning_rate": 0.001, "loss": 0.3865, "step": 2045 }, { "epoch": 0.05645364337769929, "grad_norm": 0.004721554461866617, "learning_rate": 0.001, "loss": 0.4019, "step": 2046 }, { "epoch": 0.05648123557876366, "grad_norm": 0.003165911417454481, "learning_rate": 0.001, "loss": 0.4076, "step": 2047 }, { "epoch": 0.05650882777982803, "grad_norm": 0.0031442714389413595, "learning_rate": 0.001, "loss": 0.3718, "step": 2048 }, { "epoch": 0.0565364199808924, "grad_norm": 0.004942482803016901, "learning_rate": 0.001, "loss": 0.3707, "step": 2049 }, { "epoch": 0.05656401218195677, "grad_norm": 0.004441413562744856, "learning_rate": 0.001, "loss": 0.3872, "step": 2050 }, { "epoch": 0.05659160438302114, "grad_norm": 0.005134286358952522, "learning_rate": 0.001, "loss": 0.415, "step": 2051 }, { "epoch": 0.056619196584085506, "grad_norm": 0.0030015951488167048, "learning_rate": 0.001, "loss": 0.4174, "step": 2052 }, { "epoch": 0.05664678878514988, "grad_norm": 0.0028276192024350166, "learning_rate": 0.001, "loss": 0.3858, "step": 2053 }, { "epoch": 0.056674380986214246, "grad_norm": 0.0030078550335019827, "learning_rate": 0.001, "loss": 0.3855, "step": 2054 }, { "epoch": 0.05670197318727862, "grad_norm": 0.0047853728756308556, "learning_rate": 0.001, "loss": 0.4107, "step": 2055 }, { "epoch": 0.056729565388342985, "grad_norm": 0.003354682819917798, "learning_rate": 0.001, "loss": 0.3903, "step": 2056 }, { "epoch": 0.05675715758940735, "grad_norm": 0.002619178267195821, "learning_rate": 0.001, "loss": 0.353, "step": 2057 }, { "epoch": 0.056784749790471725, "grad_norm": 0.003579444019123912, "learning_rate": 0.001, "loss": 0.4116, "step": 2058 }, { "epoch": 0.05681234199153609, "grad_norm": 0.002495531225576997, "learning_rate": 0.001, "loss": 0.4171, "step": 2059 }, { "epoch": 0.05683993419260046, "grad_norm": 0.0025894520804286003, "learning_rate": 0.001, "loss": 0.4454, "step": 2060 }, { "epoch": 0.05686752639366483, "grad_norm": 0.0035489851143211126, "learning_rate": 0.001, "loss": 0.3708, "step": 2061 }, { "epoch": 0.0568951185947292, "grad_norm": 0.00259172054938972, "learning_rate": 0.001, "loss": 0.4112, "step": 2062 }, { "epoch": 0.05692271079579357, "grad_norm": 0.0038454467430710793, "learning_rate": 0.001, "loss": 0.434, "step": 2063 }, { "epoch": 0.05695030299685794, "grad_norm": 0.002690738532692194, "learning_rate": 0.001, "loss": 0.4067, "step": 2064 }, { "epoch": 0.056977895197922304, "grad_norm": 0.0031391652300953865, "learning_rate": 0.001, "loss": 0.4122, "step": 2065 }, { "epoch": 0.05700548739898668, "grad_norm": 0.0025237516965717077, "learning_rate": 0.001, "loss": 0.3933, "step": 2066 }, { "epoch": 0.057033079600051044, "grad_norm": 0.005846772808581591, "learning_rate": 0.001, "loss": 0.4046, "step": 2067 }, { "epoch": 0.05706067180111542, "grad_norm": 0.0032868178095668554, "learning_rate": 0.001, "loss": 0.4246, "step": 2068 }, { "epoch": 0.057088264002179784, "grad_norm": 0.0038759801536798477, "learning_rate": 0.001, "loss": 0.3933, "step": 2069 }, { "epoch": 0.05711585620324415, "grad_norm": 0.002897894475609064, "learning_rate": 0.001, "loss": 0.4083, "step": 2070 }, { "epoch": 0.057143448404308524, "grad_norm": 0.003541572019457817, "learning_rate": 0.001, "loss": 0.3807, "step": 2071 }, { "epoch": 0.05717104060537289, "grad_norm": 0.003856340888887644, "learning_rate": 0.001, "loss": 0.4249, "step": 2072 }, { "epoch": 0.05719863280643726, "grad_norm": 0.003133943770080805, "learning_rate": 0.001, "loss": 0.4246, "step": 2073 }, { "epoch": 0.05722622500750163, "grad_norm": 0.0026191668584942818, "learning_rate": 0.001, "loss": 0.4348, "step": 2074 }, { "epoch": 0.057253817208565996, "grad_norm": 0.003371886443346739, "learning_rate": 0.001, "loss": 0.4478, "step": 2075 }, { "epoch": 0.05728140940963037, "grad_norm": 0.003778213867917657, "learning_rate": 0.001, "loss": 0.4202, "step": 2076 }, { "epoch": 0.057309001610694736, "grad_norm": 0.0024114828556776047, "learning_rate": 0.001, "loss": 0.397, "step": 2077 }, { "epoch": 0.05733659381175911, "grad_norm": 0.0026140043046325445, "learning_rate": 0.001, "loss": 0.3753, "step": 2078 }, { "epoch": 0.057364186012823476, "grad_norm": 0.006618298124521971, "learning_rate": 0.001, "loss": 0.4144, "step": 2079 }, { "epoch": 0.05739177821388784, "grad_norm": 0.002615788020193577, "learning_rate": 0.001, "loss": 0.3928, "step": 2080 }, { "epoch": 0.057419370414952216, "grad_norm": 0.0076182009652256966, "learning_rate": 0.001, "loss": 0.3932, "step": 2081 }, { "epoch": 0.05744696261601658, "grad_norm": 0.0025556047912687063, "learning_rate": 0.001, "loss": 0.3813, "step": 2082 }, { "epoch": 0.05747455481708095, "grad_norm": 0.0024526710622012615, "learning_rate": 0.001, "loss": 0.4107, "step": 2083 }, { "epoch": 0.05750214701814532, "grad_norm": 0.004499271512031555, "learning_rate": 0.001, "loss": 0.3638, "step": 2084 }, { "epoch": 0.05752973921920969, "grad_norm": 0.0039004157297313213, "learning_rate": 0.001, "loss": 0.4047, "step": 2085 }, { "epoch": 0.05755733142027406, "grad_norm": 0.005158510524779558, "learning_rate": 0.001, "loss": 0.3914, "step": 2086 }, { "epoch": 0.05758492362133843, "grad_norm": 0.0028425029013305902, "learning_rate": 0.001, "loss": 0.408, "step": 2087 }, { "epoch": 0.057612515822402795, "grad_norm": 0.0027261306531727314, "learning_rate": 0.001, "loss": 0.4183, "step": 2088 }, { "epoch": 0.05764010802346717, "grad_norm": 0.0026766748633235693, "learning_rate": 0.001, "loss": 0.3774, "step": 2089 }, { "epoch": 0.057667700224531535, "grad_norm": 0.0038580589462071657, "learning_rate": 0.001, "loss": 0.4266, "step": 2090 }, { "epoch": 0.05769529242559591, "grad_norm": 0.0030884486623108387, "learning_rate": 0.001, "loss": 0.3999, "step": 2091 }, { "epoch": 0.057722884626660274, "grad_norm": 0.002511868719011545, "learning_rate": 0.001, "loss": 0.4202, "step": 2092 }, { "epoch": 0.05775047682772464, "grad_norm": 0.003216751618310809, "learning_rate": 0.001, "loss": 0.4032, "step": 2093 }, { "epoch": 0.057778069028789014, "grad_norm": 0.0028110183775424957, "learning_rate": 0.001, "loss": 0.412, "step": 2094 }, { "epoch": 0.05780566122985338, "grad_norm": 0.00429938780143857, "learning_rate": 0.001, "loss": 0.371, "step": 2095 }, { "epoch": 0.057833253430917754, "grad_norm": 0.005798738915473223, "learning_rate": 0.001, "loss": 0.4238, "step": 2096 }, { "epoch": 0.05786084563198212, "grad_norm": 0.0031060322653502226, "learning_rate": 0.001, "loss": 0.4029, "step": 2097 }, { "epoch": 0.05788843783304649, "grad_norm": 0.0039985994808375835, "learning_rate": 0.001, "loss": 0.3612, "step": 2098 }, { "epoch": 0.05791603003411086, "grad_norm": 0.0036842762492597103, "learning_rate": 0.001, "loss": 0.3811, "step": 2099 }, { "epoch": 0.05794362223517523, "grad_norm": 0.002856861101463437, "learning_rate": 0.001, "loss": 0.3664, "step": 2100 }, { "epoch": 0.0579712144362396, "grad_norm": 0.00465161819010973, "learning_rate": 0.001, "loss": 0.404, "step": 2101 }, { "epoch": 0.057998806637303967, "grad_norm": 0.0029720210004597902, "learning_rate": 0.001, "loss": 0.4339, "step": 2102 }, { "epoch": 0.05802639883836833, "grad_norm": 0.0034581513609737158, "learning_rate": 0.001, "loss": 0.3858, "step": 2103 }, { "epoch": 0.058053991039432706, "grad_norm": 0.0030470779165625572, "learning_rate": 0.001, "loss": 0.3836, "step": 2104 }, { "epoch": 0.05808158324049707, "grad_norm": 0.005939992144703865, "learning_rate": 0.001, "loss": 0.3763, "step": 2105 }, { "epoch": 0.05810917544156144, "grad_norm": 0.005322432145476341, "learning_rate": 0.001, "loss": 0.4142, "step": 2106 }, { "epoch": 0.05813676764262581, "grad_norm": 0.0031803702004253864, "learning_rate": 0.001, "loss": 0.3886, "step": 2107 }, { "epoch": 0.05816435984369018, "grad_norm": 0.004405119922012091, "learning_rate": 0.001, "loss": 0.4181, "step": 2108 }, { "epoch": 0.05819195204475455, "grad_norm": 0.005180948879569769, "learning_rate": 0.001, "loss": 0.4202, "step": 2109 }, { "epoch": 0.05821954424581892, "grad_norm": 0.003976668696850538, "learning_rate": 0.001, "loss": 0.3849, "step": 2110 }, { "epoch": 0.058247136446883285, "grad_norm": 0.006682976149022579, "learning_rate": 0.001, "loss": 0.3829, "step": 2111 }, { "epoch": 0.05827472864794766, "grad_norm": 0.0036576632410287857, "learning_rate": 0.001, "loss": 0.3938, "step": 2112 }, { "epoch": 0.058302320849012025, "grad_norm": 0.0037393097300082445, "learning_rate": 0.001, "loss": 0.4176, "step": 2113 }, { "epoch": 0.0583299130500764, "grad_norm": 0.004572103265672922, "learning_rate": 0.001, "loss": 0.3864, "step": 2114 }, { "epoch": 0.058357505251140765, "grad_norm": 0.004839747212827206, "learning_rate": 0.001, "loss": 0.4277, "step": 2115 }, { "epoch": 0.05838509745220513, "grad_norm": 0.002434584079310298, "learning_rate": 0.001, "loss": 0.4297, "step": 2116 }, { "epoch": 0.058412689653269505, "grad_norm": 0.00401110528036952, "learning_rate": 0.001, "loss": 0.3835, "step": 2117 }, { "epoch": 0.05844028185433387, "grad_norm": 0.015435201115906239, "learning_rate": 0.001, "loss": 0.4008, "step": 2118 }, { "epoch": 0.058467874055398245, "grad_norm": 0.005054370500147343, "learning_rate": 0.001, "loss": 0.3822, "step": 2119 }, { "epoch": 0.05849546625646261, "grad_norm": 0.0033668207470327616, "learning_rate": 0.001, "loss": 0.3873, "step": 2120 }, { "epoch": 0.05852305845752698, "grad_norm": 0.004512968007475138, "learning_rate": 0.001, "loss": 0.3878, "step": 2121 }, { "epoch": 0.05855065065859135, "grad_norm": 0.003313259920105338, "learning_rate": 0.001, "loss": 0.436, "step": 2122 }, { "epoch": 0.05857824285965572, "grad_norm": 0.0035786698572337627, "learning_rate": 0.001, "loss": 0.3897, "step": 2123 }, { "epoch": 0.05860583506072009, "grad_norm": 0.002702909056097269, "learning_rate": 0.001, "loss": 0.414, "step": 2124 }, { "epoch": 0.05863342726178446, "grad_norm": 0.004725235048681498, "learning_rate": 0.001, "loss": 0.3909, "step": 2125 }, { "epoch": 0.058661019462848824, "grad_norm": 0.0036240858025848866, "learning_rate": 0.001, "loss": 0.3936, "step": 2126 }, { "epoch": 0.0586886116639132, "grad_norm": 0.004966442938894033, "learning_rate": 0.001, "loss": 0.3921, "step": 2127 }, { "epoch": 0.05871620386497756, "grad_norm": 0.0031415580306202173, "learning_rate": 0.001, "loss": 0.3542, "step": 2128 }, { "epoch": 0.05874379606604194, "grad_norm": 0.003191297873854637, "learning_rate": 0.001, "loss": 0.4267, "step": 2129 }, { "epoch": 0.0587713882671063, "grad_norm": 0.0039037340320646763, "learning_rate": 0.001, "loss": 0.4063, "step": 2130 }, { "epoch": 0.05879898046817067, "grad_norm": 0.00381074589677155, "learning_rate": 0.001, "loss": 0.3995, "step": 2131 }, { "epoch": 0.05882657266923504, "grad_norm": 0.004218887537717819, "learning_rate": 0.001, "loss": 0.3944, "step": 2132 }, { "epoch": 0.05885416487029941, "grad_norm": 0.004148995969444513, "learning_rate": 0.001, "loss": 0.456, "step": 2133 }, { "epoch": 0.058881757071363776, "grad_norm": 0.0040593999437987804, "learning_rate": 0.001, "loss": 0.3874, "step": 2134 }, { "epoch": 0.05890934927242815, "grad_norm": 0.004279328975826502, "learning_rate": 0.001, "loss": 0.4111, "step": 2135 }, { "epoch": 0.058936941473492516, "grad_norm": 0.00401785783469677, "learning_rate": 0.001, "loss": 0.4497, "step": 2136 }, { "epoch": 0.05896453367455689, "grad_norm": 0.004279072396457195, "learning_rate": 0.001, "loss": 0.4052, "step": 2137 }, { "epoch": 0.058992125875621255, "grad_norm": 0.004601733293384314, "learning_rate": 0.001, "loss": 0.3811, "step": 2138 }, { "epoch": 0.05901971807668562, "grad_norm": 0.010014613159000874, "learning_rate": 0.001, "loss": 0.4262, "step": 2139 }, { "epoch": 0.059047310277749995, "grad_norm": 0.004289823584258556, "learning_rate": 0.001, "loss": 0.3813, "step": 2140 }, { "epoch": 0.05907490247881436, "grad_norm": 0.004107081796973944, "learning_rate": 0.001, "loss": 0.406, "step": 2141 }, { "epoch": 0.059102494679878735, "grad_norm": 0.004256140440702438, "learning_rate": 0.001, "loss": 0.3738, "step": 2142 }, { "epoch": 0.0591300868809431, "grad_norm": 0.00437437929213047, "learning_rate": 0.001, "loss": 0.3625, "step": 2143 }, { "epoch": 0.05915767908200747, "grad_norm": 0.004300633445382118, "learning_rate": 0.001, "loss": 0.4134, "step": 2144 }, { "epoch": 0.05918527128307184, "grad_norm": 0.0044938200153410435, "learning_rate": 0.001, "loss": 0.401, "step": 2145 }, { "epoch": 0.05921286348413621, "grad_norm": 0.004241921007633209, "learning_rate": 0.001, "loss": 0.3975, "step": 2146 }, { "epoch": 0.05924045568520058, "grad_norm": 0.005186907015740871, "learning_rate": 0.001, "loss": 0.3937, "step": 2147 }, { "epoch": 0.05926804788626495, "grad_norm": 0.0033411476761102676, "learning_rate": 0.001, "loss": 0.446, "step": 2148 }, { "epoch": 0.059295640087329314, "grad_norm": 0.003417744068428874, "learning_rate": 0.001, "loss": 0.4201, "step": 2149 }, { "epoch": 0.05932323228839369, "grad_norm": 0.0042548892088234425, "learning_rate": 0.001, "loss": 0.3831, "step": 2150 }, { "epoch": 0.059350824489458054, "grad_norm": 0.0036170384846627712, "learning_rate": 0.001, "loss": 0.377, "step": 2151 }, { "epoch": 0.05937841669052243, "grad_norm": 0.0034266202710568905, "learning_rate": 0.001, "loss": 0.4278, "step": 2152 }, { "epoch": 0.059406008891586794, "grad_norm": 0.0034320498816668987, "learning_rate": 0.001, "loss": 0.4498, "step": 2153 }, { "epoch": 0.05943360109265116, "grad_norm": 0.0033084768801927567, "learning_rate": 0.001, "loss": 0.3733, "step": 2154 }, { "epoch": 0.059461193293715534, "grad_norm": 0.003638029098510742, "learning_rate": 0.001, "loss": 0.4059, "step": 2155 }, { "epoch": 0.0594887854947799, "grad_norm": 0.0036058463156223297, "learning_rate": 0.001, "loss": 0.4087, "step": 2156 }, { "epoch": 0.059516377695844266, "grad_norm": 0.003367355791851878, "learning_rate": 0.001, "loss": 0.3577, "step": 2157 }, { "epoch": 0.05954396989690864, "grad_norm": 0.003909154795110226, "learning_rate": 0.001, "loss": 0.404, "step": 2158 }, { "epoch": 0.059571562097973006, "grad_norm": 0.0027421792037785053, "learning_rate": 0.001, "loss": 0.4082, "step": 2159 }, { "epoch": 0.05959915429903738, "grad_norm": 0.003349900944158435, "learning_rate": 0.001, "loss": 0.38, "step": 2160 }, { "epoch": 0.059626746500101746, "grad_norm": 0.003867817111313343, "learning_rate": 0.001, "loss": 0.4502, "step": 2161 }, { "epoch": 0.05965433870116611, "grad_norm": 0.002976678777486086, "learning_rate": 0.001, "loss": 0.4679, "step": 2162 }, { "epoch": 0.059681930902230486, "grad_norm": 0.004093970637768507, "learning_rate": 0.001, "loss": 0.4623, "step": 2163 }, { "epoch": 0.05970952310329485, "grad_norm": 0.008607220835983753, "learning_rate": 0.001, "loss": 0.3939, "step": 2164 }, { "epoch": 0.059737115304359226, "grad_norm": 0.003642444731667638, "learning_rate": 0.001, "loss": 0.3617, "step": 2165 }, { "epoch": 0.05976470750542359, "grad_norm": 0.004864281043410301, "learning_rate": 0.001, "loss": 0.3954, "step": 2166 }, { "epoch": 0.05979229970648796, "grad_norm": 0.003302594181150198, "learning_rate": 0.001, "loss": 0.3968, "step": 2167 }, { "epoch": 0.05981989190755233, "grad_norm": 0.004229418467730284, "learning_rate": 0.001, "loss": 0.3913, "step": 2168 }, { "epoch": 0.0598474841086167, "grad_norm": 0.026235150173306465, "learning_rate": 0.001, "loss": 0.3886, "step": 2169 }, { "epoch": 0.05987507630968107, "grad_norm": 0.0035215525422245264, "learning_rate": 0.001, "loss": 0.4007, "step": 2170 }, { "epoch": 0.05990266851074544, "grad_norm": 0.0038247762713581324, "learning_rate": 0.001, "loss": 0.4207, "step": 2171 }, { "epoch": 0.059930260711809805, "grad_norm": 0.0033723246306180954, "learning_rate": 0.001, "loss": 0.4046, "step": 2172 }, { "epoch": 0.05995785291287418, "grad_norm": 0.003272601403295994, "learning_rate": 0.001, "loss": 0.4218, "step": 2173 }, { "epoch": 0.059985445113938544, "grad_norm": 0.00700679887086153, "learning_rate": 0.001, "loss": 0.3977, "step": 2174 }, { "epoch": 0.06001303731500292, "grad_norm": 0.004312647040933371, "learning_rate": 0.001, "loss": 0.4259, "step": 2175 }, { "epoch": 0.060040629516067284, "grad_norm": 0.0028906308580189943, "learning_rate": 0.001, "loss": 0.4082, "step": 2176 }, { "epoch": 0.06006822171713165, "grad_norm": 0.0023235634434968233, "learning_rate": 0.001, "loss": 0.406, "step": 2177 }, { "epoch": 0.060095813918196024, "grad_norm": 0.003625315148383379, "learning_rate": 0.001, "loss": 0.4082, "step": 2178 }, { "epoch": 0.06012340611926039, "grad_norm": 0.0026299748569726944, "learning_rate": 0.001, "loss": 0.4043, "step": 2179 }, { "epoch": 0.06015099832032476, "grad_norm": 0.003912107087671757, "learning_rate": 0.001, "loss": 0.3928, "step": 2180 }, { "epoch": 0.06017859052138913, "grad_norm": 0.0030596598517149687, "learning_rate": 0.001, "loss": 0.396, "step": 2181 }, { "epoch": 0.0602061827224535, "grad_norm": 0.0034961546771228313, "learning_rate": 0.001, "loss": 0.3976, "step": 2182 }, { "epoch": 0.06023377492351787, "grad_norm": 0.0026868092827498913, "learning_rate": 0.001, "loss": 0.341, "step": 2183 }, { "epoch": 0.06026136712458224, "grad_norm": 0.0036376873031258583, "learning_rate": 0.001, "loss": 0.3954, "step": 2184 }, { "epoch": 0.0602889593256466, "grad_norm": 0.0034194989129900932, "learning_rate": 0.001, "loss": 0.3782, "step": 2185 }, { "epoch": 0.060316551526710976, "grad_norm": 0.00262664002366364, "learning_rate": 0.001, "loss": 0.4054, "step": 2186 }, { "epoch": 0.06034414372777534, "grad_norm": 0.002487578894942999, "learning_rate": 0.001, "loss": 0.4719, "step": 2187 }, { "epoch": 0.060371735928839716, "grad_norm": 0.002136924536898732, "learning_rate": 0.001, "loss": 0.4038, "step": 2188 }, { "epoch": 0.06039932812990408, "grad_norm": 0.0025951487477868795, "learning_rate": 0.001, "loss": 0.4247, "step": 2189 }, { "epoch": 0.06042692033096845, "grad_norm": 0.007234814576804638, "learning_rate": 0.001, "loss": 0.3922, "step": 2190 }, { "epoch": 0.06045451253203282, "grad_norm": 0.04125404730439186, "learning_rate": 0.001, "loss": 0.4087, "step": 2191 }, { "epoch": 0.06048210473309719, "grad_norm": 0.0031280010007321835, "learning_rate": 0.001, "loss": 0.4324, "step": 2192 }, { "epoch": 0.06050969693416156, "grad_norm": 0.003333252388983965, "learning_rate": 0.001, "loss": 0.3976, "step": 2193 }, { "epoch": 0.06053728913522593, "grad_norm": 0.002960551530122757, "learning_rate": 0.001, "loss": 0.4211, "step": 2194 }, { "epoch": 0.060564881336290295, "grad_norm": 0.003678489476442337, "learning_rate": 0.001, "loss": 0.4412, "step": 2195 }, { "epoch": 0.06059247353735467, "grad_norm": 0.008868735283613205, "learning_rate": 0.001, "loss": 0.3728, "step": 2196 }, { "epoch": 0.060620065738419035, "grad_norm": 0.009513468481600285, "learning_rate": 0.001, "loss": 0.3676, "step": 2197 }, { "epoch": 0.06064765793948341, "grad_norm": 0.0050335777923464775, "learning_rate": 0.001, "loss": 0.3789, "step": 2198 }, { "epoch": 0.060675250140547775, "grad_norm": 0.006050426512956619, "learning_rate": 0.001, "loss": 0.4329, "step": 2199 }, { "epoch": 0.06070284234161214, "grad_norm": 0.002776517765596509, "learning_rate": 0.001, "loss": 0.4271, "step": 2200 }, { "epoch": 0.060730434542676515, "grad_norm": 0.003950697835534811, "learning_rate": 0.001, "loss": 0.4057, "step": 2201 }, { "epoch": 0.06075802674374088, "grad_norm": 0.003235916141420603, "learning_rate": 0.001, "loss": 0.3991, "step": 2202 }, { "epoch": 0.06078561894480525, "grad_norm": 0.0029406328685581684, "learning_rate": 0.001, "loss": 0.3845, "step": 2203 }, { "epoch": 0.06081321114586962, "grad_norm": 0.0045188660733401775, "learning_rate": 0.001, "loss": 0.4269, "step": 2204 }, { "epoch": 0.06084080334693399, "grad_norm": 0.0031814551912248135, "learning_rate": 0.001, "loss": 0.391, "step": 2205 }, { "epoch": 0.06086839554799836, "grad_norm": 0.0054643419571220875, "learning_rate": 0.001, "loss": 0.426, "step": 2206 }, { "epoch": 0.06089598774906273, "grad_norm": 0.0061082998290658, "learning_rate": 0.001, "loss": 0.4021, "step": 2207 }, { "epoch": 0.060923579950127094, "grad_norm": 0.002864258596673608, "learning_rate": 0.001, "loss": 0.3931, "step": 2208 }, { "epoch": 0.06095117215119147, "grad_norm": 0.014014131389558315, "learning_rate": 0.001, "loss": 0.3781, "step": 2209 }, { "epoch": 0.06097876435225583, "grad_norm": 0.0033069124910980463, "learning_rate": 0.001, "loss": 0.4007, "step": 2210 }, { "epoch": 0.06100635655332021, "grad_norm": 0.0032278632279485464, "learning_rate": 0.001, "loss": 0.4836, "step": 2211 }, { "epoch": 0.06103394875438457, "grad_norm": 0.0036987089551985264, "learning_rate": 0.001, "loss": 0.3673, "step": 2212 }, { "epoch": 0.06106154095544894, "grad_norm": 0.006201690062880516, "learning_rate": 0.001, "loss": 0.3988, "step": 2213 }, { "epoch": 0.06108913315651331, "grad_norm": 0.0028827004134655, "learning_rate": 0.001, "loss": 0.4225, "step": 2214 }, { "epoch": 0.06111672535757768, "grad_norm": 0.0034600994549691677, "learning_rate": 0.001, "loss": 0.4068, "step": 2215 }, { "epoch": 0.06114431755864205, "grad_norm": 0.09618855267763138, "learning_rate": 0.001, "loss": 0.4004, "step": 2216 }, { "epoch": 0.06117190975970642, "grad_norm": 0.003716063452884555, "learning_rate": 0.001, "loss": 0.4075, "step": 2217 }, { "epoch": 0.061199501960770786, "grad_norm": 0.002988336840644479, "learning_rate": 0.001, "loss": 0.4147, "step": 2218 }, { "epoch": 0.06122709416183516, "grad_norm": 0.0036907619796693325, "learning_rate": 0.001, "loss": 0.4122, "step": 2219 }, { "epoch": 0.061254686362899526, "grad_norm": 0.002765973797068, "learning_rate": 0.001, "loss": 0.4421, "step": 2220 }, { "epoch": 0.0612822785639639, "grad_norm": 0.00515265017747879, "learning_rate": 0.001, "loss": 0.4076, "step": 2221 }, { "epoch": 0.061309870765028265, "grad_norm": 0.0030234006699174643, "learning_rate": 0.001, "loss": 0.4428, "step": 2222 }, { "epoch": 0.06133746296609263, "grad_norm": 0.007390964776277542, "learning_rate": 0.001, "loss": 0.4148, "step": 2223 }, { "epoch": 0.061365055167157005, "grad_norm": 0.0025635359343141317, "learning_rate": 0.001, "loss": 0.4205, "step": 2224 }, { "epoch": 0.06139264736822137, "grad_norm": 0.0057982392609119415, "learning_rate": 0.001, "loss": 0.4358, "step": 2225 }, { "epoch": 0.06142023956928574, "grad_norm": 0.003062683856114745, "learning_rate": 0.001, "loss": 0.3966, "step": 2226 }, { "epoch": 0.06144783177035011, "grad_norm": 0.0026797873433679342, "learning_rate": 0.001, "loss": 0.3841, "step": 2227 }, { "epoch": 0.06147542397141448, "grad_norm": 0.0024085459299385548, "learning_rate": 0.001, "loss": 0.4376, "step": 2228 }, { "epoch": 0.06150301617247885, "grad_norm": 0.004409399814903736, "learning_rate": 0.001, "loss": 0.4038, "step": 2229 }, { "epoch": 0.06153060837354322, "grad_norm": 0.003322464181110263, "learning_rate": 0.001, "loss": 0.3991, "step": 2230 }, { "epoch": 0.061558200574607584, "grad_norm": 0.00409911060705781, "learning_rate": 0.001, "loss": 0.4184, "step": 2231 }, { "epoch": 0.06158579277567196, "grad_norm": 0.0029169321060180664, "learning_rate": 0.001, "loss": 0.3508, "step": 2232 }, { "epoch": 0.061613384976736324, "grad_norm": 0.00469607999548316, "learning_rate": 0.001, "loss": 0.3692, "step": 2233 }, { "epoch": 0.0616409771778007, "grad_norm": 0.0029155181255191565, "learning_rate": 0.001, "loss": 0.3887, "step": 2234 }, { "epoch": 0.061668569378865064, "grad_norm": 0.003221947466954589, "learning_rate": 0.001, "loss": 0.4239, "step": 2235 }, { "epoch": 0.06169616157992943, "grad_norm": 0.0033768792636692524, "learning_rate": 0.001, "loss": 0.3954, "step": 2236 }, { "epoch": 0.061723753780993804, "grad_norm": 0.002673777285963297, "learning_rate": 0.001, "loss": 0.3536, "step": 2237 }, { "epoch": 0.06175134598205817, "grad_norm": 0.002202989300712943, "learning_rate": 0.001, "loss": 0.4157, "step": 2238 }, { "epoch": 0.06177893818312254, "grad_norm": 0.00238011684268713, "learning_rate": 0.001, "loss": 0.3929, "step": 2239 }, { "epoch": 0.06180653038418691, "grad_norm": 0.0027684099040925503, "learning_rate": 0.001, "loss": 0.4129, "step": 2240 }, { "epoch": 0.061834122585251276, "grad_norm": 0.0032890914008021355, "learning_rate": 0.001, "loss": 0.4205, "step": 2241 }, { "epoch": 0.06186171478631565, "grad_norm": 0.003589882282540202, "learning_rate": 0.001, "loss": 0.4234, "step": 2242 }, { "epoch": 0.061889306987380016, "grad_norm": 0.002842566231265664, "learning_rate": 0.001, "loss": 0.4051, "step": 2243 }, { "epoch": 0.06191689918844439, "grad_norm": 0.0028953177388757467, "learning_rate": 0.001, "loss": 0.4144, "step": 2244 }, { "epoch": 0.061944491389508756, "grad_norm": 0.003262540325522423, "learning_rate": 0.001, "loss": 0.4051, "step": 2245 }, { "epoch": 0.06197208359057312, "grad_norm": 0.004046915099024773, "learning_rate": 0.001, "loss": 0.3918, "step": 2246 }, { "epoch": 0.061999675791637496, "grad_norm": 0.0038636084645986557, "learning_rate": 0.001, "loss": 0.4276, "step": 2247 }, { "epoch": 0.06202726799270186, "grad_norm": 0.002859740052372217, "learning_rate": 0.001, "loss": 0.3932, "step": 2248 }, { "epoch": 0.06205486019376623, "grad_norm": 0.002851482480764389, "learning_rate": 0.001, "loss": 0.4235, "step": 2249 }, { "epoch": 0.0620824523948306, "grad_norm": 0.0031568272970616817, "learning_rate": 0.001, "loss": 0.4335, "step": 2250 }, { "epoch": 0.06211004459589497, "grad_norm": 0.003563236678019166, "learning_rate": 0.001, "loss": 0.3736, "step": 2251 }, { "epoch": 0.06213763679695934, "grad_norm": 0.0029816743917763233, "learning_rate": 0.001, "loss": 0.4153, "step": 2252 }, { "epoch": 0.06216522899802371, "grad_norm": 0.002947515808045864, "learning_rate": 0.001, "loss": 0.4301, "step": 2253 }, { "epoch": 0.062192821199088075, "grad_norm": 0.003983738832175732, "learning_rate": 0.001, "loss": 0.3958, "step": 2254 }, { "epoch": 0.06222041340015245, "grad_norm": 0.0053352476097643375, "learning_rate": 0.001, "loss": 0.4146, "step": 2255 }, { "epoch": 0.062248005601216815, "grad_norm": 0.004124592524021864, "learning_rate": 0.001, "loss": 0.3691, "step": 2256 }, { "epoch": 0.06227559780228119, "grad_norm": 0.004866636358201504, "learning_rate": 0.001, "loss": 0.4341, "step": 2257 }, { "epoch": 0.062303190003345554, "grad_norm": 0.005429052747786045, "learning_rate": 0.001, "loss": 0.4275, "step": 2258 }, { "epoch": 0.06233078220440992, "grad_norm": 0.00399363599717617, "learning_rate": 0.001, "loss": 0.4352, "step": 2259 }, { "epoch": 0.062358374405474294, "grad_norm": 0.0047408780083060265, "learning_rate": 0.001, "loss": 0.3742, "step": 2260 }, { "epoch": 0.06238596660653866, "grad_norm": 0.007790642790496349, "learning_rate": 0.001, "loss": 0.362, "step": 2261 }, { "epoch": 0.062413558807603034, "grad_norm": 0.003895730245858431, "learning_rate": 0.001, "loss": 0.3724, "step": 2262 }, { "epoch": 0.0624411510086674, "grad_norm": 0.004620977211743593, "learning_rate": 0.001, "loss": 0.4067, "step": 2263 }, { "epoch": 0.06246874320973177, "grad_norm": 0.002770553808659315, "learning_rate": 0.001, "loss": 0.3951, "step": 2264 }, { "epoch": 0.06249633541079614, "grad_norm": 0.0061469171196222305, "learning_rate": 0.001, "loss": 0.4015, "step": 2265 }, { "epoch": 0.06252392761186051, "grad_norm": 0.00576475216075778, "learning_rate": 0.001, "loss": 0.391, "step": 2266 }, { "epoch": 0.06255151981292488, "grad_norm": 0.004090351052582264, "learning_rate": 0.001, "loss": 0.3965, "step": 2267 }, { "epoch": 0.06257911201398925, "grad_norm": 0.024996625259518623, "learning_rate": 0.001, "loss": 0.3813, "step": 2268 }, { "epoch": 0.06260670421505361, "grad_norm": 0.0244828462600708, "learning_rate": 0.001, "loss": 0.3979, "step": 2269 }, { "epoch": 0.06263429641611798, "grad_norm": 0.0046532778069376945, "learning_rate": 0.001, "loss": 0.432, "step": 2270 }, { "epoch": 0.06266188861718236, "grad_norm": 0.004564880859106779, "learning_rate": 0.001, "loss": 0.3931, "step": 2271 }, { "epoch": 0.06268948081824673, "grad_norm": 0.0029092628974467516, "learning_rate": 0.001, "loss": 0.4537, "step": 2272 }, { "epoch": 0.06271707301931109, "grad_norm": 0.0033110990189015865, "learning_rate": 0.001, "loss": 0.4023, "step": 2273 }, { "epoch": 0.06274466522037546, "grad_norm": 0.0045304871164262295, "learning_rate": 0.001, "loss": 0.3824, "step": 2274 }, { "epoch": 0.06277225742143983, "grad_norm": 0.0031519641634076834, "learning_rate": 0.001, "loss": 0.392, "step": 2275 }, { "epoch": 0.0627998496225042, "grad_norm": 0.003920139744877815, "learning_rate": 0.001, "loss": 0.3883, "step": 2276 }, { "epoch": 0.06282744182356857, "grad_norm": 0.004257616586983204, "learning_rate": 0.001, "loss": 0.4043, "step": 2277 }, { "epoch": 0.06285503402463294, "grad_norm": 0.0038890133146196604, "learning_rate": 0.001, "loss": 0.4207, "step": 2278 }, { "epoch": 0.0628826262256973, "grad_norm": 0.0028080164920538664, "learning_rate": 0.001, "loss": 0.4085, "step": 2279 }, { "epoch": 0.06291021842676167, "grad_norm": 0.008050143718719482, "learning_rate": 0.001, "loss": 0.415, "step": 2280 }, { "epoch": 0.06293781062782605, "grad_norm": 0.0025884988717734814, "learning_rate": 0.001, "loss": 0.3781, "step": 2281 }, { "epoch": 0.06296540282889042, "grad_norm": 0.0030551603995263577, "learning_rate": 0.001, "loss": 0.4164, "step": 2282 }, { "epoch": 0.06299299502995478, "grad_norm": 0.0027584030758589506, "learning_rate": 0.001, "loss": 0.3816, "step": 2283 }, { "epoch": 0.06302058723101915, "grad_norm": 0.0033046863973140717, "learning_rate": 0.001, "loss": 0.3583, "step": 2284 }, { "epoch": 0.06304817943208352, "grad_norm": 0.0036664491053670645, "learning_rate": 0.001, "loss": 0.405, "step": 2285 }, { "epoch": 0.06307577163314788, "grad_norm": 0.002570797922089696, "learning_rate": 0.001, "loss": 0.4117, "step": 2286 }, { "epoch": 0.06310336383421226, "grad_norm": 0.003837966127321124, "learning_rate": 0.001, "loss": 0.3997, "step": 2287 }, { "epoch": 0.06313095603527663, "grad_norm": 0.00263983360491693, "learning_rate": 0.001, "loss": 0.3935, "step": 2288 }, { "epoch": 0.063158548236341, "grad_norm": 0.0033676191233098507, "learning_rate": 0.001, "loss": 0.4224, "step": 2289 }, { "epoch": 0.06318614043740536, "grad_norm": 0.0033159011509269476, "learning_rate": 0.001, "loss": 0.3714, "step": 2290 }, { "epoch": 0.06321373263846973, "grad_norm": 0.002879378153011203, "learning_rate": 0.001, "loss": 0.4229, "step": 2291 }, { "epoch": 0.06324132483953411, "grad_norm": 0.004017788916826248, "learning_rate": 0.001, "loss": 0.3722, "step": 2292 }, { "epoch": 0.06326891704059848, "grad_norm": 0.0034288205206394196, "learning_rate": 0.001, "loss": 0.3973, "step": 2293 }, { "epoch": 0.06329650924166284, "grad_norm": 0.003537122393026948, "learning_rate": 0.001, "loss": 0.3685, "step": 2294 }, { "epoch": 0.06332410144272721, "grad_norm": 0.002853949787095189, "learning_rate": 0.001, "loss": 0.3989, "step": 2295 }, { "epoch": 0.06335169364379158, "grad_norm": 0.00246223621070385, "learning_rate": 0.001, "loss": 0.4675, "step": 2296 }, { "epoch": 0.06337928584485596, "grad_norm": 0.003921873867511749, "learning_rate": 0.001, "loss": 0.3894, "step": 2297 }, { "epoch": 0.06340687804592032, "grad_norm": 0.0037953928112983704, "learning_rate": 0.001, "loss": 0.4206, "step": 2298 }, { "epoch": 0.06343447024698469, "grad_norm": 0.003141796449199319, "learning_rate": 0.001, "loss": 0.3779, "step": 2299 }, { "epoch": 0.06346206244804906, "grad_norm": 0.0069033862091600895, "learning_rate": 0.001, "loss": 0.4432, "step": 2300 }, { "epoch": 0.06348965464911342, "grad_norm": 0.00247231125831604, "learning_rate": 0.001, "loss": 0.3987, "step": 2301 }, { "epoch": 0.0635172468501778, "grad_norm": 0.003614683635532856, "learning_rate": 0.001, "loss": 0.3708, "step": 2302 }, { "epoch": 0.06354483905124217, "grad_norm": 0.0028486682567745447, "learning_rate": 0.001, "loss": 0.4465, "step": 2303 }, { "epoch": 0.06357243125230654, "grad_norm": 0.0033698193728923798, "learning_rate": 0.001, "loss": 0.3782, "step": 2304 }, { "epoch": 0.0636000234533709, "grad_norm": 0.0028043955098837614, "learning_rate": 0.001, "loss": 0.4198, "step": 2305 }, { "epoch": 0.06362761565443527, "grad_norm": 0.0027136304415762424, "learning_rate": 0.001, "loss": 0.4328, "step": 2306 }, { "epoch": 0.06365520785549965, "grad_norm": 0.00574844004586339, "learning_rate": 0.001, "loss": 0.4002, "step": 2307 }, { "epoch": 0.06368280005656402, "grad_norm": 0.002718136878684163, "learning_rate": 0.001, "loss": 0.443, "step": 2308 }, { "epoch": 0.06371039225762838, "grad_norm": 0.0043097492307424545, "learning_rate": 0.001, "loss": 0.3894, "step": 2309 }, { "epoch": 0.06373798445869275, "grad_norm": 0.006927134469151497, "learning_rate": 0.001, "loss": 0.4365, "step": 2310 }, { "epoch": 0.06376557665975711, "grad_norm": 0.0031723494175821543, "learning_rate": 0.001, "loss": 0.4052, "step": 2311 }, { "epoch": 0.0637931688608215, "grad_norm": 0.0034040913451462984, "learning_rate": 0.001, "loss": 0.3949, "step": 2312 }, { "epoch": 0.06382076106188586, "grad_norm": 0.003764103166759014, "learning_rate": 0.001, "loss": 0.3979, "step": 2313 }, { "epoch": 0.06384835326295023, "grad_norm": 0.0026512970216572285, "learning_rate": 0.001, "loss": 0.4215, "step": 2314 }, { "epoch": 0.0638759454640146, "grad_norm": 0.003220104379579425, "learning_rate": 0.001, "loss": 0.3675, "step": 2315 }, { "epoch": 0.06390353766507896, "grad_norm": 0.0035847953986376524, "learning_rate": 0.001, "loss": 0.4028, "step": 2316 }, { "epoch": 0.06393112986614334, "grad_norm": 0.004416230600327253, "learning_rate": 0.001, "loss": 0.4225, "step": 2317 }, { "epoch": 0.06395872206720771, "grad_norm": 0.00270696054212749, "learning_rate": 0.001, "loss": 0.3667, "step": 2318 }, { "epoch": 0.06398631426827207, "grad_norm": 0.0026564293075352907, "learning_rate": 0.001, "loss": 0.3829, "step": 2319 }, { "epoch": 0.06401390646933644, "grad_norm": 0.0031192628666758537, "learning_rate": 0.001, "loss": 0.3961, "step": 2320 }, { "epoch": 0.0640414986704008, "grad_norm": 0.003101060399785638, "learning_rate": 0.001, "loss": 0.4423, "step": 2321 }, { "epoch": 0.06406909087146519, "grad_norm": 0.00393852312117815, "learning_rate": 0.001, "loss": 0.3976, "step": 2322 }, { "epoch": 0.06409668307252955, "grad_norm": 0.003799998899921775, "learning_rate": 0.001, "loss": 0.4048, "step": 2323 }, { "epoch": 0.06412427527359392, "grad_norm": 0.008037365972995758, "learning_rate": 0.001, "loss": 0.4383, "step": 2324 }, { "epoch": 0.06415186747465829, "grad_norm": 0.004333310294896364, "learning_rate": 0.001, "loss": 0.3666, "step": 2325 }, { "epoch": 0.06417945967572265, "grad_norm": 0.0026929888408631086, "learning_rate": 0.001, "loss": 0.3995, "step": 2326 }, { "epoch": 0.06420705187678703, "grad_norm": 0.0040913717821240425, "learning_rate": 0.001, "loss": 0.4001, "step": 2327 }, { "epoch": 0.0642346440778514, "grad_norm": 0.005933169275522232, "learning_rate": 0.001, "loss": 0.3958, "step": 2328 }, { "epoch": 0.06426223627891577, "grad_norm": 0.003139302134513855, "learning_rate": 0.001, "loss": 0.4112, "step": 2329 }, { "epoch": 0.06428982847998013, "grad_norm": 0.003957003820687532, "learning_rate": 0.001, "loss": 0.3589, "step": 2330 }, { "epoch": 0.0643174206810445, "grad_norm": 0.003573901252821088, "learning_rate": 0.001, "loss": 0.3708, "step": 2331 }, { "epoch": 0.06434501288210887, "grad_norm": 0.0033237498719245195, "learning_rate": 0.001, "loss": 0.4117, "step": 2332 }, { "epoch": 0.06437260508317325, "grad_norm": 0.0034847762435674667, "learning_rate": 0.001, "loss": 0.3974, "step": 2333 }, { "epoch": 0.06440019728423761, "grad_norm": 0.004198945127427578, "learning_rate": 0.001, "loss": 0.3993, "step": 2334 }, { "epoch": 0.06442778948530198, "grad_norm": 0.003995297942310572, "learning_rate": 0.001, "loss": 0.3924, "step": 2335 }, { "epoch": 0.06445538168636634, "grad_norm": 0.003225005231797695, "learning_rate": 0.001, "loss": 0.3985, "step": 2336 }, { "epoch": 0.06448297388743071, "grad_norm": 0.0031134854070842266, "learning_rate": 0.001, "loss": 0.4141, "step": 2337 }, { "epoch": 0.06451056608849509, "grad_norm": 0.004270988516509533, "learning_rate": 0.001, "loss": 0.3993, "step": 2338 }, { "epoch": 0.06453815828955946, "grad_norm": 0.004274028819054365, "learning_rate": 0.001, "loss": 0.4183, "step": 2339 }, { "epoch": 0.06456575049062382, "grad_norm": 0.003787413239479065, "learning_rate": 0.001, "loss": 0.3675, "step": 2340 }, { "epoch": 0.06459334269168819, "grad_norm": 0.003217950463294983, "learning_rate": 0.001, "loss": 0.4109, "step": 2341 }, { "epoch": 0.06462093489275256, "grad_norm": 0.0038233373779803514, "learning_rate": 0.001, "loss": 0.3671, "step": 2342 }, { "epoch": 0.06464852709381694, "grad_norm": 0.0038631544448435307, "learning_rate": 0.001, "loss": 0.3884, "step": 2343 }, { "epoch": 0.0646761192948813, "grad_norm": 0.002935303607955575, "learning_rate": 0.001, "loss": 0.4283, "step": 2344 }, { "epoch": 0.06470371149594567, "grad_norm": 0.004563242197036743, "learning_rate": 0.001, "loss": 0.3883, "step": 2345 }, { "epoch": 0.06473130369701004, "grad_norm": 0.004774071741849184, "learning_rate": 0.001, "loss": 0.409, "step": 2346 }, { "epoch": 0.0647588958980744, "grad_norm": 0.0027761361561715603, "learning_rate": 0.001, "loss": 0.435, "step": 2347 }, { "epoch": 0.06478648809913878, "grad_norm": 0.0030395982321351767, "learning_rate": 0.001, "loss": 0.4186, "step": 2348 }, { "epoch": 0.06481408030020315, "grad_norm": 0.034988418221473694, "learning_rate": 0.001, "loss": 0.4053, "step": 2349 }, { "epoch": 0.06484167250126752, "grad_norm": 0.003620629198849201, "learning_rate": 0.001, "loss": 0.3867, "step": 2350 }, { "epoch": 0.06486926470233188, "grad_norm": 0.003206313122063875, "learning_rate": 0.001, "loss": 0.4027, "step": 2351 }, { "epoch": 0.06489685690339625, "grad_norm": 0.0028597121126949787, "learning_rate": 0.001, "loss": 0.3927, "step": 2352 }, { "epoch": 0.06492444910446063, "grad_norm": 0.0034794441889971495, "learning_rate": 0.001, "loss": 0.3854, "step": 2353 }, { "epoch": 0.064952041305525, "grad_norm": 0.0031176162883639336, "learning_rate": 0.001, "loss": 0.3651, "step": 2354 }, { "epoch": 0.06497963350658936, "grad_norm": 0.0035436085890978575, "learning_rate": 0.001, "loss": 0.37, "step": 2355 }, { "epoch": 0.06500722570765373, "grad_norm": 0.003136987565085292, "learning_rate": 0.001, "loss": 0.4029, "step": 2356 }, { "epoch": 0.0650348179087181, "grad_norm": 0.003193167271092534, "learning_rate": 0.001, "loss": 0.4076, "step": 2357 }, { "epoch": 0.06506241010978248, "grad_norm": 0.0033892260398715734, "learning_rate": 0.001, "loss": 0.3589, "step": 2358 }, { "epoch": 0.06509000231084684, "grad_norm": 0.0065610939636826515, "learning_rate": 0.001, "loss": 0.3956, "step": 2359 }, { "epoch": 0.06511759451191121, "grad_norm": 0.005481204017996788, "learning_rate": 0.001, "loss": 0.3597, "step": 2360 }, { "epoch": 0.06514518671297558, "grad_norm": 0.0103605967015028, "learning_rate": 0.001, "loss": 0.3356, "step": 2361 }, { "epoch": 0.06517277891403994, "grad_norm": 0.0066803195513784885, "learning_rate": 0.001, "loss": 0.426, "step": 2362 }, { "epoch": 0.06520037111510432, "grad_norm": 0.004259153269231319, "learning_rate": 0.001, "loss": 0.4293, "step": 2363 }, { "epoch": 0.06522796331616869, "grad_norm": 0.012415111064910889, "learning_rate": 0.001, "loss": 0.3602, "step": 2364 }, { "epoch": 0.06525555551723305, "grad_norm": 0.0029489900916814804, "learning_rate": 0.001, "loss": 0.4193, "step": 2365 }, { "epoch": 0.06528314771829742, "grad_norm": 0.006090483628213406, "learning_rate": 0.001, "loss": 0.379, "step": 2366 }, { "epoch": 0.06531073991936179, "grad_norm": 0.003908079583197832, "learning_rate": 0.001, "loss": 0.3831, "step": 2367 }, { "epoch": 0.06533833212042617, "grad_norm": 0.0039390516467392445, "learning_rate": 0.001, "loss": 0.4163, "step": 2368 }, { "epoch": 0.06536592432149053, "grad_norm": 0.009213199838995934, "learning_rate": 0.001, "loss": 0.4277, "step": 2369 }, { "epoch": 0.0653935165225549, "grad_norm": 0.0031545236706733704, "learning_rate": 0.001, "loss": 0.4037, "step": 2370 }, { "epoch": 0.06542110872361927, "grad_norm": 0.003665713593363762, "learning_rate": 0.001, "loss": 0.379, "step": 2371 }, { "epoch": 0.06544870092468363, "grad_norm": 0.00421819556504488, "learning_rate": 0.001, "loss": 0.3466, "step": 2372 }, { "epoch": 0.06547629312574801, "grad_norm": 0.0041595143266022205, "learning_rate": 0.001, "loss": 0.3893, "step": 2373 }, { "epoch": 0.06550388532681238, "grad_norm": 0.00258744228631258, "learning_rate": 0.001, "loss": 0.3991, "step": 2374 }, { "epoch": 0.06553147752787675, "grad_norm": 0.0025976793840527534, "learning_rate": 0.001, "loss": 0.4073, "step": 2375 }, { "epoch": 0.06555906972894111, "grad_norm": 0.002732262946665287, "learning_rate": 0.001, "loss": 0.4031, "step": 2376 }, { "epoch": 0.06558666193000548, "grad_norm": 0.0024393522180616856, "learning_rate": 0.001, "loss": 0.3939, "step": 2377 }, { "epoch": 0.06561425413106985, "grad_norm": 0.006330225151032209, "learning_rate": 0.001, "loss": 0.3867, "step": 2378 }, { "epoch": 0.06564184633213423, "grad_norm": 0.0031101806089282036, "learning_rate": 0.001, "loss": 0.362, "step": 2379 }, { "epoch": 0.06566943853319859, "grad_norm": 0.0026855298783630133, "learning_rate": 0.001, "loss": 0.4196, "step": 2380 }, { "epoch": 0.06569703073426296, "grad_norm": 0.002515793778002262, "learning_rate": 0.001, "loss": 0.4513, "step": 2381 }, { "epoch": 0.06572462293532733, "grad_norm": 0.0025803048629313707, "learning_rate": 0.001, "loss": 0.3932, "step": 2382 }, { "epoch": 0.06575221513639169, "grad_norm": 0.0031242026016116142, "learning_rate": 0.001, "loss": 0.3867, "step": 2383 }, { "epoch": 0.06577980733745607, "grad_norm": 0.003442540764808655, "learning_rate": 0.001, "loss": 0.425, "step": 2384 }, { "epoch": 0.06580739953852044, "grad_norm": 0.0035992697812616825, "learning_rate": 0.001, "loss": 0.3475, "step": 2385 }, { "epoch": 0.0658349917395848, "grad_norm": 0.0026494376361370087, "learning_rate": 0.001, "loss": 0.4028, "step": 2386 }, { "epoch": 0.06586258394064917, "grad_norm": 0.0026794499717652798, "learning_rate": 0.001, "loss": 0.4355, "step": 2387 }, { "epoch": 0.06589017614171354, "grad_norm": 0.003907007165253162, "learning_rate": 0.001, "loss": 0.4043, "step": 2388 }, { "epoch": 0.06591776834277792, "grad_norm": 0.0038369898684322834, "learning_rate": 0.001, "loss": 0.3949, "step": 2389 }, { "epoch": 0.06594536054384229, "grad_norm": 0.003516831435263157, "learning_rate": 0.001, "loss": 0.4073, "step": 2390 }, { "epoch": 0.06597295274490665, "grad_norm": 0.0029046509880572557, "learning_rate": 0.001, "loss": 0.4192, "step": 2391 }, { "epoch": 0.06600054494597102, "grad_norm": 0.0029956242069602013, "learning_rate": 0.001, "loss": 0.3925, "step": 2392 }, { "epoch": 0.06602813714703538, "grad_norm": 0.0032762791961431503, "learning_rate": 0.001, "loss": 0.4313, "step": 2393 }, { "epoch": 0.06605572934809976, "grad_norm": 0.0029915180057287216, "learning_rate": 0.001, "loss": 0.4053, "step": 2394 }, { "epoch": 0.06608332154916413, "grad_norm": 0.004418436903506517, "learning_rate": 0.001, "loss": 0.3815, "step": 2395 }, { "epoch": 0.0661109137502285, "grad_norm": 0.003055717097595334, "learning_rate": 0.001, "loss": 0.3776, "step": 2396 }, { "epoch": 0.06613850595129286, "grad_norm": 0.0034056156873703003, "learning_rate": 0.001, "loss": 0.3617, "step": 2397 }, { "epoch": 0.06616609815235723, "grad_norm": 0.002760351402685046, "learning_rate": 0.001, "loss": 0.4072, "step": 2398 }, { "epoch": 0.06619369035342161, "grad_norm": 0.0054167998023331165, "learning_rate": 0.001, "loss": 0.4098, "step": 2399 }, { "epoch": 0.06622128255448598, "grad_norm": 0.002874811412766576, "learning_rate": 0.001, "loss": 0.3747, "step": 2400 }, { "epoch": 0.06624887475555034, "grad_norm": 0.0038131948094815016, "learning_rate": 0.001, "loss": 0.3841, "step": 2401 }, { "epoch": 0.06627646695661471, "grad_norm": 0.01067175529897213, "learning_rate": 0.001, "loss": 0.4112, "step": 2402 }, { "epoch": 0.06630405915767908, "grad_norm": 0.003224137471988797, "learning_rate": 0.001, "loss": 0.426, "step": 2403 }, { "epoch": 0.06633165135874346, "grad_norm": 0.004995389375835657, "learning_rate": 0.001, "loss": 0.4192, "step": 2404 }, { "epoch": 0.06635924355980782, "grad_norm": 0.0033968077041208744, "learning_rate": 0.001, "loss": 0.3858, "step": 2405 }, { "epoch": 0.06638683576087219, "grad_norm": 0.003337099449709058, "learning_rate": 0.001, "loss": 0.3995, "step": 2406 }, { "epoch": 0.06641442796193656, "grad_norm": 0.0033400380052626133, "learning_rate": 0.001, "loss": 0.4082, "step": 2407 }, { "epoch": 0.06644202016300092, "grad_norm": 0.0059796725399792194, "learning_rate": 0.001, "loss": 0.4014, "step": 2408 }, { "epoch": 0.0664696123640653, "grad_norm": 0.0036425914149731398, "learning_rate": 0.001, "loss": 0.3791, "step": 2409 }, { "epoch": 0.06649720456512967, "grad_norm": 0.003386643249541521, "learning_rate": 0.001, "loss": 0.4226, "step": 2410 }, { "epoch": 0.06652479676619404, "grad_norm": 0.0025921366177499294, "learning_rate": 0.001, "loss": 0.4242, "step": 2411 }, { "epoch": 0.0665523889672584, "grad_norm": 0.003707844065502286, "learning_rate": 0.001, "loss": 0.3935, "step": 2412 }, { "epoch": 0.06657998116832277, "grad_norm": 0.005570289213210344, "learning_rate": 0.001, "loss": 0.395, "step": 2413 }, { "epoch": 0.06660757336938715, "grad_norm": 0.004504153970628977, "learning_rate": 0.001, "loss": 0.4011, "step": 2414 }, { "epoch": 0.06663516557045152, "grad_norm": 0.0021061068400740623, "learning_rate": 0.001, "loss": 0.4058, "step": 2415 }, { "epoch": 0.06666275777151588, "grad_norm": 0.00822344422340393, "learning_rate": 0.001, "loss": 0.3713, "step": 2416 }, { "epoch": 0.06669034997258025, "grad_norm": 0.003418115433305502, "learning_rate": 0.001, "loss": 0.4284, "step": 2417 }, { "epoch": 0.06671794217364461, "grad_norm": 0.015280312858521938, "learning_rate": 0.001, "loss": 0.3837, "step": 2418 }, { "epoch": 0.066745534374709, "grad_norm": 0.006191336549818516, "learning_rate": 0.001, "loss": 0.3973, "step": 2419 }, { "epoch": 0.06677312657577336, "grad_norm": 0.0025210208259522915, "learning_rate": 0.001, "loss": 0.3973, "step": 2420 }, { "epoch": 0.06680071877683773, "grad_norm": 0.003629886545240879, "learning_rate": 0.001, "loss": 0.4107, "step": 2421 }, { "epoch": 0.0668283109779021, "grad_norm": 0.005250046495348215, "learning_rate": 0.001, "loss": 0.3935, "step": 2422 }, { "epoch": 0.06685590317896646, "grad_norm": 0.0036179288290441036, "learning_rate": 0.001, "loss": 0.3925, "step": 2423 }, { "epoch": 0.06688349538003083, "grad_norm": 0.005279705859720707, "learning_rate": 0.001, "loss": 0.3952, "step": 2424 }, { "epoch": 0.06691108758109521, "grad_norm": 0.0030266193207353354, "learning_rate": 0.001, "loss": 0.38, "step": 2425 }, { "epoch": 0.06693867978215957, "grad_norm": 0.007787918671965599, "learning_rate": 0.001, "loss": 0.3782, "step": 2426 }, { "epoch": 0.06696627198322394, "grad_norm": 0.00260373717173934, "learning_rate": 0.001, "loss": 0.3733, "step": 2427 }, { "epoch": 0.06699386418428831, "grad_norm": 0.002870837925001979, "learning_rate": 0.001, "loss": 0.4329, "step": 2428 }, { "epoch": 0.06702145638535267, "grad_norm": 0.0024147257208824158, "learning_rate": 0.001, "loss": 0.4342, "step": 2429 }, { "epoch": 0.06704904858641705, "grad_norm": 0.003153110621497035, "learning_rate": 0.001, "loss": 0.4107, "step": 2430 }, { "epoch": 0.06707664078748142, "grad_norm": 0.0036289046984165907, "learning_rate": 0.001, "loss": 0.3967, "step": 2431 }, { "epoch": 0.06710423298854579, "grad_norm": 0.003097312757745385, "learning_rate": 0.001, "loss": 0.4015, "step": 2432 }, { "epoch": 0.06713182518961015, "grad_norm": 0.003282829187810421, "learning_rate": 0.001, "loss": 0.3956, "step": 2433 }, { "epoch": 0.06715941739067452, "grad_norm": 0.006064482033252716, "learning_rate": 0.001, "loss": 0.4164, "step": 2434 }, { "epoch": 0.0671870095917389, "grad_norm": 0.007535384502261877, "learning_rate": 0.001, "loss": 0.4349, "step": 2435 }, { "epoch": 0.06721460179280327, "grad_norm": 0.003580626333132386, "learning_rate": 0.001, "loss": 0.3686, "step": 2436 }, { "epoch": 0.06724219399386763, "grad_norm": 0.004072824027389288, "learning_rate": 0.001, "loss": 0.4207, "step": 2437 }, { "epoch": 0.067269786194932, "grad_norm": 0.0034644294064491987, "learning_rate": 0.001, "loss": 0.4201, "step": 2438 }, { "epoch": 0.06729737839599637, "grad_norm": 0.0031727415043860674, "learning_rate": 0.001, "loss": 0.4182, "step": 2439 }, { "epoch": 0.06732497059706075, "grad_norm": 0.0025959559716284275, "learning_rate": 0.001, "loss": 0.4412, "step": 2440 }, { "epoch": 0.06735256279812511, "grad_norm": 0.007867682725191116, "learning_rate": 0.001, "loss": 0.3974, "step": 2441 }, { "epoch": 0.06738015499918948, "grad_norm": 0.003531220369040966, "learning_rate": 0.001, "loss": 0.3763, "step": 2442 }, { "epoch": 0.06740774720025385, "grad_norm": 0.0036349524743855, "learning_rate": 0.001, "loss": 0.4136, "step": 2443 }, { "epoch": 0.06743533940131821, "grad_norm": 0.003158099949359894, "learning_rate": 0.001, "loss": 0.429, "step": 2444 }, { "epoch": 0.06746293160238259, "grad_norm": 0.002893587574362755, "learning_rate": 0.001, "loss": 0.4316, "step": 2445 }, { "epoch": 0.06749052380344696, "grad_norm": 0.002769331680610776, "learning_rate": 0.001, "loss": 0.3848, "step": 2446 }, { "epoch": 0.06751811600451132, "grad_norm": 0.0028869437519460917, "learning_rate": 0.001, "loss": 0.3884, "step": 2447 }, { "epoch": 0.06754570820557569, "grad_norm": 0.004190088715404272, "learning_rate": 0.001, "loss": 0.3899, "step": 2448 }, { "epoch": 0.06757330040664006, "grad_norm": 0.00414698664098978, "learning_rate": 0.001, "loss": 0.3769, "step": 2449 }, { "epoch": 0.06760089260770444, "grad_norm": 0.0034248684532940388, "learning_rate": 0.001, "loss": 0.4067, "step": 2450 }, { "epoch": 0.0676284848087688, "grad_norm": 0.0032379438634961843, "learning_rate": 0.001, "loss": 0.3517, "step": 2451 }, { "epoch": 0.06765607700983317, "grad_norm": 0.0037679013330489397, "learning_rate": 0.001, "loss": 0.3754, "step": 2452 }, { "epoch": 0.06768366921089754, "grad_norm": 0.0025305391754955053, "learning_rate": 0.001, "loss": 0.3749, "step": 2453 }, { "epoch": 0.0677112614119619, "grad_norm": 0.010560314171016216, "learning_rate": 0.001, "loss": 0.3775, "step": 2454 }, { "epoch": 0.06773885361302628, "grad_norm": 0.0022722717840224504, "learning_rate": 0.001, "loss": 0.4212, "step": 2455 }, { "epoch": 0.06776644581409065, "grad_norm": 0.0035940262023359537, "learning_rate": 0.001, "loss": 0.3902, "step": 2456 }, { "epoch": 0.06779403801515502, "grad_norm": 0.0032793956343084574, "learning_rate": 0.001, "loss": 0.381, "step": 2457 }, { "epoch": 0.06782163021621938, "grad_norm": 0.0032437799964100122, "learning_rate": 0.001, "loss": 0.3787, "step": 2458 }, { "epoch": 0.06784922241728375, "grad_norm": 0.004813835956156254, "learning_rate": 0.001, "loss": 0.3529, "step": 2459 }, { "epoch": 0.06787681461834813, "grad_norm": 0.0028039722237735987, "learning_rate": 0.001, "loss": 0.384, "step": 2460 }, { "epoch": 0.0679044068194125, "grad_norm": 0.0028152938466519117, "learning_rate": 0.001, "loss": 0.396, "step": 2461 }, { "epoch": 0.06793199902047686, "grad_norm": 0.004003360401839018, "learning_rate": 0.001, "loss": 0.3996, "step": 2462 }, { "epoch": 0.06795959122154123, "grad_norm": 0.003837777767330408, "learning_rate": 0.001, "loss": 0.3715, "step": 2463 }, { "epoch": 0.0679871834226056, "grad_norm": 0.0043929507955908775, "learning_rate": 0.001, "loss": 0.3933, "step": 2464 }, { "epoch": 0.06801477562366998, "grad_norm": 0.005224680993705988, "learning_rate": 0.001, "loss": 0.4112, "step": 2465 }, { "epoch": 0.06804236782473434, "grad_norm": 0.0029568690806627274, "learning_rate": 0.001, "loss": 0.3845, "step": 2466 }, { "epoch": 0.06806996002579871, "grad_norm": 0.0061539956368505955, "learning_rate": 0.001, "loss": 0.3828, "step": 2467 }, { "epoch": 0.06809755222686308, "grad_norm": 0.003775696037337184, "learning_rate": 0.001, "loss": 0.3627, "step": 2468 }, { "epoch": 0.06812514442792744, "grad_norm": 0.003055511973798275, "learning_rate": 0.001, "loss": 0.4198, "step": 2469 }, { "epoch": 0.06815273662899182, "grad_norm": 0.004253302235156298, "learning_rate": 0.001, "loss": 0.391, "step": 2470 }, { "epoch": 0.06818032883005619, "grad_norm": 0.00398953165858984, "learning_rate": 0.001, "loss": 0.3917, "step": 2471 }, { "epoch": 0.06820792103112056, "grad_norm": 0.005971815902739763, "learning_rate": 0.001, "loss": 0.3704, "step": 2472 }, { "epoch": 0.06823551323218492, "grad_norm": 0.0031450032256543636, "learning_rate": 0.001, "loss": 0.4128, "step": 2473 }, { "epoch": 0.06826310543324929, "grad_norm": 0.013066442683339119, "learning_rate": 0.001, "loss": 0.374, "step": 2474 }, { "epoch": 0.06829069763431365, "grad_norm": 0.005165675655007362, "learning_rate": 0.001, "loss": 0.4194, "step": 2475 }, { "epoch": 0.06831828983537803, "grad_norm": 0.002924390835687518, "learning_rate": 0.001, "loss": 0.3841, "step": 2476 }, { "epoch": 0.0683458820364424, "grad_norm": 0.0032948816660791636, "learning_rate": 0.001, "loss": 0.3903, "step": 2477 }, { "epoch": 0.06837347423750677, "grad_norm": 0.004480746109038591, "learning_rate": 0.001, "loss": 0.3155, "step": 2478 }, { "epoch": 0.06840106643857113, "grad_norm": 0.0036851740442216396, "learning_rate": 0.001, "loss": 0.3506, "step": 2479 }, { "epoch": 0.0684286586396355, "grad_norm": 0.0027917283587157726, "learning_rate": 0.001, "loss": 0.3936, "step": 2480 }, { "epoch": 0.06845625084069988, "grad_norm": 0.0024517588317394257, "learning_rate": 0.001, "loss": 0.4228, "step": 2481 }, { "epoch": 0.06848384304176425, "grad_norm": 0.002968950429931283, "learning_rate": 0.001, "loss": 0.4209, "step": 2482 }, { "epoch": 0.06851143524282861, "grad_norm": 0.0027774290647357702, "learning_rate": 0.001, "loss": 0.3914, "step": 2483 }, { "epoch": 0.06853902744389298, "grad_norm": 0.002697261283174157, "learning_rate": 0.001, "loss": 0.4455, "step": 2484 }, { "epoch": 0.06856661964495735, "grad_norm": 0.003583157667890191, "learning_rate": 0.001, "loss": 0.397, "step": 2485 }, { "epoch": 0.06859421184602173, "grad_norm": 0.002954701893031597, "learning_rate": 0.001, "loss": 0.4139, "step": 2486 }, { "epoch": 0.0686218040470861, "grad_norm": 0.00595908472314477, "learning_rate": 0.001, "loss": 0.4411, "step": 2487 }, { "epoch": 0.06864939624815046, "grad_norm": 0.0025385827757418156, "learning_rate": 0.001, "loss": 0.3983, "step": 2488 }, { "epoch": 0.06867698844921483, "grad_norm": 0.0031357340048998594, "learning_rate": 0.001, "loss": 0.3998, "step": 2489 }, { "epoch": 0.06870458065027919, "grad_norm": 0.006353291217237711, "learning_rate": 0.001, "loss": 0.3985, "step": 2490 }, { "epoch": 0.06873217285134357, "grad_norm": 0.0036874916404485703, "learning_rate": 0.001, "loss": 0.3951, "step": 2491 }, { "epoch": 0.06875976505240794, "grad_norm": 0.0032723871991038322, "learning_rate": 0.001, "loss": 0.4342, "step": 2492 }, { "epoch": 0.0687873572534723, "grad_norm": 0.004007409326732159, "learning_rate": 0.001, "loss": 0.3908, "step": 2493 }, { "epoch": 0.06881494945453667, "grad_norm": 0.0029772506095469, "learning_rate": 0.001, "loss": 0.3861, "step": 2494 }, { "epoch": 0.06884254165560104, "grad_norm": 0.002340099308639765, "learning_rate": 0.001, "loss": 0.4351, "step": 2495 }, { "epoch": 0.06887013385666542, "grad_norm": 0.0038869476411491632, "learning_rate": 0.001, "loss": 0.4013, "step": 2496 }, { "epoch": 0.06889772605772979, "grad_norm": 0.004465511068701744, "learning_rate": 0.001, "loss": 0.4003, "step": 2497 }, { "epoch": 0.06892531825879415, "grad_norm": 0.004165589809417725, "learning_rate": 0.001, "loss": 0.3973, "step": 2498 }, { "epoch": 0.06895291045985852, "grad_norm": 0.0023313488345593214, "learning_rate": 0.001, "loss": 0.4059, "step": 2499 }, { "epoch": 0.06898050266092288, "grad_norm": 0.004344920627772808, "learning_rate": 0.001, "loss": 0.3938, "step": 2500 }, { "epoch": 0.06898050266092288, "eval_runtime": 24.3559, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.164, "step": 2500 }, { "epoch": 0.06900809486198727, "grad_norm": 0.0032904972322285175, "learning_rate": 0.001, "loss": 0.4215, "step": 2501 }, { "epoch": 0.06903568706305163, "grad_norm": 0.0037113146390765905, "learning_rate": 0.001, "loss": 0.3731, "step": 2502 }, { "epoch": 0.069063279264116, "grad_norm": 0.0028964923694729805, "learning_rate": 0.001, "loss": 0.4537, "step": 2503 }, { "epoch": 0.06909087146518036, "grad_norm": 0.005055161193013191, "learning_rate": 0.001, "loss": 0.3704, "step": 2504 }, { "epoch": 0.06911846366624473, "grad_norm": 0.0033460462000221014, "learning_rate": 0.001, "loss": 0.3886, "step": 2505 }, { "epoch": 0.06914605586730911, "grad_norm": 0.00268213776871562, "learning_rate": 0.001, "loss": 0.3913, "step": 2506 }, { "epoch": 0.06917364806837348, "grad_norm": 0.0038208633195608854, "learning_rate": 0.001, "loss": 0.3798, "step": 2507 }, { "epoch": 0.06920124026943784, "grad_norm": 0.0029110198374837637, "learning_rate": 0.001, "loss": 0.4377, "step": 2508 }, { "epoch": 0.06922883247050221, "grad_norm": 0.004070633556693792, "learning_rate": 0.001, "loss": 0.4066, "step": 2509 }, { "epoch": 0.06925642467156658, "grad_norm": 0.004861120600253344, "learning_rate": 0.001, "loss": 0.4234, "step": 2510 }, { "epoch": 0.06928401687263096, "grad_norm": 0.004837087355554104, "learning_rate": 0.001, "loss": 0.3942, "step": 2511 }, { "epoch": 0.06931160907369532, "grad_norm": 0.005938942078500986, "learning_rate": 0.001, "loss": 0.3833, "step": 2512 }, { "epoch": 0.06933920127475969, "grad_norm": 0.007488689385354519, "learning_rate": 0.001, "loss": 0.4109, "step": 2513 }, { "epoch": 0.06936679347582406, "grad_norm": 0.007844404317438602, "learning_rate": 0.001, "loss": 0.3895, "step": 2514 }, { "epoch": 0.06939438567688842, "grad_norm": 0.006541546434164047, "learning_rate": 0.001, "loss": 0.3925, "step": 2515 }, { "epoch": 0.0694219778779528, "grad_norm": 0.004513300955295563, "learning_rate": 0.001, "loss": 0.3886, "step": 2516 }, { "epoch": 0.06944957007901717, "grad_norm": 0.0028703995048999786, "learning_rate": 0.001, "loss": 0.4453, "step": 2517 }, { "epoch": 0.06947716228008154, "grad_norm": 0.0036693988367915154, "learning_rate": 0.001, "loss": 0.4475, "step": 2518 }, { "epoch": 0.0695047544811459, "grad_norm": 0.0035753645934164524, "learning_rate": 0.001, "loss": 0.3993, "step": 2519 }, { "epoch": 0.06953234668221027, "grad_norm": 0.0029638251289725304, "learning_rate": 0.001, "loss": 0.3883, "step": 2520 }, { "epoch": 0.06955993888327464, "grad_norm": 0.002767800120636821, "learning_rate": 0.001, "loss": 0.4263, "step": 2521 }, { "epoch": 0.06958753108433902, "grad_norm": 0.00324231362901628, "learning_rate": 0.001, "loss": 0.4276, "step": 2522 }, { "epoch": 0.06961512328540338, "grad_norm": 0.0024813879281282425, "learning_rate": 0.001, "loss": 0.38, "step": 2523 }, { "epoch": 0.06964271548646775, "grad_norm": 0.0032533248886466026, "learning_rate": 0.001, "loss": 0.3958, "step": 2524 }, { "epoch": 0.06967030768753212, "grad_norm": 0.005439402535557747, "learning_rate": 0.001, "loss": 0.3758, "step": 2525 }, { "epoch": 0.06969789988859648, "grad_norm": 0.002291264943778515, "learning_rate": 0.001, "loss": 0.4292, "step": 2526 }, { "epoch": 0.06972549208966086, "grad_norm": 0.0028953540604561567, "learning_rate": 0.001, "loss": 0.4194, "step": 2527 }, { "epoch": 0.06975308429072523, "grad_norm": 0.003083001123741269, "learning_rate": 0.001, "loss": 0.4227, "step": 2528 }, { "epoch": 0.0697806764917896, "grad_norm": 0.00382791250012815, "learning_rate": 0.001, "loss": 0.3844, "step": 2529 }, { "epoch": 0.06980826869285396, "grad_norm": 0.004844884853810072, "learning_rate": 0.001, "loss": 0.3824, "step": 2530 }, { "epoch": 0.06983586089391833, "grad_norm": 0.0025243901181966066, "learning_rate": 0.001, "loss": 0.4334, "step": 2531 }, { "epoch": 0.06986345309498271, "grad_norm": 0.003347366116940975, "learning_rate": 0.001, "loss": 0.3989, "step": 2532 }, { "epoch": 0.06989104529604707, "grad_norm": 0.004716082476079464, "learning_rate": 0.001, "loss": 0.3485, "step": 2533 }, { "epoch": 0.06991863749711144, "grad_norm": 0.004542906302958727, "learning_rate": 0.001, "loss": 0.4021, "step": 2534 }, { "epoch": 0.06994622969817581, "grad_norm": 0.002997052390128374, "learning_rate": 0.001, "loss": 0.4181, "step": 2535 }, { "epoch": 0.06997382189924017, "grad_norm": 0.0032483143731951714, "learning_rate": 0.001, "loss": 0.3827, "step": 2536 }, { "epoch": 0.07000141410030455, "grad_norm": 0.0036252515856176615, "learning_rate": 0.001, "loss": 0.4184, "step": 2537 }, { "epoch": 0.07002900630136892, "grad_norm": 0.003113416489213705, "learning_rate": 0.001, "loss": 0.3858, "step": 2538 }, { "epoch": 0.07005659850243329, "grad_norm": 0.003129280637949705, "learning_rate": 0.001, "loss": 0.3711, "step": 2539 }, { "epoch": 0.07008419070349765, "grad_norm": 0.005747736897319555, "learning_rate": 0.001, "loss": 0.4057, "step": 2540 }, { "epoch": 0.07011178290456202, "grad_norm": 0.01309170015156269, "learning_rate": 0.001, "loss": 0.4313, "step": 2541 }, { "epoch": 0.0701393751056264, "grad_norm": 0.0030168737284839153, "learning_rate": 0.001, "loss": 0.3878, "step": 2542 }, { "epoch": 0.07016696730669077, "grad_norm": 0.0039615570567548275, "learning_rate": 0.001, "loss": 0.4214, "step": 2543 }, { "epoch": 0.07019455950775513, "grad_norm": 0.034471940249204636, "learning_rate": 0.001, "loss": 0.3899, "step": 2544 }, { "epoch": 0.0702221517088195, "grad_norm": 0.00423240615054965, "learning_rate": 0.001, "loss": 0.3951, "step": 2545 }, { "epoch": 0.07024974390988387, "grad_norm": 0.004391579423099756, "learning_rate": 0.001, "loss": 0.3852, "step": 2546 }, { "epoch": 0.07027733611094825, "grad_norm": 0.004261813126504421, "learning_rate": 0.001, "loss": 0.3933, "step": 2547 }, { "epoch": 0.07030492831201261, "grad_norm": 0.005609198939055204, "learning_rate": 0.001, "loss": 0.3745, "step": 2548 }, { "epoch": 0.07033252051307698, "grad_norm": 0.0033081392757594585, "learning_rate": 0.001, "loss": 0.3667, "step": 2549 }, { "epoch": 0.07036011271414135, "grad_norm": 0.0029132398776710033, "learning_rate": 0.001, "loss": 0.3935, "step": 2550 }, { "epoch": 0.07038770491520571, "grad_norm": 0.004867136478424072, "learning_rate": 0.001, "loss": 0.3799, "step": 2551 }, { "epoch": 0.07041529711627009, "grad_norm": 0.004058686550706625, "learning_rate": 0.001, "loss": 0.4201, "step": 2552 }, { "epoch": 0.07044288931733446, "grad_norm": 0.0027920163702219725, "learning_rate": 0.001, "loss": 0.426, "step": 2553 }, { "epoch": 0.07047048151839883, "grad_norm": 0.00434753717854619, "learning_rate": 0.001, "loss": 0.3935, "step": 2554 }, { "epoch": 0.07049807371946319, "grad_norm": 0.0051083932630717754, "learning_rate": 0.001, "loss": 0.3911, "step": 2555 }, { "epoch": 0.07052566592052756, "grad_norm": 0.0038204751908779144, "learning_rate": 0.001, "loss": 0.381, "step": 2556 }, { "epoch": 0.07055325812159194, "grad_norm": 0.0038525923155248165, "learning_rate": 0.001, "loss": 0.4203, "step": 2557 }, { "epoch": 0.0705808503226563, "grad_norm": 0.0032179048284888268, "learning_rate": 0.001, "loss": 0.4046, "step": 2558 }, { "epoch": 0.07060844252372067, "grad_norm": 0.008977223187685013, "learning_rate": 0.001, "loss": 0.4188, "step": 2559 }, { "epoch": 0.07063603472478504, "grad_norm": 0.004340124782174826, "learning_rate": 0.001, "loss": 0.3442, "step": 2560 }, { "epoch": 0.0706636269258494, "grad_norm": 0.0031930410768836737, "learning_rate": 0.001, "loss": 0.4036, "step": 2561 }, { "epoch": 0.07069121912691378, "grad_norm": 0.0034286684822291136, "learning_rate": 0.001, "loss": 0.3815, "step": 2562 }, { "epoch": 0.07071881132797815, "grad_norm": 0.003299806034192443, "learning_rate": 0.001, "loss": 0.3759, "step": 2563 }, { "epoch": 0.07074640352904252, "grad_norm": 0.0048191421665251255, "learning_rate": 0.001, "loss": 0.3997, "step": 2564 }, { "epoch": 0.07077399573010688, "grad_norm": 0.006031329277902842, "learning_rate": 0.001, "loss": 0.4296, "step": 2565 }, { "epoch": 0.07080158793117125, "grad_norm": 0.00475485622882843, "learning_rate": 0.001, "loss": 0.3825, "step": 2566 }, { "epoch": 0.07082918013223562, "grad_norm": 0.0029015771578997374, "learning_rate": 0.001, "loss": 0.4159, "step": 2567 }, { "epoch": 0.0708567723333, "grad_norm": 0.0026210874784737825, "learning_rate": 0.001, "loss": 0.3808, "step": 2568 }, { "epoch": 0.07088436453436436, "grad_norm": 0.00292952754534781, "learning_rate": 0.001, "loss": 0.4424, "step": 2569 }, { "epoch": 0.07091195673542873, "grad_norm": 0.004151395056396723, "learning_rate": 0.001, "loss": 0.3488, "step": 2570 }, { "epoch": 0.0709395489364931, "grad_norm": 0.003007207065820694, "learning_rate": 0.001, "loss": 0.4021, "step": 2571 }, { "epoch": 0.07096714113755746, "grad_norm": 0.0025580485817044973, "learning_rate": 0.001, "loss": 0.4374, "step": 2572 }, { "epoch": 0.07099473333862184, "grad_norm": 0.0034552421420812607, "learning_rate": 0.001, "loss": 0.3793, "step": 2573 }, { "epoch": 0.07102232553968621, "grad_norm": 0.004341066349297762, "learning_rate": 0.001, "loss": 0.4012, "step": 2574 }, { "epoch": 0.07104991774075058, "grad_norm": 0.0033516965340822935, "learning_rate": 0.001, "loss": 0.3915, "step": 2575 }, { "epoch": 0.07107750994181494, "grad_norm": 0.0036367857828736305, "learning_rate": 0.001, "loss": 0.4064, "step": 2576 }, { "epoch": 0.07110510214287931, "grad_norm": 0.0029854371678084135, "learning_rate": 0.001, "loss": 0.4095, "step": 2577 }, { "epoch": 0.07113269434394369, "grad_norm": 0.002374214818701148, "learning_rate": 0.001, "loss": 0.4303, "step": 2578 }, { "epoch": 0.07116028654500806, "grad_norm": 0.002954955445602536, "learning_rate": 0.001, "loss": 0.4127, "step": 2579 }, { "epoch": 0.07118787874607242, "grad_norm": 0.003998765256255865, "learning_rate": 0.001, "loss": 0.4217, "step": 2580 }, { "epoch": 0.07121547094713679, "grad_norm": 0.0029591761995106936, "learning_rate": 0.001, "loss": 0.4329, "step": 2581 }, { "epoch": 0.07124306314820116, "grad_norm": 0.0039256964810192585, "learning_rate": 0.001, "loss": 0.4368, "step": 2582 }, { "epoch": 0.07127065534926554, "grad_norm": 0.005670437589287758, "learning_rate": 0.001, "loss": 0.3942, "step": 2583 }, { "epoch": 0.0712982475503299, "grad_norm": 0.0024786926805973053, "learning_rate": 0.001, "loss": 0.4157, "step": 2584 }, { "epoch": 0.07132583975139427, "grad_norm": 0.0045395889319479465, "learning_rate": 0.001, "loss": 0.4074, "step": 2585 }, { "epoch": 0.07135343195245863, "grad_norm": 0.002754254499450326, "learning_rate": 0.001, "loss": 0.4155, "step": 2586 }, { "epoch": 0.071381024153523, "grad_norm": 0.0032517199870198965, "learning_rate": 0.001, "loss": 0.3633, "step": 2587 }, { "epoch": 0.07140861635458738, "grad_norm": 0.0037906805519014597, "learning_rate": 0.001, "loss": 0.4037, "step": 2588 }, { "epoch": 0.07143620855565175, "grad_norm": 0.00529517512768507, "learning_rate": 0.001, "loss": 0.3891, "step": 2589 }, { "epoch": 0.07146380075671611, "grad_norm": 0.00859268568456173, "learning_rate": 0.001, "loss": 0.3689, "step": 2590 }, { "epoch": 0.07149139295778048, "grad_norm": 0.003779566613957286, "learning_rate": 0.001, "loss": 0.3853, "step": 2591 }, { "epoch": 0.07151898515884485, "grad_norm": 0.0031110162381082773, "learning_rate": 0.001, "loss": 0.4138, "step": 2592 }, { "epoch": 0.07154657735990923, "grad_norm": 0.004116731230169535, "learning_rate": 0.001, "loss": 0.3965, "step": 2593 }, { "epoch": 0.0715741695609736, "grad_norm": 0.00332248630002141, "learning_rate": 0.001, "loss": 0.4053, "step": 2594 }, { "epoch": 0.07160176176203796, "grad_norm": 0.003030715975910425, "learning_rate": 0.001, "loss": 0.4413, "step": 2595 }, { "epoch": 0.07162935396310233, "grad_norm": 0.0028603002429008484, "learning_rate": 0.001, "loss": 0.3965, "step": 2596 }, { "epoch": 0.0716569461641667, "grad_norm": 0.0035293481778353453, "learning_rate": 0.001, "loss": 0.4229, "step": 2597 }, { "epoch": 0.07168453836523107, "grad_norm": 0.004880653228610754, "learning_rate": 0.001, "loss": 0.384, "step": 2598 }, { "epoch": 0.07171213056629544, "grad_norm": 0.0037580877542495728, "learning_rate": 0.001, "loss": 0.373, "step": 2599 }, { "epoch": 0.0717397227673598, "grad_norm": 0.004738042131066322, "learning_rate": 0.001, "loss": 0.3783, "step": 2600 }, { "epoch": 0.07176731496842417, "grad_norm": 0.0030720685608685017, "learning_rate": 0.001, "loss": 0.4232, "step": 2601 }, { "epoch": 0.07179490716948854, "grad_norm": 0.003540902165696025, "learning_rate": 0.001, "loss": 0.4012, "step": 2602 }, { "epoch": 0.07182249937055292, "grad_norm": 0.0028504952788352966, "learning_rate": 0.001, "loss": 0.3778, "step": 2603 }, { "epoch": 0.07185009157161729, "grad_norm": 0.0041265105828642845, "learning_rate": 0.001, "loss": 0.3976, "step": 2604 }, { "epoch": 0.07187768377268165, "grad_norm": 0.0034538819454610348, "learning_rate": 0.001, "loss": 0.3802, "step": 2605 }, { "epoch": 0.07190527597374602, "grad_norm": 0.002804661402478814, "learning_rate": 0.001, "loss": 0.388, "step": 2606 }, { "epoch": 0.07193286817481039, "grad_norm": 0.006338078528642654, "learning_rate": 0.001, "loss": 0.4074, "step": 2607 }, { "epoch": 0.07196046037587477, "grad_norm": 0.0034297939855605364, "learning_rate": 0.001, "loss": 0.3497, "step": 2608 }, { "epoch": 0.07198805257693913, "grad_norm": 0.0029641755390912294, "learning_rate": 0.001, "loss": 0.4111, "step": 2609 }, { "epoch": 0.0720156447780035, "grad_norm": 0.0038228612393140793, "learning_rate": 0.001, "loss": 0.4009, "step": 2610 }, { "epoch": 0.07204323697906787, "grad_norm": 0.0033363320399075747, "learning_rate": 0.001, "loss": 0.395, "step": 2611 }, { "epoch": 0.07207082918013223, "grad_norm": 0.002400481840595603, "learning_rate": 0.001, "loss": 0.3865, "step": 2612 }, { "epoch": 0.0720984213811966, "grad_norm": 0.00315939006395638, "learning_rate": 0.001, "loss": 0.4, "step": 2613 }, { "epoch": 0.07212601358226098, "grad_norm": 0.00354985473677516, "learning_rate": 0.001, "loss": 0.4134, "step": 2614 }, { "epoch": 0.07215360578332534, "grad_norm": 0.0028168221469968557, "learning_rate": 0.001, "loss": 0.4117, "step": 2615 }, { "epoch": 0.07218119798438971, "grad_norm": 0.0025577114429324865, "learning_rate": 0.001, "loss": 0.3769, "step": 2616 }, { "epoch": 0.07220879018545408, "grad_norm": 0.003505377098917961, "learning_rate": 0.001, "loss": 0.4206, "step": 2617 }, { "epoch": 0.07223638238651844, "grad_norm": 0.005422186106443405, "learning_rate": 0.001, "loss": 0.3677, "step": 2618 }, { "epoch": 0.07226397458758282, "grad_norm": 0.003959451802074909, "learning_rate": 0.001, "loss": 0.3826, "step": 2619 }, { "epoch": 0.07229156678864719, "grad_norm": 0.002390485256910324, "learning_rate": 0.001, "loss": 0.3899, "step": 2620 }, { "epoch": 0.07231915898971156, "grad_norm": 0.0019209344172850251, "learning_rate": 0.001, "loss": 0.395, "step": 2621 }, { "epoch": 0.07234675119077592, "grad_norm": 0.0027938170824199915, "learning_rate": 0.001, "loss": 0.4097, "step": 2622 }, { "epoch": 0.07237434339184029, "grad_norm": 0.002219612244516611, "learning_rate": 0.001, "loss": 0.4308, "step": 2623 }, { "epoch": 0.07240193559290467, "grad_norm": 0.002710830420255661, "learning_rate": 0.001, "loss": 0.4034, "step": 2624 }, { "epoch": 0.07242952779396904, "grad_norm": 0.002441881690174341, "learning_rate": 0.001, "loss": 0.3905, "step": 2625 }, { "epoch": 0.0724571199950334, "grad_norm": 0.002835171762853861, "learning_rate": 0.001, "loss": 0.4196, "step": 2626 }, { "epoch": 0.07248471219609777, "grad_norm": 0.005800854880362749, "learning_rate": 0.001, "loss": 0.3621, "step": 2627 }, { "epoch": 0.07251230439716214, "grad_norm": 0.0058226133696734905, "learning_rate": 0.001, "loss": 0.4385, "step": 2628 }, { "epoch": 0.07253989659822652, "grad_norm": 0.0031959593761712313, "learning_rate": 0.001, "loss": 0.4129, "step": 2629 }, { "epoch": 0.07256748879929088, "grad_norm": 0.007225159555673599, "learning_rate": 0.001, "loss": 0.3695, "step": 2630 }, { "epoch": 0.07259508100035525, "grad_norm": 0.003910002298653126, "learning_rate": 0.001, "loss": 0.4018, "step": 2631 }, { "epoch": 0.07262267320141962, "grad_norm": 0.0047545540146529675, "learning_rate": 0.001, "loss": 0.3985, "step": 2632 }, { "epoch": 0.07265026540248398, "grad_norm": 0.0037914151325821877, "learning_rate": 0.001, "loss": 0.3954, "step": 2633 }, { "epoch": 0.07267785760354836, "grad_norm": 0.007707824464887381, "learning_rate": 0.001, "loss": 0.3865, "step": 2634 }, { "epoch": 0.07270544980461273, "grad_norm": 0.0031401002779603004, "learning_rate": 0.001, "loss": 0.3622, "step": 2635 }, { "epoch": 0.0727330420056771, "grad_norm": 0.005065685138106346, "learning_rate": 0.001, "loss": 0.3612, "step": 2636 }, { "epoch": 0.07276063420674146, "grad_norm": 0.0033927711192518473, "learning_rate": 0.001, "loss": 0.4286, "step": 2637 }, { "epoch": 0.07278822640780583, "grad_norm": 0.0026958470698446035, "learning_rate": 0.001, "loss": 0.4064, "step": 2638 }, { "epoch": 0.07281581860887021, "grad_norm": 0.004273698199540377, "learning_rate": 0.001, "loss": 0.3942, "step": 2639 }, { "epoch": 0.07284341080993458, "grad_norm": 0.0033470946364104748, "learning_rate": 0.001, "loss": 0.4117, "step": 2640 }, { "epoch": 0.07287100301099894, "grad_norm": 0.0030789680313318968, "learning_rate": 0.001, "loss": 0.3801, "step": 2641 }, { "epoch": 0.07289859521206331, "grad_norm": 0.003024066798388958, "learning_rate": 0.001, "loss": 0.4068, "step": 2642 }, { "epoch": 0.07292618741312767, "grad_norm": 0.0035327670630067587, "learning_rate": 0.001, "loss": 0.4213, "step": 2643 }, { "epoch": 0.07295377961419205, "grad_norm": 0.005246289074420929, "learning_rate": 0.001, "loss": 0.4035, "step": 2644 }, { "epoch": 0.07298137181525642, "grad_norm": 0.0033100054133683443, "learning_rate": 0.001, "loss": 0.4015, "step": 2645 }, { "epoch": 0.07300896401632079, "grad_norm": 0.014535458758473396, "learning_rate": 0.001, "loss": 0.4149, "step": 2646 }, { "epoch": 0.07303655621738515, "grad_norm": 0.006717653013765812, "learning_rate": 0.001, "loss": 0.4366, "step": 2647 }, { "epoch": 0.07306414841844952, "grad_norm": 0.005544296000152826, "learning_rate": 0.001, "loss": 0.3546, "step": 2648 }, { "epoch": 0.0730917406195139, "grad_norm": 0.002779677277430892, "learning_rate": 0.001, "loss": 0.415, "step": 2649 }, { "epoch": 0.07311933282057827, "grad_norm": 0.0030521864537149668, "learning_rate": 0.001, "loss": 0.41, "step": 2650 }, { "epoch": 0.07314692502164263, "grad_norm": 0.0032854536548256874, "learning_rate": 0.001, "loss": 0.4159, "step": 2651 }, { "epoch": 0.073174517222707, "grad_norm": 0.002539557870477438, "learning_rate": 0.001, "loss": 0.4195, "step": 2652 }, { "epoch": 0.07320210942377137, "grad_norm": 0.004703735467046499, "learning_rate": 0.001, "loss": 0.3759, "step": 2653 }, { "epoch": 0.07322970162483575, "grad_norm": 0.0029251237865537405, "learning_rate": 0.001, "loss": 0.4182, "step": 2654 }, { "epoch": 0.07325729382590011, "grad_norm": 0.0026435167528688908, "learning_rate": 0.001, "loss": 0.3718, "step": 2655 }, { "epoch": 0.07328488602696448, "grad_norm": 0.004778844770044088, "learning_rate": 0.001, "loss": 0.4264, "step": 2656 }, { "epoch": 0.07331247822802885, "grad_norm": 0.003458186285570264, "learning_rate": 0.001, "loss": 0.4593, "step": 2657 }, { "epoch": 0.07334007042909321, "grad_norm": 0.0034521680790930986, "learning_rate": 0.001, "loss": 0.4509, "step": 2658 }, { "epoch": 0.07336766263015758, "grad_norm": 0.002783542964607477, "learning_rate": 0.001, "loss": 0.3887, "step": 2659 }, { "epoch": 0.07339525483122196, "grad_norm": 0.004468636121600866, "learning_rate": 0.001, "loss": 0.3971, "step": 2660 }, { "epoch": 0.07342284703228633, "grad_norm": 0.0026452334132045507, "learning_rate": 0.001, "loss": 0.4105, "step": 2661 }, { "epoch": 0.07345043923335069, "grad_norm": 0.003701903624460101, "learning_rate": 0.001, "loss": 0.3864, "step": 2662 }, { "epoch": 0.07347803143441506, "grad_norm": 0.002505905693396926, "learning_rate": 0.001, "loss": 0.4081, "step": 2663 }, { "epoch": 0.07350562363547943, "grad_norm": 0.002993806730955839, "learning_rate": 0.001, "loss": 0.3838, "step": 2664 }, { "epoch": 0.0735332158365438, "grad_norm": 0.002987109124660492, "learning_rate": 0.001, "loss": 0.4064, "step": 2665 }, { "epoch": 0.07356080803760817, "grad_norm": 0.0028901277109980583, "learning_rate": 0.001, "loss": 0.3842, "step": 2666 }, { "epoch": 0.07358840023867254, "grad_norm": 0.0037864744663238525, "learning_rate": 0.001, "loss": 0.3962, "step": 2667 }, { "epoch": 0.0736159924397369, "grad_norm": 0.005365621764212847, "learning_rate": 0.001, "loss": 0.4013, "step": 2668 }, { "epoch": 0.07364358464080127, "grad_norm": 0.0025341627188026905, "learning_rate": 0.001, "loss": 0.4598, "step": 2669 }, { "epoch": 0.07367117684186565, "grad_norm": 0.0026153249200433493, "learning_rate": 0.001, "loss": 0.4187, "step": 2670 }, { "epoch": 0.07369876904293002, "grad_norm": 0.00424772035330534, "learning_rate": 0.001, "loss": 0.4292, "step": 2671 }, { "epoch": 0.07372636124399438, "grad_norm": 0.005840023048222065, "learning_rate": 0.001, "loss": 0.382, "step": 2672 }, { "epoch": 0.07375395344505875, "grad_norm": 0.00886671431362629, "learning_rate": 0.001, "loss": 0.3735, "step": 2673 }, { "epoch": 0.07378154564612312, "grad_norm": 0.004951406270265579, "learning_rate": 0.001, "loss": 0.383, "step": 2674 }, { "epoch": 0.0738091378471875, "grad_norm": 0.004046993795782328, "learning_rate": 0.001, "loss": 0.4047, "step": 2675 }, { "epoch": 0.07383673004825186, "grad_norm": 0.003650445956736803, "learning_rate": 0.001, "loss": 0.4343, "step": 2676 }, { "epoch": 0.07386432224931623, "grad_norm": 0.0027846968732774258, "learning_rate": 0.001, "loss": 0.4657, "step": 2677 }, { "epoch": 0.0738919144503806, "grad_norm": 0.0027162914630025625, "learning_rate": 0.001, "loss": 0.3845, "step": 2678 }, { "epoch": 0.07391950665144496, "grad_norm": 0.002881822641938925, "learning_rate": 0.001, "loss": 0.435, "step": 2679 }, { "epoch": 0.07394709885250934, "grad_norm": 0.006673680152744055, "learning_rate": 0.001, "loss": 0.4224, "step": 2680 }, { "epoch": 0.07397469105357371, "grad_norm": 0.0028485655784606934, "learning_rate": 0.001, "loss": 0.3907, "step": 2681 }, { "epoch": 0.07400228325463808, "grad_norm": 0.004903602413833141, "learning_rate": 0.001, "loss": 0.3578, "step": 2682 }, { "epoch": 0.07402987545570244, "grad_norm": 0.010232421569526196, "learning_rate": 0.001, "loss": 0.4232, "step": 2683 }, { "epoch": 0.07405746765676681, "grad_norm": 0.01437798049300909, "learning_rate": 0.001, "loss": 0.4093, "step": 2684 }, { "epoch": 0.07408505985783119, "grad_norm": 0.0036920029670000076, "learning_rate": 0.001, "loss": 0.4224, "step": 2685 }, { "epoch": 0.07411265205889556, "grad_norm": 0.008351249620318413, "learning_rate": 0.001, "loss": 0.4205, "step": 2686 }, { "epoch": 0.07414024425995992, "grad_norm": 0.004162284545600414, "learning_rate": 0.001, "loss": 0.4054, "step": 2687 }, { "epoch": 0.07416783646102429, "grad_norm": 0.0036121357697993517, "learning_rate": 0.001, "loss": 0.4144, "step": 2688 }, { "epoch": 0.07419542866208866, "grad_norm": 0.0035279730800539255, "learning_rate": 0.001, "loss": 0.3711, "step": 2689 }, { "epoch": 0.07422302086315304, "grad_norm": 0.0031304580625146627, "learning_rate": 0.001, "loss": 0.3772, "step": 2690 }, { "epoch": 0.0742506130642174, "grad_norm": 0.006544687785208225, "learning_rate": 0.001, "loss": 0.4303, "step": 2691 }, { "epoch": 0.07427820526528177, "grad_norm": 0.004466759506613016, "learning_rate": 0.001, "loss": 0.3946, "step": 2692 }, { "epoch": 0.07430579746634614, "grad_norm": 0.0027272431179881096, "learning_rate": 0.001, "loss": 0.4069, "step": 2693 }, { "epoch": 0.0743333896674105, "grad_norm": 0.0032364106737077236, "learning_rate": 0.001, "loss": 0.3972, "step": 2694 }, { "epoch": 0.07436098186847488, "grad_norm": 0.002972652204334736, "learning_rate": 0.001, "loss": 0.3713, "step": 2695 }, { "epoch": 0.07438857406953925, "grad_norm": 0.0027488903142511845, "learning_rate": 0.001, "loss": 0.3686, "step": 2696 }, { "epoch": 0.07441616627060361, "grad_norm": 0.003380288602784276, "learning_rate": 0.001, "loss": 0.3894, "step": 2697 }, { "epoch": 0.07444375847166798, "grad_norm": 0.002856023609638214, "learning_rate": 0.001, "loss": 0.3889, "step": 2698 }, { "epoch": 0.07447135067273235, "grad_norm": 0.003780797589570284, "learning_rate": 0.001, "loss": 0.4039, "step": 2699 }, { "epoch": 0.07449894287379673, "grad_norm": 0.006125589832663536, "learning_rate": 0.001, "loss": 0.4125, "step": 2700 }, { "epoch": 0.0745265350748611, "grad_norm": 0.003866039216518402, "learning_rate": 0.001, "loss": 0.3978, "step": 2701 }, { "epoch": 0.07455412727592546, "grad_norm": 0.0035539071541279554, "learning_rate": 0.001, "loss": 0.3874, "step": 2702 }, { "epoch": 0.07458171947698983, "grad_norm": 0.003555738367140293, "learning_rate": 0.001, "loss": 0.3996, "step": 2703 }, { "epoch": 0.0746093116780542, "grad_norm": 0.0025483653880655766, "learning_rate": 0.001, "loss": 0.3836, "step": 2704 }, { "epoch": 0.07463690387911857, "grad_norm": 0.005306419916450977, "learning_rate": 0.001, "loss": 0.4259, "step": 2705 }, { "epoch": 0.07466449608018294, "grad_norm": 0.004178240429610014, "learning_rate": 0.001, "loss": 0.4074, "step": 2706 }, { "epoch": 0.0746920882812473, "grad_norm": 0.0047726561315357685, "learning_rate": 0.001, "loss": 0.3957, "step": 2707 }, { "epoch": 0.07471968048231167, "grad_norm": 0.003424114780500531, "learning_rate": 0.001, "loss": 0.4242, "step": 2708 }, { "epoch": 0.07474727268337604, "grad_norm": 0.004055993165820837, "learning_rate": 0.001, "loss": 0.4025, "step": 2709 }, { "epoch": 0.0747748648844404, "grad_norm": 0.0035490887239575386, "learning_rate": 0.001, "loss": 0.4291, "step": 2710 }, { "epoch": 0.07480245708550479, "grad_norm": 0.0075918626971542835, "learning_rate": 0.001, "loss": 0.4556, "step": 2711 }, { "epoch": 0.07483004928656915, "grad_norm": 0.00878838449716568, "learning_rate": 0.001, "loss": 0.4055, "step": 2712 }, { "epoch": 0.07485764148763352, "grad_norm": 0.004037888254970312, "learning_rate": 0.001, "loss": 0.3879, "step": 2713 }, { "epoch": 0.07488523368869789, "grad_norm": 0.003275372786447406, "learning_rate": 0.001, "loss": 0.3995, "step": 2714 }, { "epoch": 0.07491282588976225, "grad_norm": 0.005921733099967241, "learning_rate": 0.001, "loss": 0.3959, "step": 2715 }, { "epoch": 0.07494041809082663, "grad_norm": 0.003877841867506504, "learning_rate": 0.001, "loss": 0.3889, "step": 2716 }, { "epoch": 0.074968010291891, "grad_norm": 0.004174409434199333, "learning_rate": 0.001, "loss": 0.385, "step": 2717 }, { "epoch": 0.07499560249295537, "grad_norm": 0.004188715014606714, "learning_rate": 0.001, "loss": 0.4182, "step": 2718 }, { "epoch": 0.07502319469401973, "grad_norm": 0.0031484768260270357, "learning_rate": 0.001, "loss": 0.3746, "step": 2719 }, { "epoch": 0.0750507868950841, "grad_norm": 0.006613558623939753, "learning_rate": 0.001, "loss": 0.4151, "step": 2720 }, { "epoch": 0.07507837909614848, "grad_norm": 0.0036274839658290148, "learning_rate": 0.001, "loss": 0.3511, "step": 2721 }, { "epoch": 0.07510597129721285, "grad_norm": 0.004630135837942362, "learning_rate": 0.001, "loss": 0.412, "step": 2722 }, { "epoch": 0.07513356349827721, "grad_norm": 0.0061690667644143105, "learning_rate": 0.001, "loss": 0.4167, "step": 2723 }, { "epoch": 0.07516115569934158, "grad_norm": 0.005458046216517687, "learning_rate": 0.001, "loss": 0.3676, "step": 2724 }, { "epoch": 0.07518874790040594, "grad_norm": 0.007112190593034029, "learning_rate": 0.001, "loss": 0.3842, "step": 2725 }, { "epoch": 0.07521634010147032, "grad_norm": 0.0031167047563940287, "learning_rate": 0.001, "loss": 0.3922, "step": 2726 }, { "epoch": 0.07524393230253469, "grad_norm": 0.003523972351104021, "learning_rate": 0.001, "loss": 0.4247, "step": 2727 }, { "epoch": 0.07527152450359906, "grad_norm": 0.003653216175734997, "learning_rate": 0.001, "loss": 0.4352, "step": 2728 }, { "epoch": 0.07529911670466342, "grad_norm": 0.003052507760003209, "learning_rate": 0.001, "loss": 0.4049, "step": 2729 }, { "epoch": 0.07532670890572779, "grad_norm": 0.002761593321338296, "learning_rate": 0.001, "loss": 0.3611, "step": 2730 }, { "epoch": 0.07535430110679217, "grad_norm": 0.0036866154987365007, "learning_rate": 0.001, "loss": 0.3891, "step": 2731 }, { "epoch": 0.07538189330785654, "grad_norm": 0.008540788665413857, "learning_rate": 0.001, "loss": 0.4011, "step": 2732 }, { "epoch": 0.0754094855089209, "grad_norm": 0.0023057356011122465, "learning_rate": 0.001, "loss": 0.3936, "step": 2733 }, { "epoch": 0.07543707770998527, "grad_norm": 0.006381066981703043, "learning_rate": 0.001, "loss": 0.3804, "step": 2734 }, { "epoch": 0.07546466991104964, "grad_norm": 0.0035177739337086678, "learning_rate": 0.001, "loss": 0.4017, "step": 2735 }, { "epoch": 0.07549226211211402, "grad_norm": 0.0035792491398751736, "learning_rate": 0.001, "loss": 0.3758, "step": 2736 }, { "epoch": 0.07551985431317838, "grad_norm": 0.0046767196618020535, "learning_rate": 0.001, "loss": 0.4361, "step": 2737 }, { "epoch": 0.07554744651424275, "grad_norm": 0.0037277627270668745, "learning_rate": 0.001, "loss": 0.3893, "step": 2738 }, { "epoch": 0.07557503871530712, "grad_norm": 0.003284999867901206, "learning_rate": 0.001, "loss": 0.404, "step": 2739 }, { "epoch": 0.07560263091637148, "grad_norm": 0.0028962334617972374, "learning_rate": 0.001, "loss": 0.3979, "step": 2740 }, { "epoch": 0.07563022311743586, "grad_norm": 0.003682551207020879, "learning_rate": 0.001, "loss": 0.4198, "step": 2741 }, { "epoch": 0.07565781531850023, "grad_norm": 0.0028806542977690697, "learning_rate": 0.001, "loss": 0.3745, "step": 2742 }, { "epoch": 0.0756854075195646, "grad_norm": 0.003922648727893829, "learning_rate": 0.001, "loss": 0.3968, "step": 2743 }, { "epoch": 0.07571299972062896, "grad_norm": 0.0026945569552481174, "learning_rate": 0.001, "loss": 0.3959, "step": 2744 }, { "epoch": 0.07574059192169333, "grad_norm": 0.0057632802054286, "learning_rate": 0.001, "loss": 0.4194, "step": 2745 }, { "epoch": 0.07576818412275771, "grad_norm": 0.006452036090195179, "learning_rate": 0.001, "loss": 0.3966, "step": 2746 }, { "epoch": 0.07579577632382208, "grad_norm": 0.0030313171446323395, "learning_rate": 0.001, "loss": 0.4016, "step": 2747 }, { "epoch": 0.07582336852488644, "grad_norm": 0.003243018174543977, "learning_rate": 0.001, "loss": 0.3503, "step": 2748 }, { "epoch": 0.07585096072595081, "grad_norm": 0.0030355071648955345, "learning_rate": 0.001, "loss": 0.4086, "step": 2749 }, { "epoch": 0.07587855292701517, "grad_norm": 0.003429468721151352, "learning_rate": 0.001, "loss": 0.3872, "step": 2750 }, { "epoch": 0.07590614512807956, "grad_norm": 0.0029639608692377806, "learning_rate": 0.001, "loss": 0.4498, "step": 2751 }, { "epoch": 0.07593373732914392, "grad_norm": 0.004542426206171513, "learning_rate": 0.001, "loss": 0.4143, "step": 2752 }, { "epoch": 0.07596132953020829, "grad_norm": 0.002826446434482932, "learning_rate": 0.001, "loss": 0.405, "step": 2753 }, { "epoch": 0.07598892173127265, "grad_norm": 0.004107923712581396, "learning_rate": 0.001, "loss": 0.4035, "step": 2754 }, { "epoch": 0.07601651393233702, "grad_norm": 0.0028436153661459684, "learning_rate": 0.001, "loss": 0.4484, "step": 2755 }, { "epoch": 0.07604410613340139, "grad_norm": 0.004834937863051891, "learning_rate": 0.001, "loss": 0.3966, "step": 2756 }, { "epoch": 0.07607169833446577, "grad_norm": 0.003390131751075387, "learning_rate": 0.001, "loss": 0.4044, "step": 2757 }, { "epoch": 0.07609929053553013, "grad_norm": 0.002966254251077771, "learning_rate": 0.001, "loss": 0.4246, "step": 2758 }, { "epoch": 0.0761268827365945, "grad_norm": 0.00300071039237082, "learning_rate": 0.001, "loss": 0.3683, "step": 2759 }, { "epoch": 0.07615447493765887, "grad_norm": 0.0030495107639580965, "learning_rate": 0.001, "loss": 0.3625, "step": 2760 }, { "epoch": 0.07618206713872323, "grad_norm": 0.002420577686280012, "learning_rate": 0.001, "loss": 0.4494, "step": 2761 }, { "epoch": 0.07620965933978761, "grad_norm": 0.0036301531363278627, "learning_rate": 0.001, "loss": 0.3941, "step": 2762 }, { "epoch": 0.07623725154085198, "grad_norm": 0.0024638317991048098, "learning_rate": 0.001, "loss": 0.3652, "step": 2763 }, { "epoch": 0.07626484374191635, "grad_norm": 0.003583789337426424, "learning_rate": 0.001, "loss": 0.3946, "step": 2764 }, { "epoch": 0.07629243594298071, "grad_norm": 0.003300134791061282, "learning_rate": 0.001, "loss": 0.4191, "step": 2765 }, { "epoch": 0.07632002814404508, "grad_norm": 0.0033223924692720175, "learning_rate": 0.001, "loss": 0.4072, "step": 2766 }, { "epoch": 0.07634762034510946, "grad_norm": 0.003998725675046444, "learning_rate": 0.001, "loss": 0.4096, "step": 2767 }, { "epoch": 0.07637521254617383, "grad_norm": 0.0030690559651702642, "learning_rate": 0.001, "loss": 0.3798, "step": 2768 }, { "epoch": 0.07640280474723819, "grad_norm": 0.0026303695049136877, "learning_rate": 0.001, "loss": 0.4359, "step": 2769 }, { "epoch": 0.07643039694830256, "grad_norm": 0.004332841839641333, "learning_rate": 0.001, "loss": 0.3492, "step": 2770 }, { "epoch": 0.07645798914936693, "grad_norm": 0.0027642296627163887, "learning_rate": 0.001, "loss": 0.4068, "step": 2771 }, { "epoch": 0.0764855813504313, "grad_norm": 0.0023598058614879847, "learning_rate": 0.001, "loss": 0.453, "step": 2772 }, { "epoch": 0.07651317355149567, "grad_norm": 0.002782411640509963, "learning_rate": 0.001, "loss": 0.401, "step": 2773 }, { "epoch": 0.07654076575256004, "grad_norm": 0.0030131624080240726, "learning_rate": 0.001, "loss": 0.4043, "step": 2774 }, { "epoch": 0.0765683579536244, "grad_norm": 0.003464979352429509, "learning_rate": 0.001, "loss": 0.3996, "step": 2775 }, { "epoch": 0.07659595015468877, "grad_norm": 0.002998175797984004, "learning_rate": 0.001, "loss": 0.3838, "step": 2776 }, { "epoch": 0.07662354235575315, "grad_norm": 0.004081446677446365, "learning_rate": 0.001, "loss": 0.4126, "step": 2777 }, { "epoch": 0.07665113455681752, "grad_norm": 0.02017812430858612, "learning_rate": 0.001, "loss": 0.4203, "step": 2778 }, { "epoch": 0.07667872675788188, "grad_norm": 0.003272912697866559, "learning_rate": 0.001, "loss": 0.3891, "step": 2779 }, { "epoch": 0.07670631895894625, "grad_norm": 0.005462125409394503, "learning_rate": 0.001, "loss": 0.4047, "step": 2780 }, { "epoch": 0.07673391116001062, "grad_norm": 0.003259886521846056, "learning_rate": 0.001, "loss": 0.3656, "step": 2781 }, { "epoch": 0.076761503361075, "grad_norm": 0.003059667069464922, "learning_rate": 0.001, "loss": 0.3919, "step": 2782 }, { "epoch": 0.07678909556213936, "grad_norm": 0.00428088940680027, "learning_rate": 0.001, "loss": 0.3705, "step": 2783 }, { "epoch": 0.07681668776320373, "grad_norm": 0.002609378658235073, "learning_rate": 0.001, "loss": 0.395, "step": 2784 }, { "epoch": 0.0768442799642681, "grad_norm": 0.005715689156204462, "learning_rate": 0.001, "loss": 0.4003, "step": 2785 }, { "epoch": 0.07687187216533246, "grad_norm": 0.003460571402683854, "learning_rate": 0.001, "loss": 0.4222, "step": 2786 }, { "epoch": 0.07689946436639684, "grad_norm": 0.002582078566774726, "learning_rate": 0.001, "loss": 0.3751, "step": 2787 }, { "epoch": 0.07692705656746121, "grad_norm": 0.003169649513438344, "learning_rate": 0.001, "loss": 0.3896, "step": 2788 }, { "epoch": 0.07695464876852558, "grad_norm": 0.005018650088459253, "learning_rate": 0.001, "loss": 0.4245, "step": 2789 }, { "epoch": 0.07698224096958994, "grad_norm": 0.0027702595107257366, "learning_rate": 0.001, "loss": 0.3781, "step": 2790 }, { "epoch": 0.07700983317065431, "grad_norm": 0.0027409393806010485, "learning_rate": 0.001, "loss": 0.4126, "step": 2791 }, { "epoch": 0.07703742537171869, "grad_norm": 0.0031372166704386473, "learning_rate": 0.001, "loss": 0.438, "step": 2792 }, { "epoch": 0.07706501757278306, "grad_norm": 0.0028250846080482006, "learning_rate": 0.001, "loss": 0.3607, "step": 2793 }, { "epoch": 0.07709260977384742, "grad_norm": 0.003316509071737528, "learning_rate": 0.001, "loss": 0.3957, "step": 2794 }, { "epoch": 0.07712020197491179, "grad_norm": 0.002770907012745738, "learning_rate": 0.001, "loss": 0.3741, "step": 2795 }, { "epoch": 0.07714779417597616, "grad_norm": 0.002429973566904664, "learning_rate": 0.001, "loss": 0.4003, "step": 2796 }, { "epoch": 0.07717538637704054, "grad_norm": 0.0032115073408931494, "learning_rate": 0.001, "loss": 0.3559, "step": 2797 }, { "epoch": 0.0772029785781049, "grad_norm": 0.006297094281762838, "learning_rate": 0.001, "loss": 0.3917, "step": 2798 }, { "epoch": 0.07723057077916927, "grad_norm": 0.00496833398938179, "learning_rate": 0.001, "loss": 0.3812, "step": 2799 }, { "epoch": 0.07725816298023364, "grad_norm": 0.002831167308613658, "learning_rate": 0.001, "loss": 0.3944, "step": 2800 }, { "epoch": 0.077285755181298, "grad_norm": 0.004788990132510662, "learning_rate": 0.001, "loss": 0.4015, "step": 2801 }, { "epoch": 0.07731334738236237, "grad_norm": 0.0026512015610933304, "learning_rate": 0.001, "loss": 0.3873, "step": 2802 }, { "epoch": 0.07734093958342675, "grad_norm": 0.003184010973200202, "learning_rate": 0.001, "loss": 0.3668, "step": 2803 }, { "epoch": 0.07736853178449112, "grad_norm": 0.003882109420374036, "learning_rate": 0.001, "loss": 0.3792, "step": 2804 }, { "epoch": 0.07739612398555548, "grad_norm": 0.003275086637586355, "learning_rate": 0.001, "loss": 0.4097, "step": 2805 }, { "epoch": 0.07742371618661985, "grad_norm": 0.0032583875581622124, "learning_rate": 0.001, "loss": 0.4064, "step": 2806 }, { "epoch": 0.07745130838768421, "grad_norm": 0.0029904379043728113, "learning_rate": 0.001, "loss": 0.3788, "step": 2807 }, { "epoch": 0.0774789005887486, "grad_norm": 0.0035862699151039124, "learning_rate": 0.001, "loss": 0.3749, "step": 2808 }, { "epoch": 0.07750649278981296, "grad_norm": 0.003321669064462185, "learning_rate": 0.001, "loss": 0.3679, "step": 2809 }, { "epoch": 0.07753408499087733, "grad_norm": 0.002322467975318432, "learning_rate": 0.001, "loss": 0.4202, "step": 2810 }, { "epoch": 0.0775616771919417, "grad_norm": 0.0043731010518968105, "learning_rate": 0.001, "loss": 0.3979, "step": 2811 }, { "epoch": 0.07758926939300606, "grad_norm": 0.003245170461013913, "learning_rate": 0.001, "loss": 0.412, "step": 2812 }, { "epoch": 0.07761686159407044, "grad_norm": 0.0024038052652031183, "learning_rate": 0.001, "loss": 0.3928, "step": 2813 }, { "epoch": 0.07764445379513481, "grad_norm": 0.0031776116229593754, "learning_rate": 0.001, "loss": 0.3959, "step": 2814 }, { "epoch": 0.07767204599619917, "grad_norm": 0.0029750748071819544, "learning_rate": 0.001, "loss": 0.4302, "step": 2815 }, { "epoch": 0.07769963819726354, "grad_norm": 0.0033383795525878668, "learning_rate": 0.001, "loss": 0.4222, "step": 2816 }, { "epoch": 0.0777272303983279, "grad_norm": 0.00494232214987278, "learning_rate": 0.001, "loss": 0.3798, "step": 2817 }, { "epoch": 0.07775482259939229, "grad_norm": 0.0031302745919674635, "learning_rate": 0.001, "loss": 0.4021, "step": 2818 }, { "epoch": 0.07778241480045665, "grad_norm": 0.0038369682151824236, "learning_rate": 0.001, "loss": 0.4356, "step": 2819 }, { "epoch": 0.07781000700152102, "grad_norm": 0.00343205570243299, "learning_rate": 0.001, "loss": 0.4297, "step": 2820 }, { "epoch": 0.07783759920258539, "grad_norm": 0.005272636190056801, "learning_rate": 0.001, "loss": 0.4319, "step": 2821 }, { "epoch": 0.07786519140364975, "grad_norm": 0.006007963325828314, "learning_rate": 0.001, "loss": 0.3682, "step": 2822 }, { "epoch": 0.07789278360471413, "grad_norm": 0.004388149362057447, "learning_rate": 0.001, "loss": 0.3658, "step": 2823 }, { "epoch": 0.0779203758057785, "grad_norm": 0.006076582707464695, "learning_rate": 0.001, "loss": 0.3637, "step": 2824 }, { "epoch": 0.07794796800684287, "grad_norm": 0.0026879182551056147, "learning_rate": 0.001, "loss": 0.4033, "step": 2825 }, { "epoch": 0.07797556020790723, "grad_norm": 0.003195406636223197, "learning_rate": 0.001, "loss": 0.4289, "step": 2826 }, { "epoch": 0.0780031524089716, "grad_norm": 0.003109849989414215, "learning_rate": 0.001, "loss": 0.4082, "step": 2827 }, { "epoch": 0.07803074461003598, "grad_norm": 0.004339446779340506, "learning_rate": 0.001, "loss": 0.3928, "step": 2828 }, { "epoch": 0.07805833681110035, "grad_norm": 0.003388351993635297, "learning_rate": 0.001, "loss": 0.3823, "step": 2829 }, { "epoch": 0.07808592901216471, "grad_norm": 0.0037827894557267427, "learning_rate": 0.001, "loss": 0.3721, "step": 2830 }, { "epoch": 0.07811352121322908, "grad_norm": 0.006149903871119022, "learning_rate": 0.001, "loss": 0.3868, "step": 2831 }, { "epoch": 0.07814111341429344, "grad_norm": 0.0027368527371436357, "learning_rate": 0.001, "loss": 0.4037, "step": 2832 }, { "epoch": 0.07816870561535783, "grad_norm": 0.004948263522237539, "learning_rate": 0.001, "loss": 0.4146, "step": 2833 }, { "epoch": 0.07819629781642219, "grad_norm": 0.017221815884113312, "learning_rate": 0.001, "loss": 0.3695, "step": 2834 }, { "epoch": 0.07822389001748656, "grad_norm": 0.004491179715842009, "learning_rate": 0.001, "loss": 0.4319, "step": 2835 }, { "epoch": 0.07825148221855092, "grad_norm": 0.004400896839797497, "learning_rate": 0.001, "loss": 0.3943, "step": 2836 }, { "epoch": 0.07827907441961529, "grad_norm": 0.0028574098832905293, "learning_rate": 0.001, "loss": 0.4322, "step": 2837 }, { "epoch": 0.07830666662067967, "grad_norm": 0.005395799409598112, "learning_rate": 0.001, "loss": 0.4221, "step": 2838 }, { "epoch": 0.07833425882174404, "grad_norm": 0.004169132094830275, "learning_rate": 0.001, "loss": 0.3925, "step": 2839 }, { "epoch": 0.0783618510228084, "grad_norm": 0.0036933980882167816, "learning_rate": 0.001, "loss": 0.4086, "step": 2840 }, { "epoch": 0.07838944322387277, "grad_norm": 0.0037820383440703154, "learning_rate": 0.001, "loss": 0.4078, "step": 2841 }, { "epoch": 0.07841703542493714, "grad_norm": 0.00365295703522861, "learning_rate": 0.001, "loss": 0.4157, "step": 2842 }, { "epoch": 0.07844462762600152, "grad_norm": 0.0040397047996521, "learning_rate": 0.001, "loss": 0.4155, "step": 2843 }, { "epoch": 0.07847221982706588, "grad_norm": 0.0027921211440116167, "learning_rate": 0.001, "loss": 0.4173, "step": 2844 }, { "epoch": 0.07849981202813025, "grad_norm": 0.002542336704209447, "learning_rate": 0.001, "loss": 0.4587, "step": 2845 }, { "epoch": 0.07852740422919462, "grad_norm": 0.0032813462894409895, "learning_rate": 0.001, "loss": 0.4137, "step": 2846 }, { "epoch": 0.07855499643025898, "grad_norm": 0.0026641942095011473, "learning_rate": 0.001, "loss": 0.3809, "step": 2847 }, { "epoch": 0.07858258863132335, "grad_norm": 0.0045136939734220505, "learning_rate": 0.001, "loss": 0.4269, "step": 2848 }, { "epoch": 0.07861018083238773, "grad_norm": 0.003331273328512907, "learning_rate": 0.001, "loss": 0.4074, "step": 2849 }, { "epoch": 0.0786377730334521, "grad_norm": 0.0029903652612119913, "learning_rate": 0.001, "loss": 0.3965, "step": 2850 }, { "epoch": 0.07866536523451646, "grad_norm": 0.003270956454798579, "learning_rate": 0.001, "loss": 0.4201, "step": 2851 }, { "epoch": 0.07869295743558083, "grad_norm": 0.0025751839857548475, "learning_rate": 0.001, "loss": 0.3856, "step": 2852 }, { "epoch": 0.0787205496366452, "grad_norm": 0.004237758927047253, "learning_rate": 0.001, "loss": 0.4029, "step": 2853 }, { "epoch": 0.07874814183770958, "grad_norm": 0.005102077033370733, "learning_rate": 0.001, "loss": 0.4498, "step": 2854 }, { "epoch": 0.07877573403877394, "grad_norm": 0.003620270872488618, "learning_rate": 0.001, "loss": 0.4124, "step": 2855 }, { "epoch": 0.07880332623983831, "grad_norm": 0.003328984137624502, "learning_rate": 0.001, "loss": 0.3582, "step": 2856 }, { "epoch": 0.07883091844090268, "grad_norm": 0.005346687976270914, "learning_rate": 0.001, "loss": 0.4406, "step": 2857 }, { "epoch": 0.07885851064196704, "grad_norm": 0.002766883932054043, "learning_rate": 0.001, "loss": 0.4048, "step": 2858 }, { "epoch": 0.07888610284303142, "grad_norm": 0.0039212643168866634, "learning_rate": 0.001, "loss": 0.3732, "step": 2859 }, { "epoch": 0.07891369504409579, "grad_norm": 0.00360241811722517, "learning_rate": 0.001, "loss": 0.4059, "step": 2860 }, { "epoch": 0.07894128724516015, "grad_norm": 0.009609011001884937, "learning_rate": 0.001, "loss": 0.3921, "step": 2861 }, { "epoch": 0.07896887944622452, "grad_norm": 0.004263875540345907, "learning_rate": 0.001, "loss": 0.4211, "step": 2862 }, { "epoch": 0.07899647164728889, "grad_norm": 0.003934512846171856, "learning_rate": 0.001, "loss": 0.3985, "step": 2863 }, { "epoch": 0.07902406384835327, "grad_norm": 0.0042722225189208984, "learning_rate": 0.001, "loss": 0.3809, "step": 2864 }, { "epoch": 0.07905165604941763, "grad_norm": 0.0036077832337468863, "learning_rate": 0.001, "loss": 0.3958, "step": 2865 }, { "epoch": 0.079079248250482, "grad_norm": 0.0027798449154943228, "learning_rate": 0.001, "loss": 0.4138, "step": 2866 }, { "epoch": 0.07910684045154637, "grad_norm": 0.0025270867627114058, "learning_rate": 0.001, "loss": 0.4042, "step": 2867 }, { "epoch": 0.07913443265261073, "grad_norm": 0.0037853806279599667, "learning_rate": 0.001, "loss": 0.4124, "step": 2868 }, { "epoch": 0.07916202485367511, "grad_norm": 0.004050467163324356, "learning_rate": 0.001, "loss": 0.4246, "step": 2869 }, { "epoch": 0.07918961705473948, "grad_norm": 0.002820851979777217, "learning_rate": 0.001, "loss": 0.3781, "step": 2870 }, { "epoch": 0.07921720925580385, "grad_norm": 0.007634916342794895, "learning_rate": 0.001, "loss": 0.376, "step": 2871 }, { "epoch": 0.07924480145686821, "grad_norm": 0.004927767440676689, "learning_rate": 0.001, "loss": 0.3862, "step": 2872 }, { "epoch": 0.07927239365793258, "grad_norm": 0.00282577658072114, "learning_rate": 0.001, "loss": 0.4018, "step": 2873 }, { "epoch": 0.07929998585899696, "grad_norm": 0.0034970357082784176, "learning_rate": 0.001, "loss": 0.3994, "step": 2874 }, { "epoch": 0.07932757806006133, "grad_norm": 0.00328719150274992, "learning_rate": 0.001, "loss": 0.3795, "step": 2875 }, { "epoch": 0.0793551702611257, "grad_norm": 0.0030249380506575108, "learning_rate": 0.001, "loss": 0.3827, "step": 2876 }, { "epoch": 0.07938276246219006, "grad_norm": 0.0026030796580016613, "learning_rate": 0.001, "loss": 0.4465, "step": 2877 }, { "epoch": 0.07941035466325443, "grad_norm": 0.0026747360825538635, "learning_rate": 0.001, "loss": 0.4162, "step": 2878 }, { "epoch": 0.0794379468643188, "grad_norm": 0.0031104108784347773, "learning_rate": 0.001, "loss": 0.4035, "step": 2879 }, { "epoch": 0.07946553906538317, "grad_norm": 0.0029512427281588316, "learning_rate": 0.001, "loss": 0.426, "step": 2880 }, { "epoch": 0.07949313126644754, "grad_norm": 0.0036311009898781776, "learning_rate": 0.001, "loss": 0.385, "step": 2881 }, { "epoch": 0.0795207234675119, "grad_norm": 0.0028290299233049154, "learning_rate": 0.001, "loss": 0.403, "step": 2882 }, { "epoch": 0.07954831566857627, "grad_norm": 0.0037825354374945164, "learning_rate": 0.001, "loss": 0.4213, "step": 2883 }, { "epoch": 0.07957590786964065, "grad_norm": 0.004042259883135557, "learning_rate": 0.001, "loss": 0.4346, "step": 2884 }, { "epoch": 0.07960350007070502, "grad_norm": 0.0031280801631510258, "learning_rate": 0.001, "loss": 0.3606, "step": 2885 }, { "epoch": 0.07963109227176939, "grad_norm": 0.003348682075738907, "learning_rate": 0.001, "loss": 0.3971, "step": 2886 }, { "epoch": 0.07965868447283375, "grad_norm": 0.003773730481043458, "learning_rate": 0.001, "loss": 0.4046, "step": 2887 }, { "epoch": 0.07968627667389812, "grad_norm": 0.0028589165303856134, "learning_rate": 0.001, "loss": 0.3763, "step": 2888 }, { "epoch": 0.0797138688749625, "grad_norm": 0.003061666851863265, "learning_rate": 0.001, "loss": 0.4058, "step": 2889 }, { "epoch": 0.07974146107602686, "grad_norm": 0.002353568095713854, "learning_rate": 0.001, "loss": 0.3691, "step": 2890 }, { "epoch": 0.07976905327709123, "grad_norm": 0.0026803589425981045, "learning_rate": 0.001, "loss": 0.3797, "step": 2891 }, { "epoch": 0.0797966454781556, "grad_norm": 0.0032909938599914312, "learning_rate": 0.001, "loss": 0.4074, "step": 2892 }, { "epoch": 0.07982423767921996, "grad_norm": 0.005034049041569233, "learning_rate": 0.001, "loss": 0.3969, "step": 2893 }, { "epoch": 0.07985182988028433, "grad_norm": 0.003058070782572031, "learning_rate": 0.001, "loss": 0.3646, "step": 2894 }, { "epoch": 0.07987942208134871, "grad_norm": 0.0028637642972171307, "learning_rate": 0.001, "loss": 0.3609, "step": 2895 }, { "epoch": 0.07990701428241308, "grad_norm": 0.004128556232899427, "learning_rate": 0.001, "loss": 0.391, "step": 2896 }, { "epoch": 0.07993460648347744, "grad_norm": 0.0060408939607441425, "learning_rate": 0.001, "loss": 0.3351, "step": 2897 }, { "epoch": 0.07996219868454181, "grad_norm": 0.0026880092918872833, "learning_rate": 0.001, "loss": 0.3596, "step": 2898 }, { "epoch": 0.07998979088560618, "grad_norm": 0.00707295211032033, "learning_rate": 0.001, "loss": 0.3579, "step": 2899 }, { "epoch": 0.08001738308667056, "grad_norm": 0.0043478901498019695, "learning_rate": 0.001, "loss": 0.4017, "step": 2900 }, { "epoch": 0.08004497528773492, "grad_norm": 0.0034000363666564226, "learning_rate": 0.001, "loss": 0.387, "step": 2901 }, { "epoch": 0.08007256748879929, "grad_norm": 0.004060294013470411, "learning_rate": 0.001, "loss": 0.3805, "step": 2902 }, { "epoch": 0.08010015968986366, "grad_norm": 0.0032872636802494526, "learning_rate": 0.001, "loss": 0.3986, "step": 2903 }, { "epoch": 0.08012775189092802, "grad_norm": 0.006337369792163372, "learning_rate": 0.001, "loss": 0.3986, "step": 2904 }, { "epoch": 0.0801553440919924, "grad_norm": 0.002969271270558238, "learning_rate": 0.001, "loss": 0.4192, "step": 2905 }, { "epoch": 0.08018293629305677, "grad_norm": 0.002581764478236437, "learning_rate": 0.001, "loss": 0.3994, "step": 2906 }, { "epoch": 0.08021052849412114, "grad_norm": 0.0030686580576002598, "learning_rate": 0.001, "loss": 0.4259, "step": 2907 }, { "epoch": 0.0802381206951855, "grad_norm": 0.0032976726070046425, "learning_rate": 0.001, "loss": 0.4098, "step": 2908 }, { "epoch": 0.08026571289624987, "grad_norm": 0.0037495982833206654, "learning_rate": 0.001, "loss": 0.3674, "step": 2909 }, { "epoch": 0.08029330509731425, "grad_norm": 0.0030835745856165886, "learning_rate": 0.001, "loss": 0.3899, "step": 2910 }, { "epoch": 0.08032089729837862, "grad_norm": 0.006115986034274101, "learning_rate": 0.001, "loss": 0.4111, "step": 2911 }, { "epoch": 0.08034848949944298, "grad_norm": 0.003554831026121974, "learning_rate": 0.001, "loss": 0.4173, "step": 2912 }, { "epoch": 0.08037608170050735, "grad_norm": 0.0033155917190015316, "learning_rate": 0.001, "loss": 0.4169, "step": 2913 }, { "epoch": 0.08040367390157172, "grad_norm": 0.0027983803302049637, "learning_rate": 0.001, "loss": 0.4004, "step": 2914 }, { "epoch": 0.0804312661026361, "grad_norm": 0.0023535455111414194, "learning_rate": 0.001, "loss": 0.4107, "step": 2915 }, { "epoch": 0.08045885830370046, "grad_norm": 0.002943321131169796, "learning_rate": 0.001, "loss": 0.4106, "step": 2916 }, { "epoch": 0.08048645050476483, "grad_norm": 0.00304407742805779, "learning_rate": 0.001, "loss": 0.372, "step": 2917 }, { "epoch": 0.0805140427058292, "grad_norm": 0.0032863402739167213, "learning_rate": 0.001, "loss": 0.4026, "step": 2918 }, { "epoch": 0.08054163490689356, "grad_norm": 0.0027361640240997076, "learning_rate": 0.001, "loss": 0.3709, "step": 2919 }, { "epoch": 0.08056922710795794, "grad_norm": 0.0026063849218189716, "learning_rate": 0.001, "loss": 0.4095, "step": 2920 }, { "epoch": 0.08059681930902231, "grad_norm": 0.0022036924492567778, "learning_rate": 0.001, "loss": 0.4303, "step": 2921 }, { "epoch": 0.08062441151008667, "grad_norm": 0.0023288466036319733, "learning_rate": 0.001, "loss": 0.4181, "step": 2922 }, { "epoch": 0.08065200371115104, "grad_norm": 0.0029277384746819735, "learning_rate": 0.001, "loss": 0.4015, "step": 2923 }, { "epoch": 0.08067959591221541, "grad_norm": 0.0061630988493561745, "learning_rate": 0.001, "loss": 0.3958, "step": 2924 }, { "epoch": 0.08070718811327979, "grad_norm": 0.0035532654728740454, "learning_rate": 0.001, "loss": 0.3883, "step": 2925 }, { "epoch": 0.08073478031434415, "grad_norm": 0.0036336968187242746, "learning_rate": 0.001, "loss": 0.38, "step": 2926 }, { "epoch": 0.08076237251540852, "grad_norm": 0.004034141544252634, "learning_rate": 0.001, "loss": 0.4127, "step": 2927 }, { "epoch": 0.08078996471647289, "grad_norm": 0.0049674310721457005, "learning_rate": 0.001, "loss": 0.3995, "step": 2928 }, { "epoch": 0.08081755691753725, "grad_norm": 0.00383196328766644, "learning_rate": 0.001, "loss": 0.4193, "step": 2929 }, { "epoch": 0.08084514911860163, "grad_norm": 0.002637132303789258, "learning_rate": 0.001, "loss": 0.3848, "step": 2930 }, { "epoch": 0.080872741319666, "grad_norm": 0.004422449506819248, "learning_rate": 0.001, "loss": 0.4159, "step": 2931 }, { "epoch": 0.08090033352073037, "grad_norm": 0.005644423421472311, "learning_rate": 0.001, "loss": 0.3794, "step": 2932 }, { "epoch": 0.08092792572179473, "grad_norm": 0.0029548918828368187, "learning_rate": 0.001, "loss": 0.4132, "step": 2933 }, { "epoch": 0.0809555179228591, "grad_norm": 0.0033653799910098314, "learning_rate": 0.001, "loss": 0.407, "step": 2934 }, { "epoch": 0.08098311012392348, "grad_norm": 0.0336541123688221, "learning_rate": 0.001, "loss": 0.4278, "step": 2935 }, { "epoch": 0.08101070232498785, "grad_norm": 0.005585819482803345, "learning_rate": 0.001, "loss": 0.3806, "step": 2936 }, { "epoch": 0.08103829452605221, "grad_norm": 0.0023465941194444895, "learning_rate": 0.001, "loss": 0.462, "step": 2937 }, { "epoch": 0.08106588672711658, "grad_norm": 0.003671723185107112, "learning_rate": 0.001, "loss": 0.4217, "step": 2938 }, { "epoch": 0.08109347892818095, "grad_norm": 0.002763295080512762, "learning_rate": 0.001, "loss": 0.3752, "step": 2939 }, { "epoch": 0.08112107112924533, "grad_norm": 0.0028186712879687548, "learning_rate": 0.001, "loss": 0.3913, "step": 2940 }, { "epoch": 0.08114866333030969, "grad_norm": 0.0023679453879594803, "learning_rate": 0.001, "loss": 0.3965, "step": 2941 }, { "epoch": 0.08117625553137406, "grad_norm": 0.002650237875059247, "learning_rate": 0.001, "loss": 0.4037, "step": 2942 }, { "epoch": 0.08120384773243843, "grad_norm": 0.0028579425998032093, "learning_rate": 0.001, "loss": 0.421, "step": 2943 }, { "epoch": 0.08123143993350279, "grad_norm": 0.003209290560334921, "learning_rate": 0.001, "loss": 0.4071, "step": 2944 }, { "epoch": 0.08125903213456716, "grad_norm": 0.0023669025395065546, "learning_rate": 0.001, "loss": 0.4131, "step": 2945 }, { "epoch": 0.08128662433563154, "grad_norm": 0.002830538898706436, "learning_rate": 0.001, "loss": 0.3586, "step": 2946 }, { "epoch": 0.0813142165366959, "grad_norm": 0.0034998899791389704, "learning_rate": 0.001, "loss": 0.4271, "step": 2947 }, { "epoch": 0.08134180873776027, "grad_norm": 0.003356503788381815, "learning_rate": 0.001, "loss": 0.3916, "step": 2948 }, { "epoch": 0.08136940093882464, "grad_norm": 0.0027026510797441006, "learning_rate": 0.001, "loss": 0.3844, "step": 2949 }, { "epoch": 0.081396993139889, "grad_norm": 0.0025660600513219833, "learning_rate": 0.001, "loss": 0.4176, "step": 2950 }, { "epoch": 0.08142458534095338, "grad_norm": 0.0029600337147712708, "learning_rate": 0.001, "loss": 0.4324, "step": 2951 }, { "epoch": 0.08145217754201775, "grad_norm": 0.00301582389511168, "learning_rate": 0.001, "loss": 0.373, "step": 2952 }, { "epoch": 0.08147976974308212, "grad_norm": 0.0027227008249610662, "learning_rate": 0.001, "loss": 0.4082, "step": 2953 }, { "epoch": 0.08150736194414648, "grad_norm": 0.004939099308103323, "learning_rate": 0.001, "loss": 0.3956, "step": 2954 }, { "epoch": 0.08153495414521085, "grad_norm": 0.0038117829244583845, "learning_rate": 0.001, "loss": 0.3908, "step": 2955 }, { "epoch": 0.08156254634627523, "grad_norm": 0.002671457827091217, "learning_rate": 0.001, "loss": 0.4099, "step": 2956 }, { "epoch": 0.0815901385473396, "grad_norm": 0.0051042488776147366, "learning_rate": 0.001, "loss": 0.4025, "step": 2957 }, { "epoch": 0.08161773074840396, "grad_norm": 0.013458254747092724, "learning_rate": 0.001, "loss": 0.4205, "step": 2958 }, { "epoch": 0.08164532294946833, "grad_norm": 0.004344523418694735, "learning_rate": 0.001, "loss": 0.374, "step": 2959 }, { "epoch": 0.0816729151505327, "grad_norm": 0.003277859417721629, "learning_rate": 0.001, "loss": 0.4002, "step": 2960 }, { "epoch": 0.08170050735159708, "grad_norm": 0.007647217717021704, "learning_rate": 0.001, "loss": 0.4151, "step": 2961 }, { "epoch": 0.08172809955266144, "grad_norm": 0.0031640264205634594, "learning_rate": 0.001, "loss": 0.4208, "step": 2962 }, { "epoch": 0.08175569175372581, "grad_norm": 0.00352554302662611, "learning_rate": 0.001, "loss": 0.4167, "step": 2963 }, { "epoch": 0.08178328395479018, "grad_norm": 0.004366365727037191, "learning_rate": 0.001, "loss": 0.4115, "step": 2964 }, { "epoch": 0.08181087615585454, "grad_norm": 0.0043410733342170715, "learning_rate": 0.001, "loss": 0.3822, "step": 2965 }, { "epoch": 0.08183846835691892, "grad_norm": 0.0039422293193638325, "learning_rate": 0.001, "loss": 0.4293, "step": 2966 }, { "epoch": 0.08186606055798329, "grad_norm": 0.0037924828939139843, "learning_rate": 0.001, "loss": 0.3922, "step": 2967 }, { "epoch": 0.08189365275904766, "grad_norm": 0.007280276622623205, "learning_rate": 0.001, "loss": 0.4094, "step": 2968 }, { "epoch": 0.08192124496011202, "grad_norm": 0.0051049706526100636, "learning_rate": 0.001, "loss": 0.3825, "step": 2969 }, { "epoch": 0.08194883716117639, "grad_norm": 0.003454606281593442, "learning_rate": 0.001, "loss": 0.4029, "step": 2970 }, { "epoch": 0.08197642936224077, "grad_norm": 0.0029868248384445906, "learning_rate": 0.001, "loss": 0.3969, "step": 2971 }, { "epoch": 0.08200402156330514, "grad_norm": 0.004341921303421259, "learning_rate": 0.001, "loss": 0.4353, "step": 2972 }, { "epoch": 0.0820316137643695, "grad_norm": 0.003885595127940178, "learning_rate": 0.001, "loss": 0.4144, "step": 2973 }, { "epoch": 0.08205920596543387, "grad_norm": 0.0032196505926549435, "learning_rate": 0.001, "loss": 0.3865, "step": 2974 }, { "epoch": 0.08208679816649823, "grad_norm": 0.004508745390921831, "learning_rate": 0.001, "loss": 0.4095, "step": 2975 }, { "epoch": 0.08211439036756261, "grad_norm": 0.0032329261302948, "learning_rate": 0.001, "loss": 0.3922, "step": 2976 }, { "epoch": 0.08214198256862698, "grad_norm": 0.004983431659638882, "learning_rate": 0.001, "loss": 0.407, "step": 2977 }, { "epoch": 0.08216957476969135, "grad_norm": 0.03812706470489502, "learning_rate": 0.001, "loss": 0.3978, "step": 2978 }, { "epoch": 0.08219716697075571, "grad_norm": 0.002776419511064887, "learning_rate": 0.001, "loss": 0.3993, "step": 2979 }, { "epoch": 0.08222475917182008, "grad_norm": 0.0030240516643971205, "learning_rate": 0.001, "loss": 0.4176, "step": 2980 }, { "epoch": 0.08225235137288446, "grad_norm": 0.0031225322745740414, "learning_rate": 0.001, "loss": 0.3519, "step": 2981 }, { "epoch": 0.08227994357394883, "grad_norm": 0.002751345979049802, "learning_rate": 0.001, "loss": 0.398, "step": 2982 }, { "epoch": 0.0823075357750132, "grad_norm": 0.003123503876850009, "learning_rate": 0.001, "loss": 0.4157, "step": 2983 }, { "epoch": 0.08233512797607756, "grad_norm": 0.004268140532076359, "learning_rate": 0.001, "loss": 0.4198, "step": 2984 }, { "epoch": 0.08236272017714193, "grad_norm": 0.002731149084866047, "learning_rate": 0.001, "loss": 0.4103, "step": 2985 }, { "epoch": 0.0823903123782063, "grad_norm": 0.002880630549043417, "learning_rate": 0.001, "loss": 0.4146, "step": 2986 }, { "epoch": 0.08241790457927067, "grad_norm": 0.008699797093868256, "learning_rate": 0.001, "loss": 0.4313, "step": 2987 }, { "epoch": 0.08244549678033504, "grad_norm": 0.0032247393392026424, "learning_rate": 0.001, "loss": 0.4263, "step": 2988 }, { "epoch": 0.0824730889813994, "grad_norm": 0.003280255477875471, "learning_rate": 0.001, "loss": 0.4152, "step": 2989 }, { "epoch": 0.08250068118246377, "grad_norm": 0.0024324634578078985, "learning_rate": 0.001, "loss": 0.4188, "step": 2990 }, { "epoch": 0.08252827338352814, "grad_norm": 0.004013401456177235, "learning_rate": 0.001, "loss": 0.3778, "step": 2991 }, { "epoch": 0.08255586558459252, "grad_norm": 0.0036929554771631956, "learning_rate": 0.001, "loss": 0.3752, "step": 2992 }, { "epoch": 0.08258345778565689, "grad_norm": 0.003950448241084814, "learning_rate": 0.001, "loss": 0.3971, "step": 2993 }, { "epoch": 0.08261104998672125, "grad_norm": 0.004668880719691515, "learning_rate": 0.001, "loss": 0.4134, "step": 2994 }, { "epoch": 0.08263864218778562, "grad_norm": 0.002737791510298848, "learning_rate": 0.001, "loss": 0.4388, "step": 2995 }, { "epoch": 0.08266623438884999, "grad_norm": 0.007926344871520996, "learning_rate": 0.001, "loss": 0.3542, "step": 2996 }, { "epoch": 0.08269382658991437, "grad_norm": 0.003492021933197975, "learning_rate": 0.001, "loss": 0.4196, "step": 2997 }, { "epoch": 0.08272141879097873, "grad_norm": 0.0026433023158460855, "learning_rate": 0.001, "loss": 0.3852, "step": 2998 }, { "epoch": 0.0827490109920431, "grad_norm": 0.0029606178868561983, "learning_rate": 0.001, "loss": 0.4082, "step": 2999 }, { "epoch": 0.08277660319310746, "grad_norm": 0.00282181310467422, "learning_rate": 0.001, "loss": 0.4111, "step": 3000 }, { "epoch": 0.08277660319310746, "eval_runtime": 24.5847, "eval_samples_per_second": 1.302, "eval_steps_per_second": 0.163, "step": 3000 }, { "epoch": 0.08280419539417183, "grad_norm": 0.004229205194860697, "learning_rate": 0.001, "loss": 0.3982, "step": 3001 }, { "epoch": 0.08283178759523621, "grad_norm": 0.002889542607590556, "learning_rate": 0.001, "loss": 0.4226, "step": 3002 }, { "epoch": 0.08285937979630058, "grad_norm": 0.0026309038512408733, "learning_rate": 0.001, "loss": 0.4207, "step": 3003 }, { "epoch": 0.08288697199736494, "grad_norm": 0.002682015299797058, "learning_rate": 0.001, "loss": 0.4143, "step": 3004 }, { "epoch": 0.08291456419842931, "grad_norm": 0.0026974889915436506, "learning_rate": 0.001, "loss": 0.3976, "step": 3005 }, { "epoch": 0.08294215639949368, "grad_norm": 0.0023957311641424894, "learning_rate": 0.001, "loss": 0.4082, "step": 3006 }, { "epoch": 0.08296974860055806, "grad_norm": 0.004567582160234451, "learning_rate": 0.001, "loss": 0.4249, "step": 3007 }, { "epoch": 0.08299734080162242, "grad_norm": 0.004289150703698397, "learning_rate": 0.001, "loss": 0.4116, "step": 3008 }, { "epoch": 0.08302493300268679, "grad_norm": 0.004455960355699062, "learning_rate": 0.001, "loss": 0.4003, "step": 3009 }, { "epoch": 0.08305252520375116, "grad_norm": 0.005996396765112877, "learning_rate": 0.001, "loss": 0.3885, "step": 3010 }, { "epoch": 0.08308011740481552, "grad_norm": 0.004294134210795164, "learning_rate": 0.001, "loss": 0.3705, "step": 3011 }, { "epoch": 0.0831077096058799, "grad_norm": 0.0030041569843888283, "learning_rate": 0.001, "loss": 0.3946, "step": 3012 }, { "epoch": 0.08313530180694427, "grad_norm": 0.0038454660680145025, "learning_rate": 0.001, "loss": 0.378, "step": 3013 }, { "epoch": 0.08316289400800864, "grad_norm": 0.003388310084119439, "learning_rate": 0.001, "loss": 0.4139, "step": 3014 }, { "epoch": 0.083190486209073, "grad_norm": 0.003318838309496641, "learning_rate": 0.001, "loss": 0.38, "step": 3015 }, { "epoch": 0.08321807841013737, "grad_norm": 0.002537375781685114, "learning_rate": 0.001, "loss": 0.4243, "step": 3016 }, { "epoch": 0.08324567061120175, "grad_norm": 0.00318319583311677, "learning_rate": 0.001, "loss": 0.408, "step": 3017 }, { "epoch": 0.08327326281226612, "grad_norm": 0.004292814992368221, "learning_rate": 0.001, "loss": 0.3454, "step": 3018 }, { "epoch": 0.08330085501333048, "grad_norm": 0.0027642296627163887, "learning_rate": 0.001, "loss": 0.3928, "step": 3019 }, { "epoch": 0.08332844721439485, "grad_norm": 0.0034603141248226166, "learning_rate": 0.001, "loss": 0.4081, "step": 3020 }, { "epoch": 0.08335603941545922, "grad_norm": 0.006106778047978878, "learning_rate": 0.001, "loss": 0.3804, "step": 3021 }, { "epoch": 0.0833836316165236, "grad_norm": 0.002718998584896326, "learning_rate": 0.001, "loss": 0.4311, "step": 3022 }, { "epoch": 0.08341122381758796, "grad_norm": 0.002778289606794715, "learning_rate": 0.001, "loss": 0.3634, "step": 3023 }, { "epoch": 0.08343881601865233, "grad_norm": 0.002709881402552128, "learning_rate": 0.001, "loss": 0.4208, "step": 3024 }, { "epoch": 0.0834664082197167, "grad_norm": 0.004608054179698229, "learning_rate": 0.001, "loss": 0.3792, "step": 3025 }, { "epoch": 0.08349400042078106, "grad_norm": 0.0070383017882704735, "learning_rate": 0.001, "loss": 0.3838, "step": 3026 }, { "epoch": 0.08352159262184544, "grad_norm": 0.0029459171928465366, "learning_rate": 0.001, "loss": 0.3634, "step": 3027 }, { "epoch": 0.08354918482290981, "grad_norm": 0.002476966939866543, "learning_rate": 0.001, "loss": 0.4168, "step": 3028 }, { "epoch": 0.08357677702397417, "grad_norm": 0.0041242605075240135, "learning_rate": 0.001, "loss": 0.3643, "step": 3029 }, { "epoch": 0.08360436922503854, "grad_norm": 0.0027966529596596956, "learning_rate": 0.001, "loss": 0.4453, "step": 3030 }, { "epoch": 0.08363196142610291, "grad_norm": 0.0032828738912940025, "learning_rate": 0.001, "loss": 0.4019, "step": 3031 }, { "epoch": 0.08365955362716729, "grad_norm": 0.002733866684138775, "learning_rate": 0.001, "loss": 0.4003, "step": 3032 }, { "epoch": 0.08368714582823165, "grad_norm": 0.004932538606226444, "learning_rate": 0.001, "loss": 0.4004, "step": 3033 }, { "epoch": 0.08371473802929602, "grad_norm": 0.0026060608215630054, "learning_rate": 0.001, "loss": 0.3812, "step": 3034 }, { "epoch": 0.08374233023036039, "grad_norm": 0.0029096321668475866, "learning_rate": 0.001, "loss": 0.3949, "step": 3035 }, { "epoch": 0.08376992243142475, "grad_norm": 0.0027539224829524755, "learning_rate": 0.001, "loss": 0.4445, "step": 3036 }, { "epoch": 0.08379751463248912, "grad_norm": 0.00449875695630908, "learning_rate": 0.001, "loss": 0.3852, "step": 3037 }, { "epoch": 0.0838251068335535, "grad_norm": 0.0045863245613873005, "learning_rate": 0.001, "loss": 0.3814, "step": 3038 }, { "epoch": 0.08385269903461787, "grad_norm": 0.004102183040231466, "learning_rate": 0.001, "loss": 0.3675, "step": 3039 }, { "epoch": 0.08388029123568223, "grad_norm": 0.003232220420613885, "learning_rate": 0.001, "loss": 0.3727, "step": 3040 }, { "epoch": 0.0839078834367466, "grad_norm": 0.0032380821648985147, "learning_rate": 0.001, "loss": 0.4175, "step": 3041 }, { "epoch": 0.08393547563781097, "grad_norm": 0.0023660236038267612, "learning_rate": 0.001, "loss": 0.3769, "step": 3042 }, { "epoch": 0.08396306783887535, "grad_norm": 0.006015659775584936, "learning_rate": 0.001, "loss": 0.4042, "step": 3043 }, { "epoch": 0.08399066003993971, "grad_norm": 0.0027081884909421206, "learning_rate": 0.001, "loss": 0.4108, "step": 3044 }, { "epoch": 0.08401825224100408, "grad_norm": 0.002505830954760313, "learning_rate": 0.001, "loss": 0.3933, "step": 3045 }, { "epoch": 0.08404584444206845, "grad_norm": 0.002315351041033864, "learning_rate": 0.001, "loss": 0.4049, "step": 3046 }, { "epoch": 0.08407343664313281, "grad_norm": 0.003794366493821144, "learning_rate": 0.001, "loss": 0.4072, "step": 3047 }, { "epoch": 0.08410102884419719, "grad_norm": 0.006150163244456053, "learning_rate": 0.001, "loss": 0.3874, "step": 3048 }, { "epoch": 0.08412862104526156, "grad_norm": 0.00300444639287889, "learning_rate": 0.001, "loss": 0.3932, "step": 3049 }, { "epoch": 0.08415621324632593, "grad_norm": 0.0030877876561135054, "learning_rate": 0.001, "loss": 0.4026, "step": 3050 }, { "epoch": 0.08418380544739029, "grad_norm": 0.004085875116288662, "learning_rate": 0.001, "loss": 0.4016, "step": 3051 }, { "epoch": 0.08421139764845466, "grad_norm": 0.0042649428360164165, "learning_rate": 0.001, "loss": 0.3727, "step": 3052 }, { "epoch": 0.08423898984951904, "grad_norm": 0.002339342376217246, "learning_rate": 0.001, "loss": 0.3953, "step": 3053 }, { "epoch": 0.0842665820505834, "grad_norm": 0.0029005780816078186, "learning_rate": 0.001, "loss": 0.3983, "step": 3054 }, { "epoch": 0.08429417425164777, "grad_norm": 0.002606838010251522, "learning_rate": 0.001, "loss": 0.4083, "step": 3055 }, { "epoch": 0.08432176645271214, "grad_norm": 0.002856641774997115, "learning_rate": 0.001, "loss": 0.4205, "step": 3056 }, { "epoch": 0.0843493586537765, "grad_norm": 0.021601708605885506, "learning_rate": 0.001, "loss": 0.4017, "step": 3057 }, { "epoch": 0.08437695085484088, "grad_norm": 0.002950535388663411, "learning_rate": 0.001, "loss": 0.4361, "step": 3058 }, { "epoch": 0.08440454305590525, "grad_norm": 0.0030242663342505693, "learning_rate": 0.001, "loss": 0.3981, "step": 3059 }, { "epoch": 0.08443213525696962, "grad_norm": 0.002619365695863962, "learning_rate": 0.001, "loss": 0.4011, "step": 3060 }, { "epoch": 0.08445972745803398, "grad_norm": 0.0032321936450898647, "learning_rate": 0.001, "loss": 0.3978, "step": 3061 }, { "epoch": 0.08448731965909835, "grad_norm": 0.0033744669053703547, "learning_rate": 0.001, "loss": 0.3652, "step": 3062 }, { "epoch": 0.08451491186016273, "grad_norm": 0.0027905574534088373, "learning_rate": 0.001, "loss": 0.3748, "step": 3063 }, { "epoch": 0.0845425040612271, "grad_norm": 0.0028084227815270424, "learning_rate": 0.001, "loss": 0.4155, "step": 3064 }, { "epoch": 0.08457009626229146, "grad_norm": 0.0034166318364441395, "learning_rate": 0.001, "loss": 0.3733, "step": 3065 }, { "epoch": 0.08459768846335583, "grad_norm": 0.0029833640437573195, "learning_rate": 0.001, "loss": 0.4231, "step": 3066 }, { "epoch": 0.0846252806644202, "grad_norm": 0.002973797731101513, "learning_rate": 0.001, "loss": 0.4345, "step": 3067 }, { "epoch": 0.08465287286548458, "grad_norm": 0.0027056816034018993, "learning_rate": 0.001, "loss": 0.3766, "step": 3068 }, { "epoch": 0.08468046506654894, "grad_norm": 0.008972891606390476, "learning_rate": 0.001, "loss": 0.4121, "step": 3069 }, { "epoch": 0.08470805726761331, "grad_norm": 0.003379418281838298, "learning_rate": 0.001, "loss": 0.4089, "step": 3070 }, { "epoch": 0.08473564946867768, "grad_norm": 0.004911572206765413, "learning_rate": 0.001, "loss": 0.3753, "step": 3071 }, { "epoch": 0.08476324166974204, "grad_norm": 0.003271787893027067, "learning_rate": 0.001, "loss": 0.4428, "step": 3072 }, { "epoch": 0.08479083387080642, "grad_norm": 0.0034387686755508184, "learning_rate": 0.001, "loss": 0.3708, "step": 3073 }, { "epoch": 0.08481842607187079, "grad_norm": 0.004575818777084351, "learning_rate": 0.001, "loss": 0.4035, "step": 3074 }, { "epoch": 0.08484601827293516, "grad_norm": 0.040848325937986374, "learning_rate": 0.001, "loss": 0.3992, "step": 3075 }, { "epoch": 0.08487361047399952, "grad_norm": 0.009725205600261688, "learning_rate": 0.001, "loss": 0.3652, "step": 3076 }, { "epoch": 0.08490120267506389, "grad_norm": 0.008186236955225468, "learning_rate": 0.001, "loss": 0.4051, "step": 3077 }, { "epoch": 0.08492879487612827, "grad_norm": 0.0035197827965021133, "learning_rate": 0.001, "loss": 0.445, "step": 3078 }, { "epoch": 0.08495638707719264, "grad_norm": 0.002530729863792658, "learning_rate": 0.001, "loss": 0.3985, "step": 3079 }, { "epoch": 0.084983979278257, "grad_norm": 0.002124677412211895, "learning_rate": 0.001, "loss": 0.4173, "step": 3080 }, { "epoch": 0.08501157147932137, "grad_norm": 0.0033621720504015684, "learning_rate": 0.001, "loss": 0.3879, "step": 3081 }, { "epoch": 0.08503916368038573, "grad_norm": 0.003055825363844633, "learning_rate": 0.001, "loss": 0.3891, "step": 3082 }, { "epoch": 0.0850667558814501, "grad_norm": 0.002876532031223178, "learning_rate": 0.001, "loss": 0.4144, "step": 3083 }, { "epoch": 0.08509434808251448, "grad_norm": 0.0029174459632486105, "learning_rate": 0.001, "loss": 0.4152, "step": 3084 }, { "epoch": 0.08512194028357885, "grad_norm": 0.002031163312494755, "learning_rate": 0.001, "loss": 0.3927, "step": 3085 }, { "epoch": 0.08514953248464321, "grad_norm": 0.003064326476305723, "learning_rate": 0.001, "loss": 0.3883, "step": 3086 }, { "epoch": 0.08517712468570758, "grad_norm": 0.003118072636425495, "learning_rate": 0.001, "loss": 0.4068, "step": 3087 }, { "epoch": 0.08520471688677195, "grad_norm": 0.0032082500401884317, "learning_rate": 0.001, "loss": 0.4125, "step": 3088 }, { "epoch": 0.08523230908783633, "grad_norm": 0.005306860897690058, "learning_rate": 0.001, "loss": 0.3843, "step": 3089 }, { "epoch": 0.0852599012889007, "grad_norm": 0.004819758236408234, "learning_rate": 0.001, "loss": 0.4069, "step": 3090 }, { "epoch": 0.08528749348996506, "grad_norm": 0.003489856142550707, "learning_rate": 0.001, "loss": 0.3821, "step": 3091 }, { "epoch": 0.08531508569102943, "grad_norm": 0.002650489332154393, "learning_rate": 0.001, "loss": 0.4061, "step": 3092 }, { "epoch": 0.0853426778920938, "grad_norm": 0.006152056623250246, "learning_rate": 0.001, "loss": 0.3641, "step": 3093 }, { "epoch": 0.08537027009315817, "grad_norm": 0.002845000009983778, "learning_rate": 0.001, "loss": 0.4092, "step": 3094 }, { "epoch": 0.08539786229422254, "grad_norm": 0.012226511724293232, "learning_rate": 0.001, "loss": 0.3805, "step": 3095 }, { "epoch": 0.0854254544952869, "grad_norm": 0.003854371840134263, "learning_rate": 0.001, "loss": 0.3938, "step": 3096 }, { "epoch": 0.08545304669635127, "grad_norm": 0.0025693075731396675, "learning_rate": 0.001, "loss": 0.413, "step": 3097 }, { "epoch": 0.08548063889741564, "grad_norm": 0.006640615873038769, "learning_rate": 0.001, "loss": 0.3868, "step": 3098 }, { "epoch": 0.08550823109848002, "grad_norm": 0.006392517127096653, "learning_rate": 0.001, "loss": 0.396, "step": 3099 }, { "epoch": 0.08553582329954439, "grad_norm": 0.004845835268497467, "learning_rate": 0.001, "loss": 0.3825, "step": 3100 }, { "epoch": 0.08556341550060875, "grad_norm": 0.005125217605382204, "learning_rate": 0.001, "loss": 0.3911, "step": 3101 }, { "epoch": 0.08559100770167312, "grad_norm": 0.002894837176427245, "learning_rate": 0.001, "loss": 0.4292, "step": 3102 }, { "epoch": 0.08561859990273749, "grad_norm": 0.0028156936168670654, "learning_rate": 0.001, "loss": 0.4005, "step": 3103 }, { "epoch": 0.08564619210380187, "grad_norm": 0.0022546479012817144, "learning_rate": 0.001, "loss": 0.3995, "step": 3104 }, { "epoch": 0.08567378430486623, "grad_norm": 0.0024128523655235767, "learning_rate": 0.001, "loss": 0.4012, "step": 3105 }, { "epoch": 0.0857013765059306, "grad_norm": 0.0024307817220687866, "learning_rate": 0.001, "loss": 0.3972, "step": 3106 }, { "epoch": 0.08572896870699497, "grad_norm": 0.0030128727667033672, "learning_rate": 0.001, "loss": 0.3963, "step": 3107 }, { "epoch": 0.08575656090805933, "grad_norm": 0.00243613263592124, "learning_rate": 0.001, "loss": 0.4105, "step": 3108 }, { "epoch": 0.08578415310912371, "grad_norm": 0.0021383436396718025, "learning_rate": 0.001, "loss": 0.3999, "step": 3109 }, { "epoch": 0.08581174531018808, "grad_norm": 0.003478130092844367, "learning_rate": 0.001, "loss": 0.4161, "step": 3110 }, { "epoch": 0.08583933751125244, "grad_norm": 0.0026314982678741217, "learning_rate": 0.001, "loss": 0.4143, "step": 3111 }, { "epoch": 0.08586692971231681, "grad_norm": 0.0033548614010214806, "learning_rate": 0.001, "loss": 0.3965, "step": 3112 }, { "epoch": 0.08589452191338118, "grad_norm": 0.0054088556207716465, "learning_rate": 0.001, "loss": 0.3519, "step": 3113 }, { "epoch": 0.08592211411444556, "grad_norm": 0.0038994327187538147, "learning_rate": 0.001, "loss": 0.4049, "step": 3114 }, { "epoch": 0.08594970631550992, "grad_norm": 0.0026842011138796806, "learning_rate": 0.001, "loss": 0.4058, "step": 3115 }, { "epoch": 0.08597729851657429, "grad_norm": 0.0026092708576470613, "learning_rate": 0.001, "loss": 0.431, "step": 3116 }, { "epoch": 0.08600489071763866, "grad_norm": 0.004279262386262417, "learning_rate": 0.001, "loss": 0.3745, "step": 3117 }, { "epoch": 0.08603248291870302, "grad_norm": 0.0036713224835693836, "learning_rate": 0.001, "loss": 0.3371, "step": 3118 }, { "epoch": 0.0860600751197674, "grad_norm": 0.004069841001182795, "learning_rate": 0.001, "loss": 0.3678, "step": 3119 }, { "epoch": 0.08608766732083177, "grad_norm": 0.003785800887271762, "learning_rate": 0.001, "loss": 0.3646, "step": 3120 }, { "epoch": 0.08611525952189614, "grad_norm": 0.003927123267203569, "learning_rate": 0.001, "loss": 0.3829, "step": 3121 }, { "epoch": 0.0861428517229605, "grad_norm": 0.004813566338270903, "learning_rate": 0.001, "loss": 0.4, "step": 3122 }, { "epoch": 0.08617044392402487, "grad_norm": 0.002674676012247801, "learning_rate": 0.001, "loss": 0.415, "step": 3123 }, { "epoch": 0.08619803612508925, "grad_norm": 0.0027380958199501038, "learning_rate": 0.001, "loss": 0.4164, "step": 3124 }, { "epoch": 0.08622562832615362, "grad_norm": 0.006593447644263506, "learning_rate": 0.001, "loss": 0.4127, "step": 3125 }, { "epoch": 0.08625322052721798, "grad_norm": 0.005558252800256014, "learning_rate": 0.001, "loss": 0.4081, "step": 3126 }, { "epoch": 0.08628081272828235, "grad_norm": 0.003584665711969137, "learning_rate": 0.001, "loss": 0.3668, "step": 3127 }, { "epoch": 0.08630840492934672, "grad_norm": 0.004839813802391291, "learning_rate": 0.001, "loss": 0.4007, "step": 3128 }, { "epoch": 0.08633599713041108, "grad_norm": 0.0030945283360779285, "learning_rate": 0.001, "loss": 0.4139, "step": 3129 }, { "epoch": 0.08636358933147546, "grad_norm": 0.005693027749657631, "learning_rate": 0.001, "loss": 0.3907, "step": 3130 }, { "epoch": 0.08639118153253983, "grad_norm": 0.0034251015167683363, "learning_rate": 0.001, "loss": 0.4084, "step": 3131 }, { "epoch": 0.0864187737336042, "grad_norm": 0.0030861585400998592, "learning_rate": 0.001, "loss": 0.4367, "step": 3132 }, { "epoch": 0.08644636593466856, "grad_norm": 0.0027904235757887363, "learning_rate": 0.001, "loss": 0.394, "step": 3133 }, { "epoch": 0.08647395813573293, "grad_norm": 0.003176966914907098, "learning_rate": 0.001, "loss": 0.407, "step": 3134 }, { "epoch": 0.08650155033679731, "grad_norm": 0.007988881319761276, "learning_rate": 0.001, "loss": 0.3868, "step": 3135 }, { "epoch": 0.08652914253786168, "grad_norm": 0.0030314160976558924, "learning_rate": 0.001, "loss": 0.4034, "step": 3136 }, { "epoch": 0.08655673473892604, "grad_norm": 0.0032279801089316607, "learning_rate": 0.001, "loss": 0.4025, "step": 3137 }, { "epoch": 0.08658432693999041, "grad_norm": 0.0031009484082460403, "learning_rate": 0.001, "loss": 0.3807, "step": 3138 }, { "epoch": 0.08661191914105477, "grad_norm": 0.0032069345470517874, "learning_rate": 0.001, "loss": 0.3908, "step": 3139 }, { "epoch": 0.08663951134211915, "grad_norm": 0.005810491740703583, "learning_rate": 0.001, "loss": 0.3924, "step": 3140 }, { "epoch": 0.08666710354318352, "grad_norm": 0.003494016360491514, "learning_rate": 0.001, "loss": 0.4004, "step": 3141 }, { "epoch": 0.08669469574424789, "grad_norm": 0.0038405335508286953, "learning_rate": 0.001, "loss": 0.4051, "step": 3142 }, { "epoch": 0.08672228794531225, "grad_norm": 0.03096974827349186, "learning_rate": 0.001, "loss": 0.4013, "step": 3143 }, { "epoch": 0.08674988014637662, "grad_norm": 0.007581043988466263, "learning_rate": 0.001, "loss": 0.3751, "step": 3144 }, { "epoch": 0.086777472347441, "grad_norm": 0.00279928813688457, "learning_rate": 0.001, "loss": 0.4068, "step": 3145 }, { "epoch": 0.08680506454850537, "grad_norm": 0.003512721508741379, "learning_rate": 0.001, "loss": 0.3581, "step": 3146 }, { "epoch": 0.08683265674956973, "grad_norm": 0.0022023352794349194, "learning_rate": 0.001, "loss": 0.4173, "step": 3147 }, { "epoch": 0.0868602489506341, "grad_norm": 0.0022207568399608135, "learning_rate": 0.001, "loss": 0.4223, "step": 3148 }, { "epoch": 0.08688784115169847, "grad_norm": 0.003400396555662155, "learning_rate": 0.001, "loss": 0.3907, "step": 3149 }, { "epoch": 0.08691543335276285, "grad_norm": 0.0031312189530581236, "learning_rate": 0.001, "loss": 0.3781, "step": 3150 }, { "epoch": 0.08694302555382721, "grad_norm": 0.0022443707566708326, "learning_rate": 0.001, "loss": 0.3987, "step": 3151 }, { "epoch": 0.08697061775489158, "grad_norm": 0.0031315688975155354, "learning_rate": 0.001, "loss": 0.3965, "step": 3152 }, { "epoch": 0.08699820995595595, "grad_norm": 0.0029569200705736876, "learning_rate": 0.001, "loss": 0.4351, "step": 3153 }, { "epoch": 0.08702580215702031, "grad_norm": 0.0045159622095525265, "learning_rate": 0.001, "loss": 0.3658, "step": 3154 }, { "epoch": 0.08705339435808469, "grad_norm": 0.0024900075513869524, "learning_rate": 0.001, "loss": 0.4371, "step": 3155 }, { "epoch": 0.08708098655914906, "grad_norm": 0.002761411713436246, "learning_rate": 0.001, "loss": 0.3813, "step": 3156 }, { "epoch": 0.08710857876021343, "grad_norm": 0.005542066879570484, "learning_rate": 0.001, "loss": 0.4075, "step": 3157 }, { "epoch": 0.08713617096127779, "grad_norm": 0.0027586561627686024, "learning_rate": 0.001, "loss": 0.3794, "step": 3158 }, { "epoch": 0.08716376316234216, "grad_norm": 0.0025526464451104403, "learning_rate": 0.001, "loss": 0.3836, "step": 3159 }, { "epoch": 0.08719135536340654, "grad_norm": 0.0028663338162004948, "learning_rate": 0.001, "loss": 0.4001, "step": 3160 }, { "epoch": 0.0872189475644709, "grad_norm": 0.0021977245341986418, "learning_rate": 0.001, "loss": 0.3703, "step": 3161 }, { "epoch": 0.08724653976553527, "grad_norm": 0.003095234278589487, "learning_rate": 0.001, "loss": 0.378, "step": 3162 }, { "epoch": 0.08727413196659964, "grad_norm": 0.002372263465076685, "learning_rate": 0.001, "loss": 0.4313, "step": 3163 }, { "epoch": 0.087301724167664, "grad_norm": 0.0028856871649622917, "learning_rate": 0.001, "loss": 0.415, "step": 3164 }, { "epoch": 0.08732931636872839, "grad_norm": 0.006369113922119141, "learning_rate": 0.001, "loss": 0.3955, "step": 3165 }, { "epoch": 0.08735690856979275, "grad_norm": 0.00523237232118845, "learning_rate": 0.001, "loss": 0.3853, "step": 3166 }, { "epoch": 0.08738450077085712, "grad_norm": 0.0037964817602187395, "learning_rate": 0.001, "loss": 0.4094, "step": 3167 }, { "epoch": 0.08741209297192148, "grad_norm": 0.004853063262999058, "learning_rate": 0.001, "loss": 0.3877, "step": 3168 }, { "epoch": 0.08743968517298585, "grad_norm": 0.0025882385671138763, "learning_rate": 0.001, "loss": 0.3788, "step": 3169 }, { "epoch": 0.08746727737405023, "grad_norm": 0.002228371100500226, "learning_rate": 0.001, "loss": 0.468, "step": 3170 }, { "epoch": 0.0874948695751146, "grad_norm": 0.003534890478476882, "learning_rate": 0.001, "loss": 0.3951, "step": 3171 }, { "epoch": 0.08752246177617896, "grad_norm": 0.002975932788103819, "learning_rate": 0.001, "loss": 0.3763, "step": 3172 }, { "epoch": 0.08755005397724333, "grad_norm": 0.0035217402037233114, "learning_rate": 0.001, "loss": 0.4021, "step": 3173 }, { "epoch": 0.0875776461783077, "grad_norm": 0.002465012250468135, "learning_rate": 0.001, "loss": 0.3913, "step": 3174 }, { "epoch": 0.08760523837937208, "grad_norm": 0.003945467062294483, "learning_rate": 0.001, "loss": 0.391, "step": 3175 }, { "epoch": 0.08763283058043644, "grad_norm": 0.003953505773097277, "learning_rate": 0.001, "loss": 0.4273, "step": 3176 }, { "epoch": 0.08766042278150081, "grad_norm": 0.004156408831477165, "learning_rate": 0.001, "loss": 0.403, "step": 3177 }, { "epoch": 0.08768801498256518, "grad_norm": 0.0024448104668408632, "learning_rate": 0.001, "loss": 0.4369, "step": 3178 }, { "epoch": 0.08771560718362954, "grad_norm": 0.0024597158189862967, "learning_rate": 0.001, "loss": 0.425, "step": 3179 }, { "epoch": 0.08774319938469391, "grad_norm": 0.003507564775645733, "learning_rate": 0.001, "loss": 0.364, "step": 3180 }, { "epoch": 0.08777079158575829, "grad_norm": 0.002295244485139847, "learning_rate": 0.001, "loss": 0.4024, "step": 3181 }, { "epoch": 0.08779838378682266, "grad_norm": 0.0022501791827380657, "learning_rate": 0.001, "loss": 0.4159, "step": 3182 }, { "epoch": 0.08782597598788702, "grad_norm": 0.0027677484322339296, "learning_rate": 0.001, "loss": 0.3919, "step": 3183 }, { "epoch": 0.08785356818895139, "grad_norm": 0.0029434715397655964, "learning_rate": 0.001, "loss": 0.3447, "step": 3184 }, { "epoch": 0.08788116039001576, "grad_norm": 0.0024778309743851423, "learning_rate": 0.001, "loss": 0.3948, "step": 3185 }, { "epoch": 0.08790875259108014, "grad_norm": 0.0023013644386082888, "learning_rate": 0.001, "loss": 0.4046, "step": 3186 }, { "epoch": 0.0879363447921445, "grad_norm": 0.0019937290344387293, "learning_rate": 0.001, "loss": 0.433, "step": 3187 }, { "epoch": 0.08796393699320887, "grad_norm": 0.009529836475849152, "learning_rate": 0.001, "loss": 0.3898, "step": 3188 }, { "epoch": 0.08799152919427324, "grad_norm": 0.0029238457791507244, "learning_rate": 0.001, "loss": 0.3719, "step": 3189 }, { "epoch": 0.0880191213953376, "grad_norm": 0.0023196239490062, "learning_rate": 0.001, "loss": 0.4035, "step": 3190 }, { "epoch": 0.08804671359640198, "grad_norm": 0.0024587539955973625, "learning_rate": 0.001, "loss": 0.426, "step": 3191 }, { "epoch": 0.08807430579746635, "grad_norm": 0.004239838104695082, "learning_rate": 0.001, "loss": 0.3774, "step": 3192 }, { "epoch": 0.08810189799853071, "grad_norm": 0.003386555938050151, "learning_rate": 0.001, "loss": 0.3839, "step": 3193 }, { "epoch": 0.08812949019959508, "grad_norm": 0.0025654241908341646, "learning_rate": 0.001, "loss": 0.3882, "step": 3194 }, { "epoch": 0.08815708240065945, "grad_norm": 0.002837128471583128, "learning_rate": 0.001, "loss": 0.4287, "step": 3195 }, { "epoch": 0.08818467460172383, "grad_norm": 0.0034069865942001343, "learning_rate": 0.001, "loss": 0.3413, "step": 3196 }, { "epoch": 0.0882122668027882, "grad_norm": 0.002375151729211211, "learning_rate": 0.001, "loss": 0.4089, "step": 3197 }, { "epoch": 0.08823985900385256, "grad_norm": 0.003615601221099496, "learning_rate": 0.001, "loss": 0.4382, "step": 3198 }, { "epoch": 0.08826745120491693, "grad_norm": 0.0048666223883628845, "learning_rate": 0.001, "loss": 0.3939, "step": 3199 }, { "epoch": 0.0882950434059813, "grad_norm": 0.0025259742978960276, "learning_rate": 0.001, "loss": 0.3826, "step": 3200 }, { "epoch": 0.08832263560704567, "grad_norm": 0.006654439959675074, "learning_rate": 0.001, "loss": 0.4076, "step": 3201 }, { "epoch": 0.08835022780811004, "grad_norm": 0.002633353229612112, "learning_rate": 0.001, "loss": 0.4206, "step": 3202 }, { "epoch": 0.08837782000917441, "grad_norm": 0.0028525053057819605, "learning_rate": 0.001, "loss": 0.4091, "step": 3203 }, { "epoch": 0.08840541221023877, "grad_norm": 0.0071784635074436665, "learning_rate": 0.001, "loss": 0.4055, "step": 3204 }, { "epoch": 0.08843300441130314, "grad_norm": 0.0030664210207760334, "learning_rate": 0.001, "loss": 0.3847, "step": 3205 }, { "epoch": 0.08846059661236752, "grad_norm": 0.0034981323406100273, "learning_rate": 0.001, "loss": 0.3855, "step": 3206 }, { "epoch": 0.08848818881343189, "grad_norm": 0.006974066607654095, "learning_rate": 0.001, "loss": 0.458, "step": 3207 }, { "epoch": 0.08851578101449625, "grad_norm": 0.002912199590355158, "learning_rate": 0.001, "loss": 0.416, "step": 3208 }, { "epoch": 0.08854337321556062, "grad_norm": 0.0036544064059853554, "learning_rate": 0.001, "loss": 0.4617, "step": 3209 }, { "epoch": 0.08857096541662499, "grad_norm": 0.002682635560631752, "learning_rate": 0.001, "loss": 0.3695, "step": 3210 }, { "epoch": 0.08859855761768937, "grad_norm": 0.0032552520278841257, "learning_rate": 0.001, "loss": 0.4154, "step": 3211 }, { "epoch": 0.08862614981875373, "grad_norm": 0.003252149559557438, "learning_rate": 0.001, "loss": 0.425, "step": 3212 }, { "epoch": 0.0886537420198181, "grad_norm": 0.004062923137098551, "learning_rate": 0.001, "loss": 0.4092, "step": 3213 }, { "epoch": 0.08868133422088247, "grad_norm": 0.0034245557617396116, "learning_rate": 0.001, "loss": 0.3926, "step": 3214 }, { "epoch": 0.08870892642194683, "grad_norm": 0.00209795287810266, "learning_rate": 0.001, "loss": 0.4181, "step": 3215 }, { "epoch": 0.08873651862301121, "grad_norm": 0.002493221778422594, "learning_rate": 0.001, "loss": 0.4211, "step": 3216 }, { "epoch": 0.08876411082407558, "grad_norm": 0.0032907910645008087, "learning_rate": 0.001, "loss": 0.423, "step": 3217 }, { "epoch": 0.08879170302513995, "grad_norm": 0.0022613955661654472, "learning_rate": 0.001, "loss": 0.406, "step": 3218 }, { "epoch": 0.08881929522620431, "grad_norm": 0.0027464418672025204, "learning_rate": 0.001, "loss": 0.3534, "step": 3219 }, { "epoch": 0.08884688742726868, "grad_norm": 0.0038859571795910597, "learning_rate": 0.001, "loss": 0.3634, "step": 3220 }, { "epoch": 0.08887447962833306, "grad_norm": 0.002589226933196187, "learning_rate": 0.001, "loss": 0.3884, "step": 3221 }, { "epoch": 0.08890207182939742, "grad_norm": 0.003947058692574501, "learning_rate": 0.001, "loss": 0.4078, "step": 3222 }, { "epoch": 0.08892966403046179, "grad_norm": 0.0034198507200926542, "learning_rate": 0.001, "loss": 0.3794, "step": 3223 }, { "epoch": 0.08895725623152616, "grad_norm": 0.0028758652042597532, "learning_rate": 0.001, "loss": 0.4017, "step": 3224 }, { "epoch": 0.08898484843259052, "grad_norm": 0.002941399347037077, "learning_rate": 0.001, "loss": 0.4374, "step": 3225 }, { "epoch": 0.08901244063365489, "grad_norm": 0.0027777596842497587, "learning_rate": 0.001, "loss": 0.4068, "step": 3226 }, { "epoch": 0.08904003283471927, "grad_norm": 0.00824811402708292, "learning_rate": 0.001, "loss": 0.4095, "step": 3227 }, { "epoch": 0.08906762503578364, "grad_norm": 0.002942741382867098, "learning_rate": 0.001, "loss": 0.4186, "step": 3228 }, { "epoch": 0.089095217236848, "grad_norm": 0.0034886416979134083, "learning_rate": 0.001, "loss": 0.3866, "step": 3229 }, { "epoch": 0.08912280943791237, "grad_norm": 0.0034925418440252542, "learning_rate": 0.001, "loss": 0.3918, "step": 3230 }, { "epoch": 0.08915040163897674, "grad_norm": 0.0031590645667165518, "learning_rate": 0.001, "loss": 0.4043, "step": 3231 }, { "epoch": 0.08917799384004112, "grad_norm": 0.00394233874976635, "learning_rate": 0.001, "loss": 0.412, "step": 3232 }, { "epoch": 0.08920558604110548, "grad_norm": 0.002954542636871338, "learning_rate": 0.001, "loss": 0.4271, "step": 3233 }, { "epoch": 0.08923317824216985, "grad_norm": 0.0020198922138661146, "learning_rate": 0.001, "loss": 0.4055, "step": 3234 }, { "epoch": 0.08926077044323422, "grad_norm": 0.0032049540895968676, "learning_rate": 0.001, "loss": 0.3885, "step": 3235 }, { "epoch": 0.08928836264429858, "grad_norm": 0.0024705370888113976, "learning_rate": 0.001, "loss": 0.421, "step": 3236 }, { "epoch": 0.08931595484536296, "grad_norm": 0.0023976247757673264, "learning_rate": 0.001, "loss": 0.4301, "step": 3237 }, { "epoch": 0.08934354704642733, "grad_norm": 0.0025141413789242506, "learning_rate": 0.001, "loss": 0.3916, "step": 3238 }, { "epoch": 0.0893711392474917, "grad_norm": 0.0034452369436621666, "learning_rate": 0.001, "loss": 0.4055, "step": 3239 }, { "epoch": 0.08939873144855606, "grad_norm": 0.0036900045815855265, "learning_rate": 0.001, "loss": 0.4006, "step": 3240 }, { "epoch": 0.08942632364962043, "grad_norm": 0.0030278146732598543, "learning_rate": 0.001, "loss": 0.3918, "step": 3241 }, { "epoch": 0.08945391585068481, "grad_norm": 0.0027707985136657953, "learning_rate": 0.001, "loss": 0.3721, "step": 3242 }, { "epoch": 0.08948150805174918, "grad_norm": 0.0024415196385234594, "learning_rate": 0.001, "loss": 0.3999, "step": 3243 }, { "epoch": 0.08950910025281354, "grad_norm": 0.0054499804973602295, "learning_rate": 0.001, "loss": 0.423, "step": 3244 }, { "epoch": 0.08953669245387791, "grad_norm": 0.0034079072065651417, "learning_rate": 0.001, "loss": 0.3607, "step": 3245 }, { "epoch": 0.08956428465494227, "grad_norm": 0.0038858712650835514, "learning_rate": 0.001, "loss": 0.3912, "step": 3246 }, { "epoch": 0.08959187685600666, "grad_norm": 0.007187449838966131, "learning_rate": 0.001, "loss": 0.4164, "step": 3247 }, { "epoch": 0.08961946905707102, "grad_norm": 0.0033226367086172104, "learning_rate": 0.001, "loss": 0.4007, "step": 3248 }, { "epoch": 0.08964706125813539, "grad_norm": 0.0028839895967394114, "learning_rate": 0.001, "loss": 0.4179, "step": 3249 }, { "epoch": 0.08967465345919975, "grad_norm": 0.0032480424270033836, "learning_rate": 0.001, "loss": 0.3933, "step": 3250 }, { "epoch": 0.08970224566026412, "grad_norm": 0.004398911260068417, "learning_rate": 0.001, "loss": 0.3914, "step": 3251 }, { "epoch": 0.0897298378613285, "grad_norm": 0.00363975390791893, "learning_rate": 0.001, "loss": 0.4069, "step": 3252 }, { "epoch": 0.08975743006239287, "grad_norm": 0.008247625082731247, "learning_rate": 0.001, "loss": 0.4085, "step": 3253 }, { "epoch": 0.08978502226345723, "grad_norm": 0.003181576495990157, "learning_rate": 0.001, "loss": 0.3846, "step": 3254 }, { "epoch": 0.0898126144645216, "grad_norm": 0.005014390219002962, "learning_rate": 0.001, "loss": 0.4159, "step": 3255 }, { "epoch": 0.08984020666558597, "grad_norm": 0.0027002303395420313, "learning_rate": 0.001, "loss": 0.3703, "step": 3256 }, { "epoch": 0.08986779886665035, "grad_norm": 0.0028025219216942787, "learning_rate": 0.001, "loss": 0.4148, "step": 3257 }, { "epoch": 0.08989539106771471, "grad_norm": 0.0026610197965055704, "learning_rate": 0.001, "loss": 0.3921, "step": 3258 }, { "epoch": 0.08992298326877908, "grad_norm": 0.003577177645638585, "learning_rate": 0.001, "loss": 0.3467, "step": 3259 }, { "epoch": 0.08995057546984345, "grad_norm": 0.0036113469395786524, "learning_rate": 0.001, "loss": 0.3812, "step": 3260 }, { "epoch": 0.08997816767090781, "grad_norm": 0.0033262385986745358, "learning_rate": 0.001, "loss": 0.3851, "step": 3261 }, { "epoch": 0.0900057598719722, "grad_norm": 0.0021796945948153734, "learning_rate": 0.001, "loss": 0.4053, "step": 3262 }, { "epoch": 0.09003335207303656, "grad_norm": 0.002145004691556096, "learning_rate": 0.001, "loss": 0.3929, "step": 3263 }, { "epoch": 0.09006094427410093, "grad_norm": 0.0034920983016490936, "learning_rate": 0.001, "loss": 0.397, "step": 3264 }, { "epoch": 0.09008853647516529, "grad_norm": 0.0032197278924286366, "learning_rate": 0.001, "loss": 0.3785, "step": 3265 }, { "epoch": 0.09011612867622966, "grad_norm": 0.0023128585889935493, "learning_rate": 0.001, "loss": 0.4024, "step": 3266 }, { "epoch": 0.09014372087729404, "grad_norm": 0.0028581952210515738, "learning_rate": 0.001, "loss": 0.3924, "step": 3267 }, { "epoch": 0.0901713130783584, "grad_norm": 0.002962828380987048, "learning_rate": 0.001, "loss": 0.3842, "step": 3268 }, { "epoch": 0.09019890527942277, "grad_norm": 0.004223112482577562, "learning_rate": 0.001, "loss": 0.4086, "step": 3269 }, { "epoch": 0.09022649748048714, "grad_norm": 0.02053755894303322, "learning_rate": 0.001, "loss": 0.3863, "step": 3270 }, { "epoch": 0.0902540896815515, "grad_norm": 0.0043358695693314075, "learning_rate": 0.001, "loss": 0.4023, "step": 3271 }, { "epoch": 0.09028168188261587, "grad_norm": 0.003768104827031493, "learning_rate": 0.001, "loss": 0.3758, "step": 3272 }, { "epoch": 0.09030927408368025, "grad_norm": 0.0026477002538740635, "learning_rate": 0.001, "loss": 0.3851, "step": 3273 }, { "epoch": 0.09033686628474462, "grad_norm": 0.003647193778306246, "learning_rate": 0.001, "loss": 0.3826, "step": 3274 }, { "epoch": 0.09036445848580898, "grad_norm": 0.002453665481880307, "learning_rate": 0.001, "loss": 0.407, "step": 3275 }, { "epoch": 0.09039205068687335, "grad_norm": 0.002882964676246047, "learning_rate": 0.001, "loss": 0.3935, "step": 3276 }, { "epoch": 0.09041964288793772, "grad_norm": 0.00521261477842927, "learning_rate": 0.001, "loss": 0.4009, "step": 3277 }, { "epoch": 0.0904472350890021, "grad_norm": 0.002587896538898349, "learning_rate": 0.001, "loss": 0.4291, "step": 3278 }, { "epoch": 0.09047482729006646, "grad_norm": 0.002533156191930175, "learning_rate": 0.001, "loss": 0.3831, "step": 3279 }, { "epoch": 0.09050241949113083, "grad_norm": 0.0027842391282320023, "learning_rate": 0.001, "loss": 0.3854, "step": 3280 }, { "epoch": 0.0905300116921952, "grad_norm": 0.0023463580291718245, "learning_rate": 0.001, "loss": 0.4197, "step": 3281 }, { "epoch": 0.09055760389325956, "grad_norm": 0.0023778725881129503, "learning_rate": 0.001, "loss": 0.3927, "step": 3282 }, { "epoch": 0.09058519609432394, "grad_norm": 0.0035691028460860252, "learning_rate": 0.001, "loss": 0.3971, "step": 3283 }, { "epoch": 0.09061278829538831, "grad_norm": 0.0037309350445866585, "learning_rate": 0.001, "loss": 0.3752, "step": 3284 }, { "epoch": 0.09064038049645268, "grad_norm": 0.002625760156661272, "learning_rate": 0.001, "loss": 0.4123, "step": 3285 }, { "epoch": 0.09066797269751704, "grad_norm": 0.003352556610479951, "learning_rate": 0.001, "loss": 0.4268, "step": 3286 }, { "epoch": 0.09069556489858141, "grad_norm": 0.0050033205188810825, "learning_rate": 0.001, "loss": 0.3944, "step": 3287 }, { "epoch": 0.09072315709964579, "grad_norm": 0.002488895785063505, "learning_rate": 0.001, "loss": 0.4261, "step": 3288 }, { "epoch": 0.09075074930071016, "grad_norm": 0.0031474691350013018, "learning_rate": 0.001, "loss": 0.3763, "step": 3289 }, { "epoch": 0.09077834150177452, "grad_norm": 0.002337649930268526, "learning_rate": 0.001, "loss": 0.4015, "step": 3290 }, { "epoch": 0.09080593370283889, "grad_norm": 0.0034202388487756252, "learning_rate": 0.001, "loss": 0.4156, "step": 3291 }, { "epoch": 0.09083352590390326, "grad_norm": 0.004500823561102152, "learning_rate": 0.001, "loss": 0.3663, "step": 3292 }, { "epoch": 0.09086111810496764, "grad_norm": 0.0026400513015687466, "learning_rate": 0.001, "loss": 0.4163, "step": 3293 }, { "epoch": 0.090888710306032, "grad_norm": 0.002835595514625311, "learning_rate": 0.001, "loss": 0.388, "step": 3294 }, { "epoch": 0.09091630250709637, "grad_norm": 0.0021484890021383762, "learning_rate": 0.001, "loss": 0.3625, "step": 3295 }, { "epoch": 0.09094389470816074, "grad_norm": 0.002589087001979351, "learning_rate": 0.001, "loss": 0.4258, "step": 3296 }, { "epoch": 0.0909714869092251, "grad_norm": 0.004091163631528616, "learning_rate": 0.001, "loss": 0.3974, "step": 3297 }, { "epoch": 0.09099907911028948, "grad_norm": 0.002472213003784418, "learning_rate": 0.001, "loss": 0.4512, "step": 3298 }, { "epoch": 0.09102667131135385, "grad_norm": 0.0025115078315138817, "learning_rate": 0.001, "loss": 0.4038, "step": 3299 }, { "epoch": 0.09105426351241822, "grad_norm": 0.002945608925074339, "learning_rate": 0.001, "loss": 0.3899, "step": 3300 }, { "epoch": 0.09108185571348258, "grad_norm": 0.0024975589476525784, "learning_rate": 0.001, "loss": 0.357, "step": 3301 }, { "epoch": 0.09110944791454695, "grad_norm": 0.003322755452245474, "learning_rate": 0.001, "loss": 0.4099, "step": 3302 }, { "epoch": 0.09113704011561133, "grad_norm": 0.003869826439768076, "learning_rate": 0.001, "loss": 0.3477, "step": 3303 }, { "epoch": 0.0911646323166757, "grad_norm": 0.0036806685384362936, "learning_rate": 0.001, "loss": 0.4237, "step": 3304 }, { "epoch": 0.09119222451774006, "grad_norm": 0.002534373663365841, "learning_rate": 0.001, "loss": 0.4156, "step": 3305 }, { "epoch": 0.09121981671880443, "grad_norm": 0.002670099725946784, "learning_rate": 0.001, "loss": 0.4474, "step": 3306 }, { "epoch": 0.0912474089198688, "grad_norm": 0.0026154613588005304, "learning_rate": 0.001, "loss": 0.3922, "step": 3307 }, { "epoch": 0.09127500112093317, "grad_norm": 0.002783595584332943, "learning_rate": 0.001, "loss": 0.3937, "step": 3308 }, { "epoch": 0.09130259332199754, "grad_norm": 0.0024160996545106173, "learning_rate": 0.001, "loss": 0.4107, "step": 3309 }, { "epoch": 0.09133018552306191, "grad_norm": 0.0040392628870904446, "learning_rate": 0.001, "loss": 0.3911, "step": 3310 }, { "epoch": 0.09135777772412627, "grad_norm": 0.0036512308288365602, "learning_rate": 0.001, "loss": 0.3794, "step": 3311 }, { "epoch": 0.09138536992519064, "grad_norm": 0.0026296162977814674, "learning_rate": 0.001, "loss": 0.3898, "step": 3312 }, { "epoch": 0.09141296212625502, "grad_norm": 0.002523603616282344, "learning_rate": 0.001, "loss": 0.4242, "step": 3313 }, { "epoch": 0.09144055432731939, "grad_norm": 0.0027449633926153183, "learning_rate": 0.001, "loss": 0.4058, "step": 3314 }, { "epoch": 0.09146814652838375, "grad_norm": 0.004635502118617296, "learning_rate": 0.001, "loss": 0.3953, "step": 3315 }, { "epoch": 0.09149573872944812, "grad_norm": 0.0026435446925461292, "learning_rate": 0.001, "loss": 0.4115, "step": 3316 }, { "epoch": 0.09152333093051249, "grad_norm": 0.012709138914942741, "learning_rate": 0.001, "loss": 0.3866, "step": 3317 }, { "epoch": 0.09155092313157685, "grad_norm": 0.0032850292045623064, "learning_rate": 0.001, "loss": 0.4388, "step": 3318 }, { "epoch": 0.09157851533264123, "grad_norm": 0.0038955537602305412, "learning_rate": 0.001, "loss": 0.3689, "step": 3319 }, { "epoch": 0.0916061075337056, "grad_norm": 0.004457899369299412, "learning_rate": 0.001, "loss": 0.4155, "step": 3320 }, { "epoch": 0.09163369973476997, "grad_norm": 0.002992943860590458, "learning_rate": 0.001, "loss": 0.4278, "step": 3321 }, { "epoch": 0.09166129193583433, "grad_norm": 0.004083162639290094, "learning_rate": 0.001, "loss": 0.4232, "step": 3322 }, { "epoch": 0.0916888841368987, "grad_norm": 0.0032160687260329723, "learning_rate": 0.001, "loss": 0.4005, "step": 3323 }, { "epoch": 0.09171647633796308, "grad_norm": 0.003969733603298664, "learning_rate": 0.001, "loss": 0.4146, "step": 3324 }, { "epoch": 0.09174406853902745, "grad_norm": 0.004920699633657932, "learning_rate": 0.001, "loss": 0.3879, "step": 3325 }, { "epoch": 0.09177166074009181, "grad_norm": 0.005621886812150478, "learning_rate": 0.001, "loss": 0.4114, "step": 3326 }, { "epoch": 0.09179925294115618, "grad_norm": 0.015430702827870846, "learning_rate": 0.001, "loss": 0.389, "step": 3327 }, { "epoch": 0.09182684514222055, "grad_norm": 0.004751747474074364, "learning_rate": 0.001, "loss": 0.3876, "step": 3328 }, { "epoch": 0.09185443734328493, "grad_norm": 0.0023684531915932894, "learning_rate": 0.001, "loss": 0.3866, "step": 3329 }, { "epoch": 0.09188202954434929, "grad_norm": 0.004853997845202684, "learning_rate": 0.001, "loss": 0.4107, "step": 3330 }, { "epoch": 0.09190962174541366, "grad_norm": 0.005576598923653364, "learning_rate": 0.001, "loss": 0.4054, "step": 3331 }, { "epoch": 0.09193721394647802, "grad_norm": 0.0027397077064961195, "learning_rate": 0.001, "loss": 0.415, "step": 3332 }, { "epoch": 0.09196480614754239, "grad_norm": 0.0026846746914088726, "learning_rate": 0.001, "loss": 0.3813, "step": 3333 }, { "epoch": 0.09199239834860677, "grad_norm": 0.0023833108134567738, "learning_rate": 0.001, "loss": 0.4137, "step": 3334 }, { "epoch": 0.09201999054967114, "grad_norm": 0.002954500960186124, "learning_rate": 0.001, "loss": 0.4484, "step": 3335 }, { "epoch": 0.0920475827507355, "grad_norm": 0.0023365288507193327, "learning_rate": 0.001, "loss": 0.391, "step": 3336 }, { "epoch": 0.09207517495179987, "grad_norm": 0.0031199888326227665, "learning_rate": 0.001, "loss": 0.4011, "step": 3337 }, { "epoch": 0.09210276715286424, "grad_norm": 0.00201621581800282, "learning_rate": 0.001, "loss": 0.3766, "step": 3338 }, { "epoch": 0.09213035935392862, "grad_norm": 0.0036083683371543884, "learning_rate": 0.001, "loss": 0.3851, "step": 3339 }, { "epoch": 0.09215795155499298, "grad_norm": 0.0037763137370347977, "learning_rate": 0.001, "loss": 0.3818, "step": 3340 }, { "epoch": 0.09218554375605735, "grad_norm": 0.002573802135884762, "learning_rate": 0.001, "loss": 0.4039, "step": 3341 }, { "epoch": 0.09221313595712172, "grad_norm": 0.0032212398946285248, "learning_rate": 0.001, "loss": 0.3777, "step": 3342 }, { "epoch": 0.09224072815818608, "grad_norm": 0.002471911022439599, "learning_rate": 0.001, "loss": 0.4051, "step": 3343 }, { "epoch": 0.09226832035925046, "grad_norm": 0.0030415072105824947, "learning_rate": 0.001, "loss": 0.4206, "step": 3344 }, { "epoch": 0.09229591256031483, "grad_norm": 0.0027574030682444572, "learning_rate": 0.001, "loss": 0.3627, "step": 3345 }, { "epoch": 0.0923235047613792, "grad_norm": 0.0020206805784255266, "learning_rate": 0.001, "loss": 0.4212, "step": 3346 }, { "epoch": 0.09235109696244356, "grad_norm": 0.002503051655367017, "learning_rate": 0.001, "loss": 0.3972, "step": 3347 }, { "epoch": 0.09237868916350793, "grad_norm": 0.00392269529402256, "learning_rate": 0.001, "loss": 0.4007, "step": 3348 }, { "epoch": 0.09240628136457231, "grad_norm": 0.0035916061606258154, "learning_rate": 0.001, "loss": 0.4151, "step": 3349 }, { "epoch": 0.09243387356563668, "grad_norm": 0.003222295781597495, "learning_rate": 0.001, "loss": 0.4087, "step": 3350 }, { "epoch": 0.09246146576670104, "grad_norm": 0.003206141758710146, "learning_rate": 0.001, "loss": 0.3968, "step": 3351 }, { "epoch": 0.09248905796776541, "grad_norm": 0.00480427872389555, "learning_rate": 0.001, "loss": 0.398, "step": 3352 }, { "epoch": 0.09251665016882978, "grad_norm": 0.0026392394211143255, "learning_rate": 0.001, "loss": 0.4142, "step": 3353 }, { "epoch": 0.09254424236989416, "grad_norm": 0.003221785882487893, "learning_rate": 0.001, "loss": 0.4083, "step": 3354 }, { "epoch": 0.09257183457095852, "grad_norm": 0.0030678475741297007, "learning_rate": 0.001, "loss": 0.4267, "step": 3355 }, { "epoch": 0.09259942677202289, "grad_norm": 0.0028969766572117805, "learning_rate": 0.001, "loss": 0.4222, "step": 3356 }, { "epoch": 0.09262701897308726, "grad_norm": 0.00338752125389874, "learning_rate": 0.001, "loss": 0.4088, "step": 3357 }, { "epoch": 0.09265461117415162, "grad_norm": 0.003165165428072214, "learning_rate": 0.001, "loss": 0.4127, "step": 3358 }, { "epoch": 0.092682203375216, "grad_norm": 0.002541585825383663, "learning_rate": 0.001, "loss": 0.4281, "step": 3359 }, { "epoch": 0.09270979557628037, "grad_norm": 0.0037303478457033634, "learning_rate": 0.001, "loss": 0.4178, "step": 3360 }, { "epoch": 0.09273738777734473, "grad_norm": 0.006375085562467575, "learning_rate": 0.001, "loss": 0.3953, "step": 3361 }, { "epoch": 0.0927649799784091, "grad_norm": 0.023185908794403076, "learning_rate": 0.001, "loss": 0.3785, "step": 3362 }, { "epoch": 0.09279257217947347, "grad_norm": 0.003924873657524586, "learning_rate": 0.001, "loss": 0.39, "step": 3363 }, { "epoch": 0.09282016438053785, "grad_norm": 0.0037961790803819895, "learning_rate": 0.001, "loss": 0.4434, "step": 3364 }, { "epoch": 0.09284775658160221, "grad_norm": 0.005324463825672865, "learning_rate": 0.001, "loss": 0.3622, "step": 3365 }, { "epoch": 0.09287534878266658, "grad_norm": 0.0031803487800061703, "learning_rate": 0.001, "loss": 0.3928, "step": 3366 }, { "epoch": 0.09290294098373095, "grad_norm": 0.003402640810236335, "learning_rate": 0.001, "loss": 0.4083, "step": 3367 }, { "epoch": 0.09293053318479531, "grad_norm": 0.003059846581891179, "learning_rate": 0.001, "loss": 0.3749, "step": 3368 }, { "epoch": 0.09295812538585968, "grad_norm": 0.002190982224419713, "learning_rate": 0.001, "loss": 0.4093, "step": 3369 }, { "epoch": 0.09298571758692406, "grad_norm": 0.007336355280131102, "learning_rate": 0.001, "loss": 0.4231, "step": 3370 }, { "epoch": 0.09301330978798843, "grad_norm": 0.0030043197330087423, "learning_rate": 0.001, "loss": 0.4232, "step": 3371 }, { "epoch": 0.0930409019890528, "grad_norm": 0.00424328725785017, "learning_rate": 0.001, "loss": 0.3894, "step": 3372 }, { "epoch": 0.09306849419011716, "grad_norm": 0.0036531679797917604, "learning_rate": 0.001, "loss": 0.3866, "step": 3373 }, { "epoch": 0.09309608639118153, "grad_norm": 0.0029019531793892384, "learning_rate": 0.001, "loss": 0.4291, "step": 3374 }, { "epoch": 0.0931236785922459, "grad_norm": 0.002536515239626169, "learning_rate": 0.001, "loss": 0.41, "step": 3375 }, { "epoch": 0.09315127079331027, "grad_norm": 0.003373499261215329, "learning_rate": 0.001, "loss": 0.4067, "step": 3376 }, { "epoch": 0.09317886299437464, "grad_norm": 0.0034982135985046625, "learning_rate": 0.001, "loss": 0.429, "step": 3377 }, { "epoch": 0.093206455195439, "grad_norm": 0.005799585022032261, "learning_rate": 0.001, "loss": 0.4309, "step": 3378 }, { "epoch": 0.09323404739650337, "grad_norm": 0.002723401878029108, "learning_rate": 0.001, "loss": 0.3932, "step": 3379 }, { "epoch": 0.09326163959756775, "grad_norm": 0.002761744661256671, "learning_rate": 0.001, "loss": 0.4301, "step": 3380 }, { "epoch": 0.09328923179863212, "grad_norm": 0.0033628942910581827, "learning_rate": 0.001, "loss": 0.3764, "step": 3381 }, { "epoch": 0.09331682399969649, "grad_norm": 0.004340017680078745, "learning_rate": 0.001, "loss": 0.3864, "step": 3382 }, { "epoch": 0.09334441620076085, "grad_norm": 0.00276694493368268, "learning_rate": 0.001, "loss": 0.4233, "step": 3383 }, { "epoch": 0.09337200840182522, "grad_norm": 0.0040064319036901, "learning_rate": 0.001, "loss": 0.4161, "step": 3384 }, { "epoch": 0.0933996006028896, "grad_norm": 0.004304267466068268, "learning_rate": 0.001, "loss": 0.3773, "step": 3385 }, { "epoch": 0.09342719280395397, "grad_norm": 0.0043169171549379826, "learning_rate": 0.001, "loss": 0.4078, "step": 3386 }, { "epoch": 0.09345478500501833, "grad_norm": 0.0029776948504149914, "learning_rate": 0.001, "loss": 0.3993, "step": 3387 }, { "epoch": 0.0934823772060827, "grad_norm": 0.0030496844556182623, "learning_rate": 0.001, "loss": 0.3938, "step": 3388 }, { "epoch": 0.09350996940714706, "grad_norm": 0.002344959881156683, "learning_rate": 0.001, "loss": 0.3994, "step": 3389 }, { "epoch": 0.09353756160821144, "grad_norm": 0.0035598163958638906, "learning_rate": 0.001, "loss": 0.3724, "step": 3390 }, { "epoch": 0.09356515380927581, "grad_norm": 0.002589760348200798, "learning_rate": 0.001, "loss": 0.3717, "step": 3391 }, { "epoch": 0.09359274601034018, "grad_norm": 0.003679790301248431, "learning_rate": 0.001, "loss": 0.3774, "step": 3392 }, { "epoch": 0.09362033821140454, "grad_norm": 0.0021475160028785467, "learning_rate": 0.001, "loss": 0.4379, "step": 3393 }, { "epoch": 0.09364793041246891, "grad_norm": 0.0025992344599217176, "learning_rate": 0.001, "loss": 0.3772, "step": 3394 }, { "epoch": 0.09367552261353329, "grad_norm": 0.003233823226764798, "learning_rate": 0.001, "loss": 0.3825, "step": 3395 }, { "epoch": 0.09370311481459766, "grad_norm": 0.003669161582365632, "learning_rate": 0.001, "loss": 0.4085, "step": 3396 }, { "epoch": 0.09373070701566202, "grad_norm": 0.0025238466914743185, "learning_rate": 0.001, "loss": 0.4148, "step": 3397 }, { "epoch": 0.09375829921672639, "grad_norm": 0.0033630593679845333, "learning_rate": 0.001, "loss": 0.3781, "step": 3398 }, { "epoch": 0.09378589141779076, "grad_norm": 0.003087608842179179, "learning_rate": 0.001, "loss": 0.3903, "step": 3399 }, { "epoch": 0.09381348361885514, "grad_norm": 0.003995342645794153, "learning_rate": 0.001, "loss": 0.3936, "step": 3400 }, { "epoch": 0.0938410758199195, "grad_norm": 0.0032711985986679792, "learning_rate": 0.001, "loss": 0.3869, "step": 3401 }, { "epoch": 0.09386866802098387, "grad_norm": 0.003242811420932412, "learning_rate": 0.001, "loss": 0.4031, "step": 3402 }, { "epoch": 0.09389626022204824, "grad_norm": 0.003735556034371257, "learning_rate": 0.001, "loss": 0.3986, "step": 3403 }, { "epoch": 0.0939238524231126, "grad_norm": 0.005183308385312557, "learning_rate": 0.001, "loss": 0.3932, "step": 3404 }, { "epoch": 0.09395144462417698, "grad_norm": 0.0034695270005613565, "learning_rate": 0.001, "loss": 0.3867, "step": 3405 }, { "epoch": 0.09397903682524135, "grad_norm": 0.002555908402428031, "learning_rate": 0.001, "loss": 0.3941, "step": 3406 }, { "epoch": 0.09400662902630572, "grad_norm": 0.0033101667650043964, "learning_rate": 0.001, "loss": 0.4072, "step": 3407 }, { "epoch": 0.09403422122737008, "grad_norm": 0.0035219481214880943, "learning_rate": 0.001, "loss": 0.4187, "step": 3408 }, { "epoch": 0.09406181342843445, "grad_norm": 0.002841345267370343, "learning_rate": 0.001, "loss": 0.4292, "step": 3409 }, { "epoch": 0.09408940562949883, "grad_norm": 0.0024063391610980034, "learning_rate": 0.001, "loss": 0.3992, "step": 3410 }, { "epoch": 0.0941169978305632, "grad_norm": 0.004291092045605183, "learning_rate": 0.001, "loss": 0.3888, "step": 3411 }, { "epoch": 0.09414459003162756, "grad_norm": 0.00434610852971673, "learning_rate": 0.001, "loss": 0.4221, "step": 3412 }, { "epoch": 0.09417218223269193, "grad_norm": 0.002690440509468317, "learning_rate": 0.001, "loss": 0.461, "step": 3413 }, { "epoch": 0.0941997744337563, "grad_norm": 0.004084399435669184, "learning_rate": 0.001, "loss": 0.3907, "step": 3414 }, { "epoch": 0.09422736663482066, "grad_norm": 0.003913892433047295, "learning_rate": 0.001, "loss": 0.4183, "step": 3415 }, { "epoch": 0.09425495883588504, "grad_norm": 0.002662160899490118, "learning_rate": 0.001, "loss": 0.4368, "step": 3416 }, { "epoch": 0.09428255103694941, "grad_norm": 0.0030965355690568686, "learning_rate": 0.001, "loss": 0.4074, "step": 3417 }, { "epoch": 0.09431014323801377, "grad_norm": 0.00348603050224483, "learning_rate": 0.001, "loss": 0.4088, "step": 3418 }, { "epoch": 0.09433773543907814, "grad_norm": 0.002485208213329315, "learning_rate": 0.001, "loss": 0.3941, "step": 3419 }, { "epoch": 0.09436532764014251, "grad_norm": 0.0029183162841945887, "learning_rate": 0.001, "loss": 0.3522, "step": 3420 }, { "epoch": 0.09439291984120689, "grad_norm": 0.0036348486319184303, "learning_rate": 0.001, "loss": 0.3738, "step": 3421 }, { "epoch": 0.09442051204227125, "grad_norm": 0.03472241014242172, "learning_rate": 0.001, "loss": 0.4118, "step": 3422 }, { "epoch": 0.09444810424333562, "grad_norm": 0.0032440361101180315, "learning_rate": 0.001, "loss": 0.3886, "step": 3423 }, { "epoch": 0.09447569644439999, "grad_norm": 0.0023810886777937412, "learning_rate": 0.001, "loss": 0.4417, "step": 3424 }, { "epoch": 0.09450328864546435, "grad_norm": 0.0022793947719037533, "learning_rate": 0.001, "loss": 0.4211, "step": 3425 }, { "epoch": 0.09453088084652873, "grad_norm": 0.0028985291719436646, "learning_rate": 0.001, "loss": 0.413, "step": 3426 }, { "epoch": 0.0945584730475931, "grad_norm": 0.00440243910998106, "learning_rate": 0.001, "loss": 0.4098, "step": 3427 }, { "epoch": 0.09458606524865747, "grad_norm": 0.003319642972201109, "learning_rate": 0.001, "loss": 0.3957, "step": 3428 }, { "epoch": 0.09461365744972183, "grad_norm": 0.016772592440247536, "learning_rate": 0.001, "loss": 0.4109, "step": 3429 }, { "epoch": 0.0946412496507862, "grad_norm": 0.005076760891824961, "learning_rate": 0.001, "loss": 0.3902, "step": 3430 }, { "epoch": 0.09466884185185058, "grad_norm": 0.005572374444454908, "learning_rate": 0.001, "loss": 0.3829, "step": 3431 }, { "epoch": 0.09469643405291495, "grad_norm": 0.004039878491312265, "learning_rate": 0.001, "loss": 0.3811, "step": 3432 }, { "epoch": 0.09472402625397931, "grad_norm": 0.0031457028817385435, "learning_rate": 0.001, "loss": 0.4291, "step": 3433 }, { "epoch": 0.09475161845504368, "grad_norm": 0.0031064285431057215, "learning_rate": 0.001, "loss": 0.3624, "step": 3434 }, { "epoch": 0.09477921065610805, "grad_norm": 0.00337353372015059, "learning_rate": 0.001, "loss": 0.3938, "step": 3435 }, { "epoch": 0.09480680285717243, "grad_norm": 0.0029832145664840937, "learning_rate": 0.001, "loss": 0.4154, "step": 3436 }, { "epoch": 0.09483439505823679, "grad_norm": 0.0031751911155879498, "learning_rate": 0.001, "loss": 0.3812, "step": 3437 }, { "epoch": 0.09486198725930116, "grad_norm": 0.056237224489450455, "learning_rate": 0.001, "loss": 0.4069, "step": 3438 }, { "epoch": 0.09488957946036553, "grad_norm": 0.010076162405312061, "learning_rate": 0.001, "loss": 0.3568, "step": 3439 }, { "epoch": 0.09491717166142989, "grad_norm": 0.0027662264183163643, "learning_rate": 0.001, "loss": 0.3932, "step": 3440 }, { "epoch": 0.09494476386249427, "grad_norm": 0.0028696192894130945, "learning_rate": 0.001, "loss": 0.3689, "step": 3441 }, { "epoch": 0.09497235606355864, "grad_norm": 0.0029843011870980263, "learning_rate": 0.001, "loss": 0.3988, "step": 3442 }, { "epoch": 0.094999948264623, "grad_norm": 0.0037254204507917166, "learning_rate": 0.001, "loss": 0.4055, "step": 3443 }, { "epoch": 0.09502754046568737, "grad_norm": 0.0030714944005012512, "learning_rate": 0.001, "loss": 0.4093, "step": 3444 }, { "epoch": 0.09505513266675174, "grad_norm": 0.0032339338213205338, "learning_rate": 0.001, "loss": 0.3934, "step": 3445 }, { "epoch": 0.09508272486781612, "grad_norm": 0.002931388793513179, "learning_rate": 0.001, "loss": 0.3719, "step": 3446 }, { "epoch": 0.09511031706888048, "grad_norm": 0.0032540869433432817, "learning_rate": 0.001, "loss": 0.4122, "step": 3447 }, { "epoch": 0.09513790926994485, "grad_norm": 0.003339814953505993, "learning_rate": 0.001, "loss": 0.3725, "step": 3448 }, { "epoch": 0.09516550147100922, "grad_norm": 0.0028484398499131203, "learning_rate": 0.001, "loss": 0.4023, "step": 3449 }, { "epoch": 0.09519309367207358, "grad_norm": 0.0028640010859817266, "learning_rate": 0.001, "loss": 0.3643, "step": 3450 }, { "epoch": 0.09522068587313796, "grad_norm": 0.003085035365074873, "learning_rate": 0.001, "loss": 0.4297, "step": 3451 }, { "epoch": 0.09524827807420233, "grad_norm": 0.0024918443523347378, "learning_rate": 0.001, "loss": 0.3792, "step": 3452 }, { "epoch": 0.0952758702752667, "grad_norm": 0.0036890755873173475, "learning_rate": 0.001, "loss": 0.4053, "step": 3453 }, { "epoch": 0.09530346247633106, "grad_norm": 0.002531822072342038, "learning_rate": 0.001, "loss": 0.4078, "step": 3454 }, { "epoch": 0.09533105467739543, "grad_norm": 0.00243132165633142, "learning_rate": 0.001, "loss": 0.4177, "step": 3455 }, { "epoch": 0.09535864687845981, "grad_norm": 0.0061992863193154335, "learning_rate": 0.001, "loss": 0.4075, "step": 3456 }, { "epoch": 0.09538623907952418, "grad_norm": 0.0030754937324672937, "learning_rate": 0.001, "loss": 0.3879, "step": 3457 }, { "epoch": 0.09541383128058854, "grad_norm": 0.0024158579763025045, "learning_rate": 0.001, "loss": 0.393, "step": 3458 }, { "epoch": 0.09544142348165291, "grad_norm": 0.002696392824873328, "learning_rate": 0.001, "loss": 0.3766, "step": 3459 }, { "epoch": 0.09546901568271728, "grad_norm": 0.0024891409557312727, "learning_rate": 0.001, "loss": 0.3993, "step": 3460 }, { "epoch": 0.09549660788378164, "grad_norm": 0.004480718169361353, "learning_rate": 0.001, "loss": 0.3939, "step": 3461 }, { "epoch": 0.09552420008484602, "grad_norm": 0.0028512163553386927, "learning_rate": 0.001, "loss": 0.3774, "step": 3462 }, { "epoch": 0.09555179228591039, "grad_norm": 0.003078205045312643, "learning_rate": 0.001, "loss": 0.4043, "step": 3463 }, { "epoch": 0.09557938448697476, "grad_norm": 0.004663311876356602, "learning_rate": 0.001, "loss": 0.3878, "step": 3464 }, { "epoch": 0.09560697668803912, "grad_norm": 0.002411546418443322, "learning_rate": 0.001, "loss": 0.3804, "step": 3465 }, { "epoch": 0.09563456888910349, "grad_norm": 0.0031327796168625355, "learning_rate": 0.001, "loss": 0.4213, "step": 3466 }, { "epoch": 0.09566216109016787, "grad_norm": 0.0027790337335318327, "learning_rate": 0.001, "loss": 0.4219, "step": 3467 }, { "epoch": 0.09568975329123224, "grad_norm": 0.0028543812222778797, "learning_rate": 0.001, "loss": 0.4288, "step": 3468 }, { "epoch": 0.0957173454922966, "grad_norm": 0.003498122561722994, "learning_rate": 0.001, "loss": 0.3993, "step": 3469 }, { "epoch": 0.09574493769336097, "grad_norm": 0.00663920259103179, "learning_rate": 0.001, "loss": 0.4401, "step": 3470 }, { "epoch": 0.09577252989442533, "grad_norm": 0.0032984658610075712, "learning_rate": 0.001, "loss": 0.3899, "step": 3471 }, { "epoch": 0.09580012209548971, "grad_norm": 0.0030672100838273764, "learning_rate": 0.001, "loss": 0.411, "step": 3472 }, { "epoch": 0.09582771429655408, "grad_norm": 0.003116041421890259, "learning_rate": 0.001, "loss": 0.4194, "step": 3473 }, { "epoch": 0.09585530649761845, "grad_norm": 0.0065819453448057175, "learning_rate": 0.001, "loss": 0.4178, "step": 3474 }, { "epoch": 0.09588289869868281, "grad_norm": 0.015010586008429527, "learning_rate": 0.001, "loss": 0.4313, "step": 3475 }, { "epoch": 0.09591049089974718, "grad_norm": 0.008877400308847427, "learning_rate": 0.001, "loss": 0.4106, "step": 3476 }, { "epoch": 0.09593808310081156, "grad_norm": 0.004334705416113138, "learning_rate": 0.001, "loss": 0.3934, "step": 3477 }, { "epoch": 0.09596567530187593, "grad_norm": 0.0028876853175461292, "learning_rate": 0.001, "loss": 0.3759, "step": 3478 }, { "epoch": 0.0959932675029403, "grad_norm": 0.0028831001836806536, "learning_rate": 0.001, "loss": 0.398, "step": 3479 }, { "epoch": 0.09602085970400466, "grad_norm": 0.002438000636175275, "learning_rate": 0.001, "loss": 0.3728, "step": 3480 }, { "epoch": 0.09604845190506903, "grad_norm": 0.004020646680146456, "learning_rate": 0.001, "loss": 0.3893, "step": 3481 }, { "epoch": 0.09607604410613341, "grad_norm": 0.0025036863517016172, "learning_rate": 0.001, "loss": 0.3881, "step": 3482 }, { "epoch": 0.09610363630719777, "grad_norm": 0.0033008300233632326, "learning_rate": 0.001, "loss": 0.4334, "step": 3483 }, { "epoch": 0.09613122850826214, "grad_norm": 0.002855231985449791, "learning_rate": 0.001, "loss": 0.4157, "step": 3484 }, { "epoch": 0.0961588207093265, "grad_norm": 0.008589272387325764, "learning_rate": 0.001, "loss": 0.436, "step": 3485 }, { "epoch": 0.09618641291039087, "grad_norm": 0.0025324004236608744, "learning_rate": 0.001, "loss": 0.3986, "step": 3486 }, { "epoch": 0.09621400511145525, "grad_norm": 0.003534125629812479, "learning_rate": 0.001, "loss": 0.3844, "step": 3487 }, { "epoch": 0.09624159731251962, "grad_norm": 0.003250879468396306, "learning_rate": 0.001, "loss": 0.4009, "step": 3488 }, { "epoch": 0.09626918951358399, "grad_norm": 0.004198684822767973, "learning_rate": 0.001, "loss": 0.381, "step": 3489 }, { "epoch": 0.09629678171464835, "grad_norm": 0.0034648876171559095, "learning_rate": 0.001, "loss": 0.3931, "step": 3490 }, { "epoch": 0.09632437391571272, "grad_norm": 0.003584906691685319, "learning_rate": 0.001, "loss": 0.3903, "step": 3491 }, { "epoch": 0.0963519661167771, "grad_norm": 0.0033312232699245214, "learning_rate": 0.001, "loss": 0.405, "step": 3492 }, { "epoch": 0.09637955831784147, "grad_norm": 0.0032407655380666256, "learning_rate": 0.001, "loss": 0.4217, "step": 3493 }, { "epoch": 0.09640715051890583, "grad_norm": 0.00250063999556005, "learning_rate": 0.001, "loss": 0.4121, "step": 3494 }, { "epoch": 0.0964347427199702, "grad_norm": 0.0022465146612375975, "learning_rate": 0.001, "loss": 0.387, "step": 3495 }, { "epoch": 0.09646233492103456, "grad_norm": 0.002582980552688241, "learning_rate": 0.001, "loss": 0.393, "step": 3496 }, { "epoch": 0.09648992712209895, "grad_norm": 0.003146865637972951, "learning_rate": 0.001, "loss": 0.375, "step": 3497 }, { "epoch": 0.09651751932316331, "grad_norm": 0.004826393909752369, "learning_rate": 0.001, "loss": 0.3753, "step": 3498 }, { "epoch": 0.09654511152422768, "grad_norm": 0.004438623785972595, "learning_rate": 0.001, "loss": 0.4301, "step": 3499 }, { "epoch": 0.09657270372529204, "grad_norm": 0.002986249513924122, "learning_rate": 0.001, "loss": 0.4145, "step": 3500 }, { "epoch": 0.09657270372529204, "eval_runtime": 24.1649, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.166, "step": 3500 }, { "epoch": 0.09660029592635641, "grad_norm": 0.002994472160935402, "learning_rate": 0.001, "loss": 0.395, "step": 3501 }, { "epoch": 0.09662788812742079, "grad_norm": 0.006290449295192957, "learning_rate": 0.001, "loss": 0.4092, "step": 3502 }, { "epoch": 0.09665548032848516, "grad_norm": 0.0027024163864552975, "learning_rate": 0.001, "loss": 0.3844, "step": 3503 }, { "epoch": 0.09668307252954952, "grad_norm": 0.0026126017328351736, "learning_rate": 0.001, "loss": 0.4056, "step": 3504 }, { "epoch": 0.09671066473061389, "grad_norm": 0.002393176080659032, "learning_rate": 0.001, "loss": 0.4, "step": 3505 }, { "epoch": 0.09673825693167826, "grad_norm": 0.0025012970436364412, "learning_rate": 0.001, "loss": 0.4309, "step": 3506 }, { "epoch": 0.09676584913274262, "grad_norm": 0.0026587117463350296, "learning_rate": 0.001, "loss": 0.394, "step": 3507 }, { "epoch": 0.096793441333807, "grad_norm": 0.004255054052919149, "learning_rate": 0.001, "loss": 0.3657, "step": 3508 }, { "epoch": 0.09682103353487137, "grad_norm": 0.0029915212653577328, "learning_rate": 0.001, "loss": 0.4106, "step": 3509 }, { "epoch": 0.09684862573593574, "grad_norm": 0.002514585852622986, "learning_rate": 0.001, "loss": 0.4136, "step": 3510 }, { "epoch": 0.0968762179370001, "grad_norm": 0.0031415291596204042, "learning_rate": 0.001, "loss": 0.4172, "step": 3511 }, { "epoch": 0.09690381013806447, "grad_norm": 0.004052399192005396, "learning_rate": 0.001, "loss": 0.3988, "step": 3512 }, { "epoch": 0.09693140233912885, "grad_norm": 0.003938235808163881, "learning_rate": 0.001, "loss": 0.3865, "step": 3513 }, { "epoch": 0.09695899454019322, "grad_norm": 0.0027284801471978426, "learning_rate": 0.001, "loss": 0.4085, "step": 3514 }, { "epoch": 0.09698658674125758, "grad_norm": 0.0021578462328761816, "learning_rate": 0.001, "loss": 0.4513, "step": 3515 }, { "epoch": 0.09701417894232195, "grad_norm": 0.0030869885813444853, "learning_rate": 0.001, "loss": 0.4159, "step": 3516 }, { "epoch": 0.09704177114338632, "grad_norm": 0.0028977058827877045, "learning_rate": 0.001, "loss": 0.4035, "step": 3517 }, { "epoch": 0.0970693633444507, "grad_norm": 0.0025139932986348867, "learning_rate": 0.001, "loss": 0.4132, "step": 3518 }, { "epoch": 0.09709695554551506, "grad_norm": 0.0022785612381994724, "learning_rate": 0.001, "loss": 0.4084, "step": 3519 }, { "epoch": 0.09712454774657943, "grad_norm": 0.002874610014259815, "learning_rate": 0.001, "loss": 0.3645, "step": 3520 }, { "epoch": 0.0971521399476438, "grad_norm": 0.004266508389264345, "learning_rate": 0.001, "loss": 0.3449, "step": 3521 }, { "epoch": 0.09717973214870816, "grad_norm": 0.0025132927112281322, "learning_rate": 0.001, "loss": 0.3937, "step": 3522 }, { "epoch": 0.09720732434977254, "grad_norm": 0.006651229690760374, "learning_rate": 0.001, "loss": 0.3854, "step": 3523 }, { "epoch": 0.09723491655083691, "grad_norm": 0.0053511932492256165, "learning_rate": 0.001, "loss": 0.3865, "step": 3524 }, { "epoch": 0.09726250875190127, "grad_norm": 0.0037280949763953686, "learning_rate": 0.001, "loss": 0.4106, "step": 3525 }, { "epoch": 0.09729010095296564, "grad_norm": 0.0037587578408420086, "learning_rate": 0.001, "loss": 0.4043, "step": 3526 }, { "epoch": 0.09731769315403001, "grad_norm": 0.0037075839936733246, "learning_rate": 0.001, "loss": 0.3948, "step": 3527 }, { "epoch": 0.09734528535509439, "grad_norm": 0.004948961082845926, "learning_rate": 0.001, "loss": 0.4124, "step": 3528 }, { "epoch": 0.09737287755615875, "grad_norm": 0.006435552146285772, "learning_rate": 0.001, "loss": 0.3938, "step": 3529 }, { "epoch": 0.09740046975722312, "grad_norm": 0.0021128810476511717, "learning_rate": 0.001, "loss": 0.41, "step": 3530 }, { "epoch": 0.09742806195828749, "grad_norm": 0.0025616176426410675, "learning_rate": 0.001, "loss": 0.4178, "step": 3531 }, { "epoch": 0.09745565415935185, "grad_norm": 0.00574469892308116, "learning_rate": 0.001, "loss": 0.4012, "step": 3532 }, { "epoch": 0.09748324636041623, "grad_norm": 0.0031522875651717186, "learning_rate": 0.001, "loss": 0.3611, "step": 3533 }, { "epoch": 0.0975108385614806, "grad_norm": 0.0031021784525364637, "learning_rate": 0.001, "loss": 0.3709, "step": 3534 }, { "epoch": 0.09753843076254497, "grad_norm": 0.005699521396309137, "learning_rate": 0.001, "loss": 0.4012, "step": 3535 }, { "epoch": 0.09756602296360933, "grad_norm": 0.003169464645907283, "learning_rate": 0.001, "loss": 0.3986, "step": 3536 }, { "epoch": 0.0975936151646737, "grad_norm": 0.003686879761517048, "learning_rate": 0.001, "loss": 0.4229, "step": 3537 }, { "epoch": 0.09762120736573808, "grad_norm": 0.0031342299189418554, "learning_rate": 0.001, "loss": 0.4062, "step": 3538 }, { "epoch": 0.09764879956680245, "grad_norm": 0.0023811724968254566, "learning_rate": 0.001, "loss": 0.4048, "step": 3539 }, { "epoch": 0.09767639176786681, "grad_norm": 0.003792383009567857, "learning_rate": 0.001, "loss": 0.4013, "step": 3540 }, { "epoch": 0.09770398396893118, "grad_norm": 0.0024562154430896044, "learning_rate": 0.001, "loss": 0.4049, "step": 3541 }, { "epoch": 0.09773157616999555, "grad_norm": 0.0034171422012150288, "learning_rate": 0.001, "loss": 0.3906, "step": 3542 }, { "epoch": 0.09775916837105993, "grad_norm": 0.003149349009618163, "learning_rate": 0.001, "loss": 0.3842, "step": 3543 }, { "epoch": 0.09778676057212429, "grad_norm": 0.002210955834016204, "learning_rate": 0.001, "loss": 0.4131, "step": 3544 }, { "epoch": 0.09781435277318866, "grad_norm": 0.003593403846025467, "learning_rate": 0.001, "loss": 0.3938, "step": 3545 }, { "epoch": 0.09784194497425303, "grad_norm": 0.0022881280165165663, "learning_rate": 0.001, "loss": 0.441, "step": 3546 }, { "epoch": 0.09786953717531739, "grad_norm": 0.002236244734376669, "learning_rate": 0.001, "loss": 0.4306, "step": 3547 }, { "epoch": 0.09789712937638177, "grad_norm": 0.004320134408771992, "learning_rate": 0.001, "loss": 0.3519, "step": 3548 }, { "epoch": 0.09792472157744614, "grad_norm": 0.003616459434852004, "learning_rate": 0.001, "loss": 0.3748, "step": 3549 }, { "epoch": 0.0979523137785105, "grad_norm": 0.0072233411483466625, "learning_rate": 0.001, "loss": 0.3692, "step": 3550 }, { "epoch": 0.09797990597957487, "grad_norm": 0.017990432679653168, "learning_rate": 0.001, "loss": 0.4225, "step": 3551 }, { "epoch": 0.09800749818063924, "grad_norm": 0.0036775225307792425, "learning_rate": 0.001, "loss": 0.3654, "step": 3552 }, { "epoch": 0.0980350903817036, "grad_norm": 0.0034110434353351593, "learning_rate": 0.001, "loss": 0.3865, "step": 3553 }, { "epoch": 0.09806268258276798, "grad_norm": 0.005091147031635046, "learning_rate": 0.001, "loss": 0.385, "step": 3554 }, { "epoch": 0.09809027478383235, "grad_norm": 0.011450120247900486, "learning_rate": 0.001, "loss": 0.4183, "step": 3555 }, { "epoch": 0.09811786698489672, "grad_norm": 0.004091903567314148, "learning_rate": 0.001, "loss": 0.4219, "step": 3556 }, { "epoch": 0.09814545918596108, "grad_norm": 0.00835462100803852, "learning_rate": 0.001, "loss": 0.4033, "step": 3557 }, { "epoch": 0.09817305138702545, "grad_norm": 0.0032325852662324905, "learning_rate": 0.001, "loss": 0.4103, "step": 3558 }, { "epoch": 0.09820064358808983, "grad_norm": 0.003423454938456416, "learning_rate": 0.001, "loss": 0.4135, "step": 3559 }, { "epoch": 0.0982282357891542, "grad_norm": 0.0024230678100138903, "learning_rate": 0.001, "loss": 0.4107, "step": 3560 }, { "epoch": 0.09825582799021856, "grad_norm": 0.003030325984582305, "learning_rate": 0.001, "loss": 0.4148, "step": 3561 }, { "epoch": 0.09828342019128293, "grad_norm": 0.0031275860965251923, "learning_rate": 0.001, "loss": 0.3591, "step": 3562 }, { "epoch": 0.0983110123923473, "grad_norm": 0.010123792104423046, "learning_rate": 0.001, "loss": 0.3923, "step": 3563 }, { "epoch": 0.09833860459341168, "grad_norm": 0.0048133572563529015, "learning_rate": 0.001, "loss": 0.3917, "step": 3564 }, { "epoch": 0.09836619679447604, "grad_norm": 0.002648024819791317, "learning_rate": 0.001, "loss": 0.4199, "step": 3565 }, { "epoch": 0.09839378899554041, "grad_norm": 0.006900025065988302, "learning_rate": 0.001, "loss": 0.4124, "step": 3566 }, { "epoch": 0.09842138119660478, "grad_norm": 0.011950865387916565, "learning_rate": 0.001, "loss": 0.4279, "step": 3567 }, { "epoch": 0.09844897339766914, "grad_norm": 0.0075233737006783485, "learning_rate": 0.001, "loss": 0.3639, "step": 3568 }, { "epoch": 0.09847656559873352, "grad_norm": 0.006645913701504469, "learning_rate": 0.001, "loss": 0.4328, "step": 3569 }, { "epoch": 0.09850415779979789, "grad_norm": 0.005523411091417074, "learning_rate": 0.001, "loss": 0.4071, "step": 3570 }, { "epoch": 0.09853175000086226, "grad_norm": 0.006219461094588041, "learning_rate": 0.001, "loss": 0.3802, "step": 3571 }, { "epoch": 0.09855934220192662, "grad_norm": 0.002192398766055703, "learning_rate": 0.001, "loss": 0.3657, "step": 3572 }, { "epoch": 0.09858693440299099, "grad_norm": 0.002795976819470525, "learning_rate": 0.001, "loss": 0.3928, "step": 3573 }, { "epoch": 0.09861452660405537, "grad_norm": 0.0030005681328475475, "learning_rate": 0.001, "loss": 0.3737, "step": 3574 }, { "epoch": 0.09864211880511974, "grad_norm": 0.002711366629227996, "learning_rate": 0.001, "loss": 0.4349, "step": 3575 }, { "epoch": 0.0986697110061841, "grad_norm": 0.0025707653257995844, "learning_rate": 0.001, "loss": 0.4344, "step": 3576 }, { "epoch": 0.09869730320724847, "grad_norm": 0.002858672058209777, "learning_rate": 0.001, "loss": 0.3998, "step": 3577 }, { "epoch": 0.09872489540831283, "grad_norm": 0.003099956549704075, "learning_rate": 0.001, "loss": 0.4192, "step": 3578 }, { "epoch": 0.09875248760937722, "grad_norm": 0.014343291521072388, "learning_rate": 0.001, "loss": 0.3881, "step": 3579 }, { "epoch": 0.09878007981044158, "grad_norm": 0.0043120840564370155, "learning_rate": 0.001, "loss": 0.3958, "step": 3580 }, { "epoch": 0.09880767201150595, "grad_norm": 0.0042751263827085495, "learning_rate": 0.001, "loss": 0.3954, "step": 3581 }, { "epoch": 0.09883526421257031, "grad_norm": 0.0026994547806680202, "learning_rate": 0.001, "loss": 0.3887, "step": 3582 }, { "epoch": 0.09886285641363468, "grad_norm": 0.002389197936281562, "learning_rate": 0.001, "loss": 0.419, "step": 3583 }, { "epoch": 0.09889044861469906, "grad_norm": 0.003209868213161826, "learning_rate": 0.001, "loss": 0.4094, "step": 3584 }, { "epoch": 0.09891804081576343, "grad_norm": 0.004312742967158556, "learning_rate": 0.001, "loss": 0.3506, "step": 3585 }, { "epoch": 0.0989456330168278, "grad_norm": 0.003966677933931351, "learning_rate": 0.001, "loss": 0.426, "step": 3586 }, { "epoch": 0.09897322521789216, "grad_norm": 0.008509870618581772, "learning_rate": 0.001, "loss": 0.402, "step": 3587 }, { "epoch": 0.09900081741895653, "grad_norm": 0.003062241477891803, "learning_rate": 0.001, "loss": 0.3783, "step": 3588 }, { "epoch": 0.09902840962002091, "grad_norm": 0.0022860439494252205, "learning_rate": 0.001, "loss": 0.426, "step": 3589 }, { "epoch": 0.09905600182108527, "grad_norm": 0.004430666100233793, "learning_rate": 0.001, "loss": 0.4015, "step": 3590 }, { "epoch": 0.09908359402214964, "grad_norm": 0.002458814997226, "learning_rate": 0.001, "loss": 0.417, "step": 3591 }, { "epoch": 0.099111186223214, "grad_norm": 0.0029090861789882183, "learning_rate": 0.001, "loss": 0.3965, "step": 3592 }, { "epoch": 0.09913877842427837, "grad_norm": 0.0032796203158795834, "learning_rate": 0.001, "loss": 0.3852, "step": 3593 }, { "epoch": 0.09916637062534275, "grad_norm": 0.002556765917688608, "learning_rate": 0.001, "loss": 0.4206, "step": 3594 }, { "epoch": 0.09919396282640712, "grad_norm": 0.0023709458764642477, "learning_rate": 0.001, "loss": 0.415, "step": 3595 }, { "epoch": 0.09922155502747149, "grad_norm": 0.003395737847313285, "learning_rate": 0.001, "loss": 0.3762, "step": 3596 }, { "epoch": 0.09924914722853585, "grad_norm": 0.0026961613912135363, "learning_rate": 0.001, "loss": 0.3874, "step": 3597 }, { "epoch": 0.09927673942960022, "grad_norm": 0.0020530601032078266, "learning_rate": 0.001, "loss": 0.4465, "step": 3598 }, { "epoch": 0.0993043316306646, "grad_norm": 0.0026466413401067257, "learning_rate": 0.001, "loss": 0.4052, "step": 3599 }, { "epoch": 0.09933192383172897, "grad_norm": 0.003969122655689716, "learning_rate": 0.001, "loss": 0.406, "step": 3600 }, { "epoch": 0.09935951603279333, "grad_norm": 0.0035336946602910757, "learning_rate": 0.001, "loss": 0.3836, "step": 3601 }, { "epoch": 0.0993871082338577, "grad_norm": 0.0030988729558885098, "learning_rate": 0.001, "loss": 0.3876, "step": 3602 }, { "epoch": 0.09941470043492207, "grad_norm": 0.0029431709554046392, "learning_rate": 0.001, "loss": 0.3831, "step": 3603 }, { "epoch": 0.09944229263598643, "grad_norm": 0.002947601256892085, "learning_rate": 0.001, "loss": 0.3737, "step": 3604 }, { "epoch": 0.09946988483705081, "grad_norm": 0.002736450405791402, "learning_rate": 0.001, "loss": 0.4254, "step": 3605 }, { "epoch": 0.09949747703811518, "grad_norm": 0.0027800817042589188, "learning_rate": 0.001, "loss": 0.4183, "step": 3606 }, { "epoch": 0.09952506923917954, "grad_norm": 0.003060448681935668, "learning_rate": 0.001, "loss": 0.3972, "step": 3607 }, { "epoch": 0.09955266144024391, "grad_norm": 0.0032676290720701218, "learning_rate": 0.001, "loss": 0.3543, "step": 3608 }, { "epoch": 0.09958025364130828, "grad_norm": 0.0039128996431827545, "learning_rate": 0.001, "loss": 0.398, "step": 3609 }, { "epoch": 0.09960784584237266, "grad_norm": 0.0037549827247858047, "learning_rate": 0.001, "loss": 0.3962, "step": 3610 }, { "epoch": 0.09963543804343702, "grad_norm": 0.00438270578160882, "learning_rate": 0.001, "loss": 0.4057, "step": 3611 }, { "epoch": 0.09966303024450139, "grad_norm": 0.003163162851706147, "learning_rate": 0.001, "loss": 0.3996, "step": 3612 }, { "epoch": 0.09969062244556576, "grad_norm": 0.0033597201108932495, "learning_rate": 0.001, "loss": 0.3825, "step": 3613 }, { "epoch": 0.09971821464663012, "grad_norm": 0.003219824517145753, "learning_rate": 0.001, "loss": 0.4046, "step": 3614 }, { "epoch": 0.0997458068476945, "grad_norm": 0.0035223192535340786, "learning_rate": 0.001, "loss": 0.3996, "step": 3615 }, { "epoch": 0.09977339904875887, "grad_norm": 0.00964295119047165, "learning_rate": 0.001, "loss": 0.4004, "step": 3616 }, { "epoch": 0.09980099124982324, "grad_norm": 0.002427217550575733, "learning_rate": 0.001, "loss": 0.4164, "step": 3617 }, { "epoch": 0.0998285834508876, "grad_norm": 0.004480184521526098, "learning_rate": 0.001, "loss": 0.4118, "step": 3618 }, { "epoch": 0.09985617565195197, "grad_norm": 0.0030322298407554626, "learning_rate": 0.001, "loss": 0.4015, "step": 3619 }, { "epoch": 0.09988376785301635, "grad_norm": 0.02180991880595684, "learning_rate": 0.001, "loss": 0.4125, "step": 3620 }, { "epoch": 0.09991136005408072, "grad_norm": 0.0030360252130776644, "learning_rate": 0.001, "loss": 0.433, "step": 3621 }, { "epoch": 0.09993895225514508, "grad_norm": 0.0025207020808011293, "learning_rate": 0.001, "loss": 0.4051, "step": 3622 }, { "epoch": 0.09996654445620945, "grad_norm": 0.004299542400985956, "learning_rate": 0.001, "loss": 0.4061, "step": 3623 }, { "epoch": 0.09999413665727382, "grad_norm": 0.004565478768199682, "learning_rate": 0.001, "loss": 0.394, "step": 3624 }, { "epoch": 0.1000217288583382, "grad_norm": 0.0028781460132449865, "learning_rate": 0.001, "loss": 0.3982, "step": 3625 }, { "epoch": 0.10004932105940256, "grad_norm": 0.0026228884235024452, "learning_rate": 0.001, "loss": 0.372, "step": 3626 }, { "epoch": 0.10007691326046693, "grad_norm": 0.0029596835374832153, "learning_rate": 0.001, "loss": 0.4371, "step": 3627 }, { "epoch": 0.1001045054615313, "grad_norm": 0.007199972402304411, "learning_rate": 0.001, "loss": 0.3777, "step": 3628 }, { "epoch": 0.10013209766259566, "grad_norm": 0.003069596830755472, "learning_rate": 0.001, "loss": 0.4199, "step": 3629 }, { "epoch": 0.10015968986366004, "grad_norm": 0.002500646049156785, "learning_rate": 0.001, "loss": 0.4078, "step": 3630 }, { "epoch": 0.10018728206472441, "grad_norm": 0.0024019493721425533, "learning_rate": 0.001, "loss": 0.4107, "step": 3631 }, { "epoch": 0.10021487426578878, "grad_norm": 0.0032364402431994677, "learning_rate": 0.001, "loss": 0.4136, "step": 3632 }, { "epoch": 0.10024246646685314, "grad_norm": 0.00393798528239131, "learning_rate": 0.001, "loss": 0.3833, "step": 3633 }, { "epoch": 0.10027005866791751, "grad_norm": 0.0036344374530017376, "learning_rate": 0.001, "loss": 0.4075, "step": 3634 }, { "epoch": 0.10029765086898189, "grad_norm": 0.003431879449635744, "learning_rate": 0.001, "loss": 0.3957, "step": 3635 }, { "epoch": 0.10032524307004625, "grad_norm": 0.003278666641563177, "learning_rate": 0.001, "loss": 0.4089, "step": 3636 }, { "epoch": 0.10035283527111062, "grad_norm": 0.003398419125005603, "learning_rate": 0.001, "loss": 0.3683, "step": 3637 }, { "epoch": 0.10038042747217499, "grad_norm": 0.002605091081932187, "learning_rate": 0.001, "loss": 0.4285, "step": 3638 }, { "epoch": 0.10040801967323935, "grad_norm": 0.0038700527511537075, "learning_rate": 0.001, "loss": 0.3759, "step": 3639 }, { "epoch": 0.10043561187430373, "grad_norm": 0.002678008284419775, "learning_rate": 0.001, "loss": 0.4215, "step": 3640 }, { "epoch": 0.1004632040753681, "grad_norm": 0.004668472800403833, "learning_rate": 0.001, "loss": 0.3779, "step": 3641 }, { "epoch": 0.10049079627643247, "grad_norm": 0.003300663083791733, "learning_rate": 0.001, "loss": 0.4322, "step": 3642 }, { "epoch": 0.10051838847749683, "grad_norm": 0.0024147978983819485, "learning_rate": 0.001, "loss": 0.3779, "step": 3643 }, { "epoch": 0.1005459806785612, "grad_norm": 0.005644883494824171, "learning_rate": 0.001, "loss": 0.4082, "step": 3644 }, { "epoch": 0.10057357287962558, "grad_norm": 0.004027125891298056, "learning_rate": 0.001, "loss": 0.4173, "step": 3645 }, { "epoch": 0.10060116508068995, "grad_norm": 0.003284280654042959, "learning_rate": 0.001, "loss": 0.3697, "step": 3646 }, { "epoch": 0.10062875728175431, "grad_norm": 0.0025769018102437258, "learning_rate": 0.001, "loss": 0.4057, "step": 3647 }, { "epoch": 0.10065634948281868, "grad_norm": 0.002732061082497239, "learning_rate": 0.001, "loss": 0.4222, "step": 3648 }, { "epoch": 0.10068394168388305, "grad_norm": 0.00284641538746655, "learning_rate": 0.001, "loss": 0.4051, "step": 3649 }, { "epoch": 0.10071153388494741, "grad_norm": 0.0040368628688156605, "learning_rate": 0.001, "loss": 0.4206, "step": 3650 }, { "epoch": 0.1007391260860118, "grad_norm": 0.0030586514621973038, "learning_rate": 0.001, "loss": 0.4049, "step": 3651 }, { "epoch": 0.10076671828707616, "grad_norm": 0.002908149268478155, "learning_rate": 0.001, "loss": 0.4325, "step": 3652 }, { "epoch": 0.10079431048814053, "grad_norm": 0.0038641381543129683, "learning_rate": 0.001, "loss": 0.3943, "step": 3653 }, { "epoch": 0.10082190268920489, "grad_norm": 0.0032964826095849276, "learning_rate": 0.001, "loss": 0.39, "step": 3654 }, { "epoch": 0.10084949489026926, "grad_norm": 0.0040243249386549, "learning_rate": 0.001, "loss": 0.3653, "step": 3655 }, { "epoch": 0.10087708709133364, "grad_norm": 0.004164101555943489, "learning_rate": 0.001, "loss": 0.4091, "step": 3656 }, { "epoch": 0.100904679292398, "grad_norm": 0.003107170108705759, "learning_rate": 0.001, "loss": 0.4099, "step": 3657 }, { "epoch": 0.10093227149346237, "grad_norm": 0.0029900551307946444, "learning_rate": 0.001, "loss": 0.3947, "step": 3658 }, { "epoch": 0.10095986369452674, "grad_norm": 0.0030398843809962273, "learning_rate": 0.001, "loss": 0.4166, "step": 3659 }, { "epoch": 0.1009874558955911, "grad_norm": 0.0038001432549208403, "learning_rate": 0.001, "loss": 0.3986, "step": 3660 }, { "epoch": 0.10101504809665549, "grad_norm": 0.0026995730586349964, "learning_rate": 0.001, "loss": 0.3667, "step": 3661 }, { "epoch": 0.10104264029771985, "grad_norm": 0.0022786613553762436, "learning_rate": 0.001, "loss": 0.4441, "step": 3662 }, { "epoch": 0.10107023249878422, "grad_norm": 0.002397672738879919, "learning_rate": 0.001, "loss": 0.4011, "step": 3663 }, { "epoch": 0.10109782469984858, "grad_norm": 0.002657962031662464, "learning_rate": 0.001, "loss": 0.3886, "step": 3664 }, { "epoch": 0.10112541690091295, "grad_norm": 0.004640195053070784, "learning_rate": 0.001, "loss": 0.4264, "step": 3665 }, { "epoch": 0.10115300910197733, "grad_norm": 0.0031627060379832983, "learning_rate": 0.001, "loss": 0.3886, "step": 3666 }, { "epoch": 0.1011806013030417, "grad_norm": 0.003310044063255191, "learning_rate": 0.001, "loss": 0.4333, "step": 3667 }, { "epoch": 0.10120819350410606, "grad_norm": 0.0024094157852232456, "learning_rate": 0.001, "loss": 0.4183, "step": 3668 }, { "epoch": 0.10123578570517043, "grad_norm": 0.0029870392754673958, "learning_rate": 0.001, "loss": 0.3704, "step": 3669 }, { "epoch": 0.1012633779062348, "grad_norm": 0.002329483861103654, "learning_rate": 0.001, "loss": 0.4167, "step": 3670 }, { "epoch": 0.10129097010729918, "grad_norm": 0.003403107402846217, "learning_rate": 0.001, "loss": 0.4032, "step": 3671 }, { "epoch": 0.10131856230836354, "grad_norm": 0.0027673887088894844, "learning_rate": 0.001, "loss": 0.4072, "step": 3672 }, { "epoch": 0.10134615450942791, "grad_norm": 0.0028799972496926785, "learning_rate": 0.001, "loss": 0.3715, "step": 3673 }, { "epoch": 0.10137374671049228, "grad_norm": 0.003228268353268504, "learning_rate": 0.001, "loss": 0.4128, "step": 3674 }, { "epoch": 0.10140133891155664, "grad_norm": 0.0028087422251701355, "learning_rate": 0.001, "loss": 0.4151, "step": 3675 }, { "epoch": 0.10142893111262102, "grad_norm": 0.0026430152356624603, "learning_rate": 0.001, "loss": 0.3818, "step": 3676 }, { "epoch": 0.10145652331368539, "grad_norm": 0.0036075664684176445, "learning_rate": 0.001, "loss": 0.3834, "step": 3677 }, { "epoch": 0.10148411551474976, "grad_norm": 0.0028451229445636272, "learning_rate": 0.001, "loss": 0.4127, "step": 3678 }, { "epoch": 0.10151170771581412, "grad_norm": 0.0037802942097187042, "learning_rate": 0.001, "loss": 0.4182, "step": 3679 }, { "epoch": 0.10153929991687849, "grad_norm": 0.0029138477984815836, "learning_rate": 0.001, "loss": 0.436, "step": 3680 }, { "epoch": 0.10156689211794287, "grad_norm": 0.003684982191771269, "learning_rate": 0.001, "loss": 0.4201, "step": 3681 }, { "epoch": 0.10159448431900724, "grad_norm": 0.005630989093333483, "learning_rate": 0.001, "loss": 0.3818, "step": 3682 }, { "epoch": 0.1016220765200716, "grad_norm": 0.006454580929130316, "learning_rate": 0.001, "loss": 0.4174, "step": 3683 }, { "epoch": 0.10164966872113597, "grad_norm": 0.007667763624340296, "learning_rate": 0.001, "loss": 0.3887, "step": 3684 }, { "epoch": 0.10167726092220034, "grad_norm": 0.004302634857594967, "learning_rate": 0.001, "loss": 0.3893, "step": 3685 }, { "epoch": 0.10170485312326472, "grad_norm": 0.003696159226819873, "learning_rate": 0.001, "loss": 0.4209, "step": 3686 }, { "epoch": 0.10173244532432908, "grad_norm": 0.004956797696650028, "learning_rate": 0.001, "loss": 0.3985, "step": 3687 }, { "epoch": 0.10176003752539345, "grad_norm": 0.0026316859293729067, "learning_rate": 0.001, "loss": 0.395, "step": 3688 }, { "epoch": 0.10178762972645782, "grad_norm": 0.002009750111028552, "learning_rate": 0.001, "loss": 0.4335, "step": 3689 }, { "epoch": 0.10181522192752218, "grad_norm": 0.003345980541780591, "learning_rate": 0.001, "loss": 0.4194, "step": 3690 }, { "epoch": 0.10184281412858656, "grad_norm": 0.002231464022770524, "learning_rate": 0.001, "loss": 0.4316, "step": 3691 }, { "epoch": 0.10187040632965093, "grad_norm": 0.003336479654535651, "learning_rate": 0.001, "loss": 0.3732, "step": 3692 }, { "epoch": 0.1018979985307153, "grad_norm": 0.002393354196101427, "learning_rate": 0.001, "loss": 0.4018, "step": 3693 }, { "epoch": 0.10192559073177966, "grad_norm": 0.0024670169223099947, "learning_rate": 0.001, "loss": 0.3915, "step": 3694 }, { "epoch": 0.10195318293284403, "grad_norm": 0.0034887620713561773, "learning_rate": 0.001, "loss": 0.4231, "step": 3695 }, { "epoch": 0.1019807751339084, "grad_norm": 0.003547382541000843, "learning_rate": 0.001, "loss": 0.3934, "step": 3696 }, { "epoch": 0.10200836733497277, "grad_norm": 0.0034907760564237833, "learning_rate": 0.001, "loss": 0.4244, "step": 3697 }, { "epoch": 0.10203595953603714, "grad_norm": 0.002545100636780262, "learning_rate": 0.001, "loss": 0.3918, "step": 3698 }, { "epoch": 0.10206355173710151, "grad_norm": 0.004985075909644365, "learning_rate": 0.001, "loss": 0.3924, "step": 3699 }, { "epoch": 0.10209114393816587, "grad_norm": 0.004573920741677284, "learning_rate": 0.001, "loss": 0.3743, "step": 3700 }, { "epoch": 0.10211873613923024, "grad_norm": 0.004074465949088335, "learning_rate": 0.001, "loss": 0.4086, "step": 3701 }, { "epoch": 0.10214632834029462, "grad_norm": 0.0037853543180972338, "learning_rate": 0.001, "loss": 0.4263, "step": 3702 }, { "epoch": 0.10217392054135899, "grad_norm": 0.002464262768626213, "learning_rate": 0.001, "loss": 0.4116, "step": 3703 }, { "epoch": 0.10220151274242335, "grad_norm": 0.0036815868224948645, "learning_rate": 0.001, "loss": 0.388, "step": 3704 }, { "epoch": 0.10222910494348772, "grad_norm": 0.0033971264492720366, "learning_rate": 0.001, "loss": 0.4613, "step": 3705 }, { "epoch": 0.10225669714455209, "grad_norm": 0.0029886479023844004, "learning_rate": 0.001, "loss": 0.413, "step": 3706 }, { "epoch": 0.10228428934561647, "grad_norm": 0.0036080891732126474, "learning_rate": 0.001, "loss": 0.3937, "step": 3707 }, { "epoch": 0.10231188154668083, "grad_norm": 0.0026688736397773027, "learning_rate": 0.001, "loss": 0.432, "step": 3708 }, { "epoch": 0.1023394737477452, "grad_norm": 0.003568600630387664, "learning_rate": 0.001, "loss": 0.3813, "step": 3709 }, { "epoch": 0.10236706594880957, "grad_norm": 0.0037499042227864265, "learning_rate": 0.001, "loss": 0.4049, "step": 3710 }, { "epoch": 0.10239465814987393, "grad_norm": 0.0027967335190624, "learning_rate": 0.001, "loss": 0.4164, "step": 3711 }, { "epoch": 0.10242225035093831, "grad_norm": 0.002266339026391506, "learning_rate": 0.001, "loss": 0.3947, "step": 3712 }, { "epoch": 0.10244984255200268, "grad_norm": 0.002678538439795375, "learning_rate": 0.001, "loss": 0.3994, "step": 3713 }, { "epoch": 0.10247743475306705, "grad_norm": 0.007051249034702778, "learning_rate": 0.001, "loss": 0.3694, "step": 3714 }, { "epoch": 0.10250502695413141, "grad_norm": 0.002434907481074333, "learning_rate": 0.001, "loss": 0.3636, "step": 3715 }, { "epoch": 0.10253261915519578, "grad_norm": 0.002865745685994625, "learning_rate": 0.001, "loss": 0.4641, "step": 3716 }, { "epoch": 0.10256021135626016, "grad_norm": 0.0022143360693007708, "learning_rate": 0.001, "loss": 0.3947, "step": 3717 }, { "epoch": 0.10258780355732453, "grad_norm": 0.0022539508063346148, "learning_rate": 0.001, "loss": 0.4341, "step": 3718 }, { "epoch": 0.10261539575838889, "grad_norm": 0.0032584406435489655, "learning_rate": 0.001, "loss": 0.4105, "step": 3719 }, { "epoch": 0.10264298795945326, "grad_norm": 0.003400850109755993, "learning_rate": 0.001, "loss": 0.3808, "step": 3720 }, { "epoch": 0.10267058016051762, "grad_norm": 0.004264235496520996, "learning_rate": 0.001, "loss": 0.3823, "step": 3721 }, { "epoch": 0.102698172361582, "grad_norm": 0.0028461632318794727, "learning_rate": 0.001, "loss": 0.4444, "step": 3722 }, { "epoch": 0.10272576456264637, "grad_norm": 0.00392636563628912, "learning_rate": 0.001, "loss": 0.3891, "step": 3723 }, { "epoch": 0.10275335676371074, "grad_norm": 0.002802118891850114, "learning_rate": 0.001, "loss": 0.4593, "step": 3724 }, { "epoch": 0.1027809489647751, "grad_norm": 0.0052732862532138824, "learning_rate": 0.001, "loss": 0.4124, "step": 3725 }, { "epoch": 0.10280854116583947, "grad_norm": 0.009514648467302322, "learning_rate": 0.001, "loss": 0.3976, "step": 3726 }, { "epoch": 0.10283613336690385, "grad_norm": 0.0024788815062493086, "learning_rate": 0.001, "loss": 0.4052, "step": 3727 }, { "epoch": 0.10286372556796822, "grad_norm": 0.003208071691915393, "learning_rate": 0.001, "loss": 0.4126, "step": 3728 }, { "epoch": 0.10289131776903258, "grad_norm": 0.002506793709471822, "learning_rate": 0.001, "loss": 0.4013, "step": 3729 }, { "epoch": 0.10291890997009695, "grad_norm": 0.005780140403658152, "learning_rate": 0.001, "loss": 0.3869, "step": 3730 }, { "epoch": 0.10294650217116132, "grad_norm": 0.005190226249396801, "learning_rate": 0.001, "loss": 0.4279, "step": 3731 }, { "epoch": 0.1029740943722257, "grad_norm": 0.005167331546545029, "learning_rate": 0.001, "loss": 0.3991, "step": 3732 }, { "epoch": 0.10300168657329006, "grad_norm": 0.003800647798925638, "learning_rate": 0.001, "loss": 0.3657, "step": 3733 }, { "epoch": 0.10302927877435443, "grad_norm": 0.004843884147703648, "learning_rate": 0.001, "loss": 0.4188, "step": 3734 }, { "epoch": 0.1030568709754188, "grad_norm": 0.0037740804255008698, "learning_rate": 0.001, "loss": 0.3795, "step": 3735 }, { "epoch": 0.10308446317648316, "grad_norm": 0.004264209885150194, "learning_rate": 0.001, "loss": 0.3936, "step": 3736 }, { "epoch": 0.10311205537754754, "grad_norm": 0.007586845196783543, "learning_rate": 0.001, "loss": 0.3892, "step": 3737 }, { "epoch": 0.10313964757861191, "grad_norm": 0.005896701943129301, "learning_rate": 0.001, "loss": 0.4083, "step": 3738 }, { "epoch": 0.10316723977967628, "grad_norm": 0.004077561665326357, "learning_rate": 0.001, "loss": 0.442, "step": 3739 }, { "epoch": 0.10319483198074064, "grad_norm": 0.003110091434791684, "learning_rate": 0.001, "loss": 0.4075, "step": 3740 }, { "epoch": 0.10322242418180501, "grad_norm": 0.0023520744871348143, "learning_rate": 0.001, "loss": 0.428, "step": 3741 }, { "epoch": 0.10325001638286938, "grad_norm": 0.006090542767196894, "learning_rate": 0.001, "loss": 0.3767, "step": 3742 }, { "epoch": 0.10327760858393376, "grad_norm": 0.002889704890549183, "learning_rate": 0.001, "loss": 0.4024, "step": 3743 }, { "epoch": 0.10330520078499812, "grad_norm": 0.0024959116708487272, "learning_rate": 0.001, "loss": 0.3798, "step": 3744 }, { "epoch": 0.10333279298606249, "grad_norm": 0.002503210911527276, "learning_rate": 0.001, "loss": 0.4306, "step": 3745 }, { "epoch": 0.10336038518712685, "grad_norm": 0.0028133681043982506, "learning_rate": 0.001, "loss": 0.4008, "step": 3746 }, { "epoch": 0.10338797738819122, "grad_norm": 0.0032689927611500025, "learning_rate": 0.001, "loss": 0.3972, "step": 3747 }, { "epoch": 0.1034155695892556, "grad_norm": 0.002301878994330764, "learning_rate": 0.001, "loss": 0.4217, "step": 3748 }, { "epoch": 0.10344316179031997, "grad_norm": 0.0024407797027379274, "learning_rate": 0.001, "loss": 0.376, "step": 3749 }, { "epoch": 0.10347075399138433, "grad_norm": 0.0035565400030463934, "learning_rate": 0.001, "loss": 0.3959, "step": 3750 }, { "epoch": 0.1034983461924487, "grad_norm": 0.002976832212880254, "learning_rate": 0.001, "loss": 0.3942, "step": 3751 }, { "epoch": 0.10352593839351307, "grad_norm": 0.0027037430554628372, "learning_rate": 0.001, "loss": 0.4186, "step": 3752 }, { "epoch": 0.10355353059457745, "grad_norm": 0.004547620192170143, "learning_rate": 0.001, "loss": 0.4047, "step": 3753 }, { "epoch": 0.10358112279564181, "grad_norm": 0.0025993280578404665, "learning_rate": 0.001, "loss": 0.4146, "step": 3754 }, { "epoch": 0.10360871499670618, "grad_norm": 0.0020117738749831915, "learning_rate": 0.001, "loss": 0.4048, "step": 3755 }, { "epoch": 0.10363630719777055, "grad_norm": 0.003054060973227024, "learning_rate": 0.001, "loss": 0.4195, "step": 3756 }, { "epoch": 0.10366389939883491, "grad_norm": 0.0028975980821996927, "learning_rate": 0.001, "loss": 0.3869, "step": 3757 }, { "epoch": 0.1036914915998993, "grad_norm": 0.004843092989176512, "learning_rate": 0.001, "loss": 0.3785, "step": 3758 }, { "epoch": 0.10371908380096366, "grad_norm": 0.003735753009095788, "learning_rate": 0.001, "loss": 0.3961, "step": 3759 }, { "epoch": 0.10374667600202803, "grad_norm": 0.0024528366047888994, "learning_rate": 0.001, "loss": 0.4391, "step": 3760 }, { "epoch": 0.10377426820309239, "grad_norm": 0.003306907368823886, "learning_rate": 0.001, "loss": 0.3829, "step": 3761 }, { "epoch": 0.10380186040415676, "grad_norm": 0.0029531391337513924, "learning_rate": 0.001, "loss": 0.3595, "step": 3762 }, { "epoch": 0.10382945260522114, "grad_norm": 0.004337473772466183, "learning_rate": 0.001, "loss": 0.4374, "step": 3763 }, { "epoch": 0.1038570448062855, "grad_norm": 0.0033757942728698254, "learning_rate": 0.001, "loss": 0.386, "step": 3764 }, { "epoch": 0.10388463700734987, "grad_norm": 0.004451198037713766, "learning_rate": 0.001, "loss": 0.4011, "step": 3765 }, { "epoch": 0.10391222920841424, "grad_norm": 0.0029722759500145912, "learning_rate": 0.001, "loss": 0.3771, "step": 3766 }, { "epoch": 0.1039398214094786, "grad_norm": 0.003191061783581972, "learning_rate": 0.001, "loss": 0.4029, "step": 3767 }, { "epoch": 0.10396741361054299, "grad_norm": 0.0037221303209662437, "learning_rate": 0.001, "loss": 0.3659, "step": 3768 }, { "epoch": 0.10399500581160735, "grad_norm": 0.0024105177726596594, "learning_rate": 0.001, "loss": 0.421, "step": 3769 }, { "epoch": 0.10402259801267172, "grad_norm": 0.002493768697604537, "learning_rate": 0.001, "loss": 0.3957, "step": 3770 }, { "epoch": 0.10405019021373609, "grad_norm": 0.0025503532961010933, "learning_rate": 0.001, "loss": 0.3833, "step": 3771 }, { "epoch": 0.10407778241480045, "grad_norm": 0.0032149364706128836, "learning_rate": 0.001, "loss": 0.3904, "step": 3772 }, { "epoch": 0.10410537461586483, "grad_norm": 0.004015072248876095, "learning_rate": 0.001, "loss": 0.4076, "step": 3773 }, { "epoch": 0.1041329668169292, "grad_norm": 0.008095541037619114, "learning_rate": 0.001, "loss": 0.37, "step": 3774 }, { "epoch": 0.10416055901799356, "grad_norm": 0.0025133301969617605, "learning_rate": 0.001, "loss": 0.4088, "step": 3775 }, { "epoch": 0.10418815121905793, "grad_norm": 0.0035531746689230204, "learning_rate": 0.001, "loss": 0.379, "step": 3776 }, { "epoch": 0.1042157434201223, "grad_norm": 0.003656880697235465, "learning_rate": 0.001, "loss": 0.388, "step": 3777 }, { "epoch": 0.10424333562118668, "grad_norm": 0.005002745892852545, "learning_rate": 0.001, "loss": 0.3961, "step": 3778 }, { "epoch": 0.10427092782225104, "grad_norm": 0.0031288950704038143, "learning_rate": 0.001, "loss": 0.4222, "step": 3779 }, { "epoch": 0.10429852002331541, "grad_norm": 0.0030303194653242826, "learning_rate": 0.001, "loss": 0.387, "step": 3780 }, { "epoch": 0.10432611222437978, "grad_norm": 0.004291849210858345, "learning_rate": 0.001, "loss": 0.3933, "step": 3781 }, { "epoch": 0.10435370442544414, "grad_norm": 0.003425056580454111, "learning_rate": 0.001, "loss": 0.3847, "step": 3782 }, { "epoch": 0.10438129662650852, "grad_norm": 0.0025096056051552296, "learning_rate": 0.001, "loss": 0.3992, "step": 3783 }, { "epoch": 0.10440888882757289, "grad_norm": 0.002500693080946803, "learning_rate": 0.001, "loss": 0.403, "step": 3784 }, { "epoch": 0.10443648102863726, "grad_norm": 0.003590058069676161, "learning_rate": 0.001, "loss": 0.3998, "step": 3785 }, { "epoch": 0.10446407322970162, "grad_norm": 0.004308347124606371, "learning_rate": 0.001, "loss": 0.4492, "step": 3786 }, { "epoch": 0.10449166543076599, "grad_norm": 0.002307620132341981, "learning_rate": 0.001, "loss": 0.4146, "step": 3787 }, { "epoch": 0.10451925763183036, "grad_norm": 0.0036616444122046232, "learning_rate": 0.001, "loss": 0.3787, "step": 3788 }, { "epoch": 0.10454684983289474, "grad_norm": 0.0035604690201580524, "learning_rate": 0.001, "loss": 0.3597, "step": 3789 }, { "epoch": 0.1045744420339591, "grad_norm": 0.00397746916860342, "learning_rate": 0.001, "loss": 0.3954, "step": 3790 }, { "epoch": 0.10460203423502347, "grad_norm": 0.009449174627661705, "learning_rate": 0.001, "loss": 0.3521, "step": 3791 }, { "epoch": 0.10462962643608784, "grad_norm": 0.0032818394247442484, "learning_rate": 0.001, "loss": 0.3655, "step": 3792 }, { "epoch": 0.1046572186371522, "grad_norm": 0.0032766228541731834, "learning_rate": 0.001, "loss": 0.4209, "step": 3793 }, { "epoch": 0.10468481083821658, "grad_norm": 0.002585778711363673, "learning_rate": 0.001, "loss": 0.4043, "step": 3794 }, { "epoch": 0.10471240303928095, "grad_norm": 0.0029392503201961517, "learning_rate": 0.001, "loss": 0.4474, "step": 3795 }, { "epoch": 0.10473999524034532, "grad_norm": 0.0027161298785358667, "learning_rate": 0.001, "loss": 0.4161, "step": 3796 }, { "epoch": 0.10476758744140968, "grad_norm": 0.0034348920453339815, "learning_rate": 0.001, "loss": 0.4016, "step": 3797 }, { "epoch": 0.10479517964247405, "grad_norm": 0.007731628604233265, "learning_rate": 0.001, "loss": 0.3592, "step": 3798 }, { "epoch": 0.10482277184353843, "grad_norm": 0.0029055611230432987, "learning_rate": 0.001, "loss": 0.3931, "step": 3799 }, { "epoch": 0.1048503640446028, "grad_norm": 0.0027976164128631353, "learning_rate": 0.001, "loss": 0.388, "step": 3800 }, { "epoch": 0.10487795624566716, "grad_norm": 0.003006401937454939, "learning_rate": 0.001, "loss": 0.3864, "step": 3801 }, { "epoch": 0.10490554844673153, "grad_norm": 0.002237136010080576, "learning_rate": 0.001, "loss": 0.4018, "step": 3802 }, { "epoch": 0.1049331406477959, "grad_norm": 0.003247616346925497, "learning_rate": 0.001, "loss": 0.381, "step": 3803 }, { "epoch": 0.10496073284886027, "grad_norm": 0.002951403148472309, "learning_rate": 0.001, "loss": 0.3899, "step": 3804 }, { "epoch": 0.10498832504992464, "grad_norm": 0.002603907370939851, "learning_rate": 0.001, "loss": 0.4005, "step": 3805 }, { "epoch": 0.10501591725098901, "grad_norm": 0.0022911475971341133, "learning_rate": 0.001, "loss": 0.4007, "step": 3806 }, { "epoch": 0.10504350945205337, "grad_norm": 0.002795920707285404, "learning_rate": 0.001, "loss": 0.3948, "step": 3807 }, { "epoch": 0.10507110165311774, "grad_norm": 0.0031260910909622908, "learning_rate": 0.001, "loss": 0.386, "step": 3808 }, { "epoch": 0.10509869385418212, "grad_norm": 0.003506281180307269, "learning_rate": 0.001, "loss": 0.3794, "step": 3809 }, { "epoch": 0.10512628605524649, "grad_norm": 0.0027451482601463795, "learning_rate": 0.001, "loss": 0.3876, "step": 3810 }, { "epoch": 0.10515387825631085, "grad_norm": 0.0057808831334114075, "learning_rate": 0.001, "loss": 0.4082, "step": 3811 }, { "epoch": 0.10518147045737522, "grad_norm": 0.003006263403221965, "learning_rate": 0.001, "loss": 0.3698, "step": 3812 }, { "epoch": 0.10520906265843959, "grad_norm": 0.004194669425487518, "learning_rate": 0.001, "loss": 0.4142, "step": 3813 }, { "epoch": 0.10523665485950397, "grad_norm": 0.004824482370167971, "learning_rate": 0.001, "loss": 0.4101, "step": 3814 }, { "epoch": 0.10526424706056833, "grad_norm": 0.0029831102583557367, "learning_rate": 0.001, "loss": 0.4164, "step": 3815 }, { "epoch": 0.1052918392616327, "grad_norm": 0.004361843690276146, "learning_rate": 0.001, "loss": 0.3874, "step": 3816 }, { "epoch": 0.10531943146269707, "grad_norm": 0.002336485544219613, "learning_rate": 0.001, "loss": 0.4153, "step": 3817 }, { "epoch": 0.10534702366376143, "grad_norm": 0.0023848165292292833, "learning_rate": 0.001, "loss": 0.4001, "step": 3818 }, { "epoch": 0.10537461586482581, "grad_norm": 0.0029371667187660933, "learning_rate": 0.001, "loss": 0.361, "step": 3819 }, { "epoch": 0.10540220806589018, "grad_norm": 0.003010603366419673, "learning_rate": 0.001, "loss": 0.3866, "step": 3820 }, { "epoch": 0.10542980026695455, "grad_norm": 0.007170096971094608, "learning_rate": 0.001, "loss": 0.4616, "step": 3821 }, { "epoch": 0.10545739246801891, "grad_norm": 0.002445077523589134, "learning_rate": 0.001, "loss": 0.4147, "step": 3822 }, { "epoch": 0.10548498466908328, "grad_norm": 0.0034536407329142094, "learning_rate": 0.001, "loss": 0.4023, "step": 3823 }, { "epoch": 0.10551257687014766, "grad_norm": 0.0021885402966290712, "learning_rate": 0.001, "loss": 0.4053, "step": 3824 }, { "epoch": 0.10554016907121203, "grad_norm": 0.0027080499567091465, "learning_rate": 0.001, "loss": 0.416, "step": 3825 }, { "epoch": 0.10556776127227639, "grad_norm": 0.0036047815810889006, "learning_rate": 0.001, "loss": 0.3697, "step": 3826 }, { "epoch": 0.10559535347334076, "grad_norm": 0.0027917807456105947, "learning_rate": 0.001, "loss": 0.3715, "step": 3827 }, { "epoch": 0.10562294567440512, "grad_norm": 0.0034859776496887207, "learning_rate": 0.001, "loss": 0.3811, "step": 3828 }, { "epoch": 0.1056505378754695, "grad_norm": 0.0031901709735393524, "learning_rate": 0.001, "loss": 0.4007, "step": 3829 }, { "epoch": 0.10567813007653387, "grad_norm": 0.004165272694081068, "learning_rate": 0.001, "loss": 0.3996, "step": 3830 }, { "epoch": 0.10570572227759824, "grad_norm": 0.0031863113399595022, "learning_rate": 0.001, "loss": 0.3956, "step": 3831 }, { "epoch": 0.1057333144786626, "grad_norm": 0.0035512226168066263, "learning_rate": 0.001, "loss": 0.3881, "step": 3832 }, { "epoch": 0.10576090667972697, "grad_norm": 0.0030755288898944855, "learning_rate": 0.001, "loss": 0.3944, "step": 3833 }, { "epoch": 0.10578849888079135, "grad_norm": 0.0031162879895418882, "learning_rate": 0.001, "loss": 0.4025, "step": 3834 }, { "epoch": 0.10581609108185572, "grad_norm": 0.0038108036387711763, "learning_rate": 0.001, "loss": 0.4228, "step": 3835 }, { "epoch": 0.10584368328292008, "grad_norm": 0.0033550935331732035, "learning_rate": 0.001, "loss": 0.3907, "step": 3836 }, { "epoch": 0.10587127548398445, "grad_norm": 0.0031523280777037144, "learning_rate": 0.001, "loss": 0.4011, "step": 3837 }, { "epoch": 0.10589886768504882, "grad_norm": 0.0038961879909038544, "learning_rate": 0.001, "loss": 0.395, "step": 3838 }, { "epoch": 0.10592645988611318, "grad_norm": 0.005496688652783632, "learning_rate": 0.001, "loss": 0.3864, "step": 3839 }, { "epoch": 0.10595405208717756, "grad_norm": 0.0032198880799114704, "learning_rate": 0.001, "loss": 0.3935, "step": 3840 }, { "epoch": 0.10598164428824193, "grad_norm": 0.003234037896618247, "learning_rate": 0.001, "loss": 0.4042, "step": 3841 }, { "epoch": 0.1060092364893063, "grad_norm": 0.002870423486456275, "learning_rate": 0.001, "loss": 0.3869, "step": 3842 }, { "epoch": 0.10603682869037066, "grad_norm": 0.004519653040915728, "learning_rate": 0.001, "loss": 0.4125, "step": 3843 }, { "epoch": 0.10606442089143503, "grad_norm": 0.003621830837801099, "learning_rate": 0.001, "loss": 0.3895, "step": 3844 }, { "epoch": 0.10609201309249941, "grad_norm": 0.0029260909650474787, "learning_rate": 0.001, "loss": 0.4112, "step": 3845 }, { "epoch": 0.10611960529356378, "grad_norm": 0.0031509913969784975, "learning_rate": 0.001, "loss": 0.3623, "step": 3846 }, { "epoch": 0.10614719749462814, "grad_norm": 0.006669745780527592, "learning_rate": 0.001, "loss": 0.4215, "step": 3847 }, { "epoch": 0.10617478969569251, "grad_norm": 0.003406877163797617, "learning_rate": 0.001, "loss": 0.4188, "step": 3848 }, { "epoch": 0.10620238189675688, "grad_norm": 0.008724176324903965, "learning_rate": 0.001, "loss": 0.4219, "step": 3849 }, { "epoch": 0.10622997409782126, "grad_norm": 0.0026642687153071165, "learning_rate": 0.001, "loss": 0.4441, "step": 3850 }, { "epoch": 0.10625756629888562, "grad_norm": 0.003902031574398279, "learning_rate": 0.001, "loss": 0.4033, "step": 3851 }, { "epoch": 0.10628515849994999, "grad_norm": 0.0034857727587223053, "learning_rate": 0.001, "loss": 0.3652, "step": 3852 }, { "epoch": 0.10631275070101436, "grad_norm": 0.0022453153505921364, "learning_rate": 0.001, "loss": 0.4164, "step": 3853 }, { "epoch": 0.10634034290207872, "grad_norm": 0.002694975584745407, "learning_rate": 0.001, "loss": 0.4143, "step": 3854 }, { "epoch": 0.1063679351031431, "grad_norm": 0.005093062296509743, "learning_rate": 0.001, "loss": 0.4107, "step": 3855 }, { "epoch": 0.10639552730420747, "grad_norm": 0.004576206207275391, "learning_rate": 0.001, "loss": 0.4017, "step": 3856 }, { "epoch": 0.10642311950527183, "grad_norm": 0.0031380197033286095, "learning_rate": 0.001, "loss": 0.3979, "step": 3857 }, { "epoch": 0.1064507117063362, "grad_norm": 0.003493053140118718, "learning_rate": 0.001, "loss": 0.3976, "step": 3858 }, { "epoch": 0.10647830390740057, "grad_norm": 0.0024905947502702475, "learning_rate": 0.001, "loss": 0.4554, "step": 3859 }, { "epoch": 0.10650589610846495, "grad_norm": 0.00544704170897603, "learning_rate": 0.001, "loss": 0.3989, "step": 3860 }, { "epoch": 0.10653348830952931, "grad_norm": 0.0029771511908620596, "learning_rate": 0.001, "loss": 0.3882, "step": 3861 }, { "epoch": 0.10656108051059368, "grad_norm": 0.0027052282821387053, "learning_rate": 0.001, "loss": 0.3955, "step": 3862 }, { "epoch": 0.10658867271165805, "grad_norm": 0.003082839772105217, "learning_rate": 0.001, "loss": 0.3908, "step": 3863 }, { "epoch": 0.10661626491272241, "grad_norm": 0.003736154641956091, "learning_rate": 0.001, "loss": 0.3857, "step": 3864 }, { "epoch": 0.1066438571137868, "grad_norm": 0.0027385384310036898, "learning_rate": 0.001, "loss": 0.4051, "step": 3865 }, { "epoch": 0.10667144931485116, "grad_norm": 0.002778218826279044, "learning_rate": 0.001, "loss": 0.4041, "step": 3866 }, { "epoch": 0.10669904151591553, "grad_norm": 0.01646745204925537, "learning_rate": 0.001, "loss": 0.3694, "step": 3867 }, { "epoch": 0.1067266337169799, "grad_norm": 0.0036807360593229532, "learning_rate": 0.001, "loss": 0.3961, "step": 3868 }, { "epoch": 0.10675422591804426, "grad_norm": 0.006385852582752705, "learning_rate": 0.001, "loss": 0.3896, "step": 3869 }, { "epoch": 0.10678181811910864, "grad_norm": 0.0027478185947984457, "learning_rate": 0.001, "loss": 0.4045, "step": 3870 }, { "epoch": 0.106809410320173, "grad_norm": 0.00484267994761467, "learning_rate": 0.001, "loss": 0.4049, "step": 3871 }, { "epoch": 0.10683700252123737, "grad_norm": 0.0029064714908599854, "learning_rate": 0.001, "loss": 0.3901, "step": 3872 }, { "epoch": 0.10686459472230174, "grad_norm": 0.004293619655072689, "learning_rate": 0.001, "loss": 0.4189, "step": 3873 }, { "epoch": 0.1068921869233661, "grad_norm": 0.009066743776202202, "learning_rate": 0.001, "loss": 0.4004, "step": 3874 }, { "epoch": 0.10691977912443049, "grad_norm": 0.004455687943845987, "learning_rate": 0.001, "loss": 0.3816, "step": 3875 }, { "epoch": 0.10694737132549485, "grad_norm": 0.008621391840279102, "learning_rate": 0.001, "loss": 0.3958, "step": 3876 }, { "epoch": 0.10697496352655922, "grad_norm": 0.0032543812412768602, "learning_rate": 0.001, "loss": 0.394, "step": 3877 }, { "epoch": 0.10700255572762359, "grad_norm": 0.003056267276406288, "learning_rate": 0.001, "loss": 0.4099, "step": 3878 }, { "epoch": 0.10703014792868795, "grad_norm": 0.003457016311585903, "learning_rate": 0.001, "loss": 0.3706, "step": 3879 }, { "epoch": 0.10705774012975233, "grad_norm": 0.0029117148369550705, "learning_rate": 0.001, "loss": 0.3916, "step": 3880 }, { "epoch": 0.1070853323308167, "grad_norm": 0.003112402046099305, "learning_rate": 0.001, "loss": 0.394, "step": 3881 }, { "epoch": 0.10711292453188107, "grad_norm": 0.003459386760368943, "learning_rate": 0.001, "loss": 0.382, "step": 3882 }, { "epoch": 0.10714051673294543, "grad_norm": 0.007629405707120895, "learning_rate": 0.001, "loss": 0.3746, "step": 3883 }, { "epoch": 0.1071681089340098, "grad_norm": 0.007017344702035189, "learning_rate": 0.001, "loss": 0.3503, "step": 3884 }, { "epoch": 0.10719570113507416, "grad_norm": 0.006143512669950724, "learning_rate": 0.001, "loss": 0.4128, "step": 3885 }, { "epoch": 0.10722329333613854, "grad_norm": 0.007819131016731262, "learning_rate": 0.001, "loss": 0.4314, "step": 3886 }, { "epoch": 0.10725088553720291, "grad_norm": 0.0033061886206269264, "learning_rate": 0.001, "loss": 0.44, "step": 3887 }, { "epoch": 0.10727847773826728, "grad_norm": 0.0026390962302684784, "learning_rate": 0.001, "loss": 0.3773, "step": 3888 }, { "epoch": 0.10730606993933164, "grad_norm": 0.003349416656419635, "learning_rate": 0.001, "loss": 0.4298, "step": 3889 }, { "epoch": 0.10733366214039601, "grad_norm": 0.004052475094795227, "learning_rate": 0.001, "loss": 0.3929, "step": 3890 }, { "epoch": 0.10736125434146039, "grad_norm": 0.008710183203220367, "learning_rate": 0.001, "loss": 0.3899, "step": 3891 }, { "epoch": 0.10738884654252476, "grad_norm": 0.0028958211187273264, "learning_rate": 0.001, "loss": 0.4112, "step": 3892 }, { "epoch": 0.10741643874358912, "grad_norm": 0.002961238846182823, "learning_rate": 0.001, "loss": 0.3816, "step": 3893 }, { "epoch": 0.10744403094465349, "grad_norm": 0.0025948896072804928, "learning_rate": 0.001, "loss": 0.4133, "step": 3894 }, { "epoch": 0.10747162314571786, "grad_norm": 0.0037905005738139153, "learning_rate": 0.001, "loss": 0.3927, "step": 3895 }, { "epoch": 0.10749921534678224, "grad_norm": 0.003213467774912715, "learning_rate": 0.001, "loss": 0.4159, "step": 3896 }, { "epoch": 0.1075268075478466, "grad_norm": 0.002972138812765479, "learning_rate": 0.001, "loss": 0.4206, "step": 3897 }, { "epoch": 0.10755439974891097, "grad_norm": 0.0030399637762457132, "learning_rate": 0.001, "loss": 0.4082, "step": 3898 }, { "epoch": 0.10758199194997534, "grad_norm": 0.002291029319167137, "learning_rate": 0.001, "loss": 0.382, "step": 3899 }, { "epoch": 0.1076095841510397, "grad_norm": 0.002236071042716503, "learning_rate": 0.001, "loss": 0.4434, "step": 3900 }, { "epoch": 0.10763717635210408, "grad_norm": 0.0024943517055362463, "learning_rate": 0.001, "loss": 0.3686, "step": 3901 }, { "epoch": 0.10766476855316845, "grad_norm": 0.002602664055302739, "learning_rate": 0.001, "loss": 0.4001, "step": 3902 }, { "epoch": 0.10769236075423282, "grad_norm": 0.0031983822118490934, "learning_rate": 0.001, "loss": 0.4126, "step": 3903 }, { "epoch": 0.10771995295529718, "grad_norm": 0.0027117652352899313, "learning_rate": 0.001, "loss": 0.4063, "step": 3904 }, { "epoch": 0.10774754515636155, "grad_norm": 0.002372644143179059, "learning_rate": 0.001, "loss": 0.3827, "step": 3905 }, { "epoch": 0.10777513735742593, "grad_norm": 0.0038946103304624557, "learning_rate": 0.001, "loss": 0.3759, "step": 3906 }, { "epoch": 0.1078027295584903, "grad_norm": 0.00335517106577754, "learning_rate": 0.001, "loss": 0.4195, "step": 3907 }, { "epoch": 0.10783032175955466, "grad_norm": 0.015401921235024929, "learning_rate": 0.001, "loss": 0.4003, "step": 3908 }, { "epoch": 0.10785791396061903, "grad_norm": 0.003028671722859144, "learning_rate": 0.001, "loss": 0.4093, "step": 3909 }, { "epoch": 0.1078855061616834, "grad_norm": 0.003672859398648143, "learning_rate": 0.001, "loss": 0.3891, "step": 3910 }, { "epoch": 0.10791309836274778, "grad_norm": 0.0038496828638017178, "learning_rate": 0.001, "loss": 0.429, "step": 3911 }, { "epoch": 0.10794069056381214, "grad_norm": 0.0068849422968924046, "learning_rate": 0.001, "loss": 0.4226, "step": 3912 }, { "epoch": 0.10796828276487651, "grad_norm": 0.0074515510350465775, "learning_rate": 0.001, "loss": 0.3765, "step": 3913 }, { "epoch": 0.10799587496594087, "grad_norm": 0.0070841130800545216, "learning_rate": 0.001, "loss": 0.4285, "step": 3914 }, { "epoch": 0.10802346716700524, "grad_norm": 0.0054721771739423275, "learning_rate": 0.001, "loss": 0.3939, "step": 3915 }, { "epoch": 0.10805105936806962, "grad_norm": 0.004237642977386713, "learning_rate": 0.001, "loss": 0.4349, "step": 3916 }, { "epoch": 0.10807865156913399, "grad_norm": 0.004252060316503048, "learning_rate": 0.001, "loss": 0.3891, "step": 3917 }, { "epoch": 0.10810624377019835, "grad_norm": 0.0034951562993228436, "learning_rate": 0.001, "loss": 0.3818, "step": 3918 }, { "epoch": 0.10813383597126272, "grad_norm": 0.004239407833665609, "learning_rate": 0.001, "loss": 0.4368, "step": 3919 }, { "epoch": 0.10816142817232709, "grad_norm": 0.0040299855172634125, "learning_rate": 0.001, "loss": 0.36, "step": 3920 }, { "epoch": 0.10818902037339147, "grad_norm": 0.0028297097887843847, "learning_rate": 0.001, "loss": 0.4012, "step": 3921 }, { "epoch": 0.10821661257445583, "grad_norm": 0.0039049547631293535, "learning_rate": 0.001, "loss": 0.4299, "step": 3922 }, { "epoch": 0.1082442047755202, "grad_norm": 0.0031370699871331453, "learning_rate": 0.001, "loss": 0.4042, "step": 3923 }, { "epoch": 0.10827179697658457, "grad_norm": 0.004705764818936586, "learning_rate": 0.001, "loss": 0.3727, "step": 3924 }, { "epoch": 0.10829938917764893, "grad_norm": 0.0024270943831652403, "learning_rate": 0.001, "loss": 0.3855, "step": 3925 }, { "epoch": 0.10832698137871331, "grad_norm": 0.003461951157078147, "learning_rate": 0.001, "loss": 0.4069, "step": 3926 }, { "epoch": 0.10835457357977768, "grad_norm": 0.008057190105319023, "learning_rate": 0.001, "loss": 0.3887, "step": 3927 }, { "epoch": 0.10838216578084205, "grad_norm": 0.0031221345998346806, "learning_rate": 0.001, "loss": 0.3631, "step": 3928 }, { "epoch": 0.10840975798190641, "grad_norm": 0.0029621014837175608, "learning_rate": 0.001, "loss": 0.3889, "step": 3929 }, { "epoch": 0.10843735018297078, "grad_norm": 0.002558749634772539, "learning_rate": 0.001, "loss": 0.3698, "step": 3930 }, { "epoch": 0.10846494238403515, "grad_norm": 0.002620436018332839, "learning_rate": 0.001, "loss": 0.4195, "step": 3931 }, { "epoch": 0.10849253458509953, "grad_norm": 0.0027069852221757174, "learning_rate": 0.001, "loss": 0.4048, "step": 3932 }, { "epoch": 0.10852012678616389, "grad_norm": 0.003972397185862064, "learning_rate": 0.001, "loss": 0.3961, "step": 3933 }, { "epoch": 0.10854771898722826, "grad_norm": 0.0021818478126078844, "learning_rate": 0.001, "loss": 0.4113, "step": 3934 }, { "epoch": 0.10857531118829263, "grad_norm": 0.0026342314667999744, "learning_rate": 0.001, "loss": 0.409, "step": 3935 }, { "epoch": 0.10860290338935699, "grad_norm": 0.002321448177099228, "learning_rate": 0.001, "loss": 0.4049, "step": 3936 }, { "epoch": 0.10863049559042137, "grad_norm": 0.002941095968708396, "learning_rate": 0.001, "loss": 0.3933, "step": 3937 }, { "epoch": 0.10865808779148574, "grad_norm": 0.0027615423314273357, "learning_rate": 0.001, "loss": 0.393, "step": 3938 }, { "epoch": 0.1086856799925501, "grad_norm": 0.0035208980552852154, "learning_rate": 0.001, "loss": 0.4064, "step": 3939 }, { "epoch": 0.10871327219361447, "grad_norm": 0.0035143953282386065, "learning_rate": 0.001, "loss": 0.3768, "step": 3940 }, { "epoch": 0.10874086439467884, "grad_norm": 0.003013983368873596, "learning_rate": 0.001, "loss": 0.3959, "step": 3941 }, { "epoch": 0.10876845659574322, "grad_norm": 0.003047554986551404, "learning_rate": 0.001, "loss": 0.4309, "step": 3942 }, { "epoch": 0.10879604879680758, "grad_norm": 0.0038127705920487642, "learning_rate": 0.001, "loss": 0.3845, "step": 3943 }, { "epoch": 0.10882364099787195, "grad_norm": 0.003960581962019205, "learning_rate": 0.001, "loss": 0.3942, "step": 3944 }, { "epoch": 0.10885123319893632, "grad_norm": 0.0032766389194875956, "learning_rate": 0.001, "loss": 0.4133, "step": 3945 }, { "epoch": 0.10887882540000068, "grad_norm": 0.005258220247924328, "learning_rate": 0.001, "loss": 0.3556, "step": 3946 }, { "epoch": 0.10890641760106506, "grad_norm": 0.0038592154160141945, "learning_rate": 0.001, "loss": 0.3538, "step": 3947 }, { "epoch": 0.10893400980212943, "grad_norm": 0.004304266069084406, "learning_rate": 0.001, "loss": 0.3872, "step": 3948 }, { "epoch": 0.1089616020031938, "grad_norm": 0.0032214997336268425, "learning_rate": 0.001, "loss": 0.378, "step": 3949 }, { "epoch": 0.10898919420425816, "grad_norm": 0.0059113227762281895, "learning_rate": 0.001, "loss": 0.3923, "step": 3950 }, { "epoch": 0.10901678640532253, "grad_norm": 0.0029449171852320433, "learning_rate": 0.001, "loss": 0.393, "step": 3951 }, { "epoch": 0.10904437860638691, "grad_norm": 0.0030160732567310333, "learning_rate": 0.001, "loss": 0.3834, "step": 3952 }, { "epoch": 0.10907197080745128, "grad_norm": 0.012647976167500019, "learning_rate": 0.001, "loss": 0.3665, "step": 3953 }, { "epoch": 0.10909956300851564, "grad_norm": 0.0026820586062967777, "learning_rate": 0.001, "loss": 0.4019, "step": 3954 }, { "epoch": 0.10912715520958001, "grad_norm": 0.006842675618827343, "learning_rate": 0.001, "loss": 0.4137, "step": 3955 }, { "epoch": 0.10915474741064438, "grad_norm": 0.0030738625209778547, "learning_rate": 0.001, "loss": 0.3864, "step": 3956 }, { "epoch": 0.10918233961170876, "grad_norm": 0.0023346368689090014, "learning_rate": 0.001, "loss": 0.3768, "step": 3957 }, { "epoch": 0.10920993181277312, "grad_norm": 0.005200542975217104, "learning_rate": 0.001, "loss": 0.3889, "step": 3958 }, { "epoch": 0.10923752401383749, "grad_norm": 0.0057869781740009785, "learning_rate": 0.001, "loss": 0.3675, "step": 3959 }, { "epoch": 0.10926511621490186, "grad_norm": 0.0025842657778412104, "learning_rate": 0.001, "loss": 0.3964, "step": 3960 }, { "epoch": 0.10929270841596622, "grad_norm": 0.002922437386587262, "learning_rate": 0.001, "loss": 0.4117, "step": 3961 }, { "epoch": 0.1093203006170306, "grad_norm": 0.0038535622879862785, "learning_rate": 0.001, "loss": 0.4104, "step": 3962 }, { "epoch": 0.10934789281809497, "grad_norm": 0.0034645821433514357, "learning_rate": 0.001, "loss": 0.364, "step": 3963 }, { "epoch": 0.10937548501915934, "grad_norm": 0.0022498066537082195, "learning_rate": 0.001, "loss": 0.4021, "step": 3964 }, { "epoch": 0.1094030772202237, "grad_norm": 0.0035552133340388536, "learning_rate": 0.001, "loss": 0.4002, "step": 3965 }, { "epoch": 0.10943066942128807, "grad_norm": 0.0029964253772050142, "learning_rate": 0.001, "loss": 0.4, "step": 3966 }, { "epoch": 0.10945826162235245, "grad_norm": 0.002939821919426322, "learning_rate": 0.001, "loss": 0.3842, "step": 3967 }, { "epoch": 0.10948585382341681, "grad_norm": 0.0021805204451084137, "learning_rate": 0.001, "loss": 0.4022, "step": 3968 }, { "epoch": 0.10951344602448118, "grad_norm": 0.010238613933324814, "learning_rate": 0.001, "loss": 0.3945, "step": 3969 }, { "epoch": 0.10954103822554555, "grad_norm": 0.00320064858533442, "learning_rate": 0.001, "loss": 0.4245, "step": 3970 }, { "epoch": 0.10956863042660991, "grad_norm": 0.0038515296764671803, "learning_rate": 0.001, "loss": 0.3847, "step": 3971 }, { "epoch": 0.1095962226276743, "grad_norm": 0.0033414731733500957, "learning_rate": 0.001, "loss": 0.3992, "step": 3972 }, { "epoch": 0.10962381482873866, "grad_norm": 0.002315077930688858, "learning_rate": 0.001, "loss": 0.4104, "step": 3973 }, { "epoch": 0.10965140702980303, "grad_norm": 0.0025413348339498043, "learning_rate": 0.001, "loss": 0.4289, "step": 3974 }, { "epoch": 0.1096789992308674, "grad_norm": 0.010324854403734207, "learning_rate": 0.001, "loss": 0.3901, "step": 3975 }, { "epoch": 0.10970659143193176, "grad_norm": 0.009162775240838528, "learning_rate": 0.001, "loss": 0.3952, "step": 3976 }, { "epoch": 0.10973418363299613, "grad_norm": 0.00687329052016139, "learning_rate": 0.001, "loss": 0.4187, "step": 3977 }, { "epoch": 0.10976177583406051, "grad_norm": 0.009686012752354145, "learning_rate": 0.001, "loss": 0.3732, "step": 3978 }, { "epoch": 0.10978936803512487, "grad_norm": 0.003329911269247532, "learning_rate": 0.001, "loss": 0.3573, "step": 3979 }, { "epoch": 0.10981696023618924, "grad_norm": 0.003336430061608553, "learning_rate": 0.001, "loss": 0.4589, "step": 3980 }, { "epoch": 0.1098445524372536, "grad_norm": 0.0021596092265099287, "learning_rate": 0.001, "loss": 0.4357, "step": 3981 }, { "epoch": 0.10987214463831797, "grad_norm": 0.003178425133228302, "learning_rate": 0.001, "loss": 0.4034, "step": 3982 }, { "epoch": 0.10989973683938235, "grad_norm": 0.0019768651109188795, "learning_rate": 0.001, "loss": 0.3811, "step": 3983 }, { "epoch": 0.10992732904044672, "grad_norm": 0.0026141603011637926, "learning_rate": 0.001, "loss": 0.4071, "step": 3984 }, { "epoch": 0.10995492124151109, "grad_norm": 0.00246097962372005, "learning_rate": 0.001, "loss": 0.3946, "step": 3985 }, { "epoch": 0.10998251344257545, "grad_norm": 0.002583438763394952, "learning_rate": 0.001, "loss": 0.377, "step": 3986 }, { "epoch": 0.11001010564363982, "grad_norm": 0.003509828122332692, "learning_rate": 0.001, "loss": 0.437, "step": 3987 }, { "epoch": 0.1100376978447042, "grad_norm": 0.002548534655943513, "learning_rate": 0.001, "loss": 0.3785, "step": 3988 }, { "epoch": 0.11006529004576857, "grad_norm": 0.002522020833566785, "learning_rate": 0.001, "loss": 0.379, "step": 3989 }, { "epoch": 0.11009288224683293, "grad_norm": 0.0027163205668330193, "learning_rate": 0.001, "loss": 0.3933, "step": 3990 }, { "epoch": 0.1101204744478973, "grad_norm": 0.0020943363197147846, "learning_rate": 0.001, "loss": 0.3713, "step": 3991 }, { "epoch": 0.11014806664896166, "grad_norm": 0.002914323704317212, "learning_rate": 0.001, "loss": 0.4143, "step": 3992 }, { "epoch": 0.11017565885002605, "grad_norm": 0.0023269762750715017, "learning_rate": 0.001, "loss": 0.4048, "step": 3993 }, { "epoch": 0.11020325105109041, "grad_norm": 0.002514853375032544, "learning_rate": 0.001, "loss": 0.4124, "step": 3994 }, { "epoch": 0.11023084325215478, "grad_norm": 0.002852360252290964, "learning_rate": 0.001, "loss": 0.3768, "step": 3995 }, { "epoch": 0.11025843545321914, "grad_norm": 0.00278305122628808, "learning_rate": 0.001, "loss": 0.399, "step": 3996 }, { "epoch": 0.11028602765428351, "grad_norm": 0.0031666734721511602, "learning_rate": 0.001, "loss": 0.3916, "step": 3997 }, { "epoch": 0.11031361985534789, "grad_norm": 0.006654566153883934, "learning_rate": 0.001, "loss": 0.4034, "step": 3998 }, { "epoch": 0.11034121205641226, "grad_norm": 0.002697068266570568, "learning_rate": 0.001, "loss": 0.4105, "step": 3999 }, { "epoch": 0.11036880425747662, "grad_norm": 0.003131921635940671, "learning_rate": 0.001, "loss": 0.4092, "step": 4000 }, { "epoch": 0.11036880425747662, "eval_runtime": 24.9249, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.16, "step": 4000 }, { "epoch": 0.11039639645854099, "grad_norm": 0.009388426318764687, "learning_rate": 0.001, "loss": 0.3925, "step": 4001 }, { "epoch": 0.11042398865960536, "grad_norm": 0.002704363316297531, "learning_rate": 0.001, "loss": 0.3666, "step": 4002 }, { "epoch": 0.11045158086066974, "grad_norm": 0.0023160416167229414, "learning_rate": 0.001, "loss": 0.4108, "step": 4003 }, { "epoch": 0.1104791730617341, "grad_norm": 0.0025515344459563494, "learning_rate": 0.001, "loss": 0.4213, "step": 4004 }, { "epoch": 0.11050676526279847, "grad_norm": 0.0036002611741423607, "learning_rate": 0.001, "loss": 0.4029, "step": 4005 }, { "epoch": 0.11053435746386284, "grad_norm": 0.003040984272956848, "learning_rate": 0.001, "loss": 0.3941, "step": 4006 }, { "epoch": 0.1105619496649272, "grad_norm": 0.00276200077496469, "learning_rate": 0.001, "loss": 0.3724, "step": 4007 }, { "epoch": 0.11058954186599158, "grad_norm": 0.0028444197960197926, "learning_rate": 0.001, "loss": 0.3991, "step": 4008 }, { "epoch": 0.11061713406705595, "grad_norm": 0.0025242117699235678, "learning_rate": 0.001, "loss": 0.416, "step": 4009 }, { "epoch": 0.11064472626812032, "grad_norm": 0.004357707686722279, "learning_rate": 0.001, "loss": 0.3811, "step": 4010 }, { "epoch": 0.11067231846918468, "grad_norm": 0.01803600788116455, "learning_rate": 0.001, "loss": 0.4133, "step": 4011 }, { "epoch": 0.11069991067024905, "grad_norm": 0.0028698742389678955, "learning_rate": 0.001, "loss": 0.4018, "step": 4012 }, { "epoch": 0.11072750287131343, "grad_norm": 0.0033235198352485895, "learning_rate": 0.001, "loss": 0.4138, "step": 4013 }, { "epoch": 0.1107550950723778, "grad_norm": 0.0038872750010341406, "learning_rate": 0.001, "loss": 0.4174, "step": 4014 }, { "epoch": 0.11078268727344216, "grad_norm": 0.002596453996375203, "learning_rate": 0.001, "loss": 0.4487, "step": 4015 }, { "epoch": 0.11081027947450653, "grad_norm": 0.004450778476893902, "learning_rate": 0.001, "loss": 0.406, "step": 4016 }, { "epoch": 0.1108378716755709, "grad_norm": 0.0031362990848720074, "learning_rate": 0.001, "loss": 0.405, "step": 4017 }, { "epoch": 0.11086546387663528, "grad_norm": 0.0024727729614824057, "learning_rate": 0.001, "loss": 0.4156, "step": 4018 }, { "epoch": 0.11089305607769964, "grad_norm": 0.003262293990701437, "learning_rate": 0.001, "loss": 0.3506, "step": 4019 }, { "epoch": 0.11092064827876401, "grad_norm": 0.002469596453011036, "learning_rate": 0.001, "loss": 0.4224, "step": 4020 }, { "epoch": 0.11094824047982838, "grad_norm": 0.0029696666169911623, "learning_rate": 0.001, "loss": 0.4081, "step": 4021 }, { "epoch": 0.11097583268089274, "grad_norm": 0.0031070455443114042, "learning_rate": 0.001, "loss": 0.3808, "step": 4022 }, { "epoch": 0.11100342488195711, "grad_norm": 0.002378135221078992, "learning_rate": 0.001, "loss": 0.3993, "step": 4023 }, { "epoch": 0.11103101708302149, "grad_norm": 0.0020418402273207903, "learning_rate": 0.001, "loss": 0.3987, "step": 4024 }, { "epoch": 0.11105860928408585, "grad_norm": 0.00241975043900311, "learning_rate": 0.001, "loss": 0.3835, "step": 4025 }, { "epoch": 0.11108620148515022, "grad_norm": 0.0036054837983101606, "learning_rate": 0.001, "loss": 0.375, "step": 4026 }, { "epoch": 0.11111379368621459, "grad_norm": 0.002273577032610774, "learning_rate": 0.001, "loss": 0.4122, "step": 4027 }, { "epoch": 0.11114138588727895, "grad_norm": 0.008908730000257492, "learning_rate": 0.001, "loss": 0.4113, "step": 4028 }, { "epoch": 0.11116897808834333, "grad_norm": 0.0027021903079003096, "learning_rate": 0.001, "loss": 0.3773, "step": 4029 }, { "epoch": 0.1111965702894077, "grad_norm": 0.002809008816257119, "learning_rate": 0.001, "loss": 0.3948, "step": 4030 }, { "epoch": 0.11122416249047207, "grad_norm": 0.004108759108930826, "learning_rate": 0.001, "loss": 0.3934, "step": 4031 }, { "epoch": 0.11125175469153643, "grad_norm": 0.004110720008611679, "learning_rate": 0.001, "loss": 0.3871, "step": 4032 }, { "epoch": 0.1112793468926008, "grad_norm": 0.003450944786891341, "learning_rate": 0.001, "loss": 0.4001, "step": 4033 }, { "epoch": 0.11130693909366518, "grad_norm": 0.0027327281422913074, "learning_rate": 0.001, "loss": 0.42, "step": 4034 }, { "epoch": 0.11133453129472955, "grad_norm": 0.003815416479483247, "learning_rate": 0.001, "loss": 0.4287, "step": 4035 }, { "epoch": 0.11136212349579391, "grad_norm": 0.0020829702261835337, "learning_rate": 0.001, "loss": 0.3838, "step": 4036 }, { "epoch": 0.11138971569685828, "grad_norm": 0.0024780267849564552, "learning_rate": 0.001, "loss": 0.4031, "step": 4037 }, { "epoch": 0.11141730789792265, "grad_norm": 0.004010303877294064, "learning_rate": 0.001, "loss": 0.4179, "step": 4038 }, { "epoch": 0.11144490009898703, "grad_norm": 0.0030929851345717907, "learning_rate": 0.001, "loss": 0.3852, "step": 4039 }, { "epoch": 0.11147249230005139, "grad_norm": 0.0041397190652787685, "learning_rate": 0.001, "loss": 0.3881, "step": 4040 }, { "epoch": 0.11150008450111576, "grad_norm": 0.0039605977945029736, "learning_rate": 0.001, "loss": 0.3877, "step": 4041 }, { "epoch": 0.11152767670218013, "grad_norm": 0.003413001075387001, "learning_rate": 0.001, "loss": 0.3726, "step": 4042 }, { "epoch": 0.11155526890324449, "grad_norm": 0.01140381395816803, "learning_rate": 0.001, "loss": 0.3825, "step": 4043 }, { "epoch": 0.11158286110430887, "grad_norm": 0.005510971415787935, "learning_rate": 0.001, "loss": 0.4163, "step": 4044 }, { "epoch": 0.11161045330537324, "grad_norm": 0.007506036199629307, "learning_rate": 0.001, "loss": 0.4018, "step": 4045 }, { "epoch": 0.1116380455064376, "grad_norm": 0.007963884621858597, "learning_rate": 0.001, "loss": 0.3802, "step": 4046 }, { "epoch": 0.11166563770750197, "grad_norm": 0.007050946820527315, "learning_rate": 0.001, "loss": 0.4122, "step": 4047 }, { "epoch": 0.11169322990856634, "grad_norm": 0.00468503637239337, "learning_rate": 0.001, "loss": 0.3957, "step": 4048 }, { "epoch": 0.11172082210963072, "grad_norm": 0.003186695510521531, "learning_rate": 0.001, "loss": 0.4234, "step": 4049 }, { "epoch": 0.11174841431069509, "grad_norm": 0.03650260344147682, "learning_rate": 0.001, "loss": 0.3924, "step": 4050 }, { "epoch": 0.11177600651175945, "grad_norm": 0.006556427571922541, "learning_rate": 0.001, "loss": 0.3908, "step": 4051 }, { "epoch": 0.11180359871282382, "grad_norm": 0.003500849474221468, "learning_rate": 0.001, "loss": 0.3871, "step": 4052 }, { "epoch": 0.11183119091388818, "grad_norm": 0.005811590701341629, "learning_rate": 0.001, "loss": 0.4169, "step": 4053 }, { "epoch": 0.11185878311495256, "grad_norm": 0.004504525102674961, "learning_rate": 0.001, "loss": 0.4123, "step": 4054 }, { "epoch": 0.11188637531601693, "grad_norm": 0.00391266867518425, "learning_rate": 0.001, "loss": 0.3716, "step": 4055 }, { "epoch": 0.1119139675170813, "grad_norm": 0.002217537024989724, "learning_rate": 0.001, "loss": 0.4385, "step": 4056 }, { "epoch": 0.11194155971814566, "grad_norm": 0.0025836548302322626, "learning_rate": 0.001, "loss": 0.3797, "step": 4057 }, { "epoch": 0.11196915191921003, "grad_norm": 0.0025196904316544533, "learning_rate": 0.001, "loss": 0.3861, "step": 4058 }, { "epoch": 0.11199674412027441, "grad_norm": 0.0020293437410146, "learning_rate": 0.001, "loss": 0.4053, "step": 4059 }, { "epoch": 0.11202433632133878, "grad_norm": 0.0022750168573111296, "learning_rate": 0.001, "loss": 0.4117, "step": 4060 }, { "epoch": 0.11205192852240314, "grad_norm": 0.0026435197796672583, "learning_rate": 0.001, "loss": 0.4171, "step": 4061 }, { "epoch": 0.11207952072346751, "grad_norm": 0.0026955234352499247, "learning_rate": 0.001, "loss": 0.4178, "step": 4062 }, { "epoch": 0.11210711292453188, "grad_norm": 0.0022352158557623625, "learning_rate": 0.001, "loss": 0.3989, "step": 4063 }, { "epoch": 0.11213470512559626, "grad_norm": 0.00261326739564538, "learning_rate": 0.001, "loss": 0.431, "step": 4064 }, { "epoch": 0.11216229732666062, "grad_norm": 0.0022083420772105455, "learning_rate": 0.001, "loss": 0.3838, "step": 4065 }, { "epoch": 0.11218988952772499, "grad_norm": 0.002633447526022792, "learning_rate": 0.001, "loss": 0.406, "step": 4066 }, { "epoch": 0.11221748172878936, "grad_norm": 0.00373605964705348, "learning_rate": 0.001, "loss": 0.4227, "step": 4067 }, { "epoch": 0.11224507392985372, "grad_norm": 0.00297834281809628, "learning_rate": 0.001, "loss": 0.3889, "step": 4068 }, { "epoch": 0.1122726661309181, "grad_norm": 0.015468292869627476, "learning_rate": 0.001, "loss": 0.3891, "step": 4069 }, { "epoch": 0.11230025833198247, "grad_norm": 0.004757543094456196, "learning_rate": 0.001, "loss": 0.4245, "step": 4070 }, { "epoch": 0.11232785053304684, "grad_norm": 0.004110514651983976, "learning_rate": 0.001, "loss": 0.4556, "step": 4071 }, { "epoch": 0.1123554427341112, "grad_norm": 0.0029956744983792305, "learning_rate": 0.001, "loss": 0.4207, "step": 4072 }, { "epoch": 0.11238303493517557, "grad_norm": 0.006121024955064058, "learning_rate": 0.001, "loss": 0.4228, "step": 4073 }, { "epoch": 0.11241062713623994, "grad_norm": 0.011045140214264393, "learning_rate": 0.001, "loss": 0.377, "step": 4074 }, { "epoch": 0.11243821933730432, "grad_norm": 0.002652381081134081, "learning_rate": 0.001, "loss": 0.3963, "step": 4075 }, { "epoch": 0.11246581153836868, "grad_norm": 0.005784259643405676, "learning_rate": 0.001, "loss": 0.4095, "step": 4076 }, { "epoch": 0.11249340373943305, "grad_norm": 0.004484557081013918, "learning_rate": 0.001, "loss": 0.4009, "step": 4077 }, { "epoch": 0.11252099594049741, "grad_norm": 0.004235697444528341, "learning_rate": 0.001, "loss": 0.3975, "step": 4078 }, { "epoch": 0.11254858814156178, "grad_norm": 0.002368953777477145, "learning_rate": 0.001, "loss": 0.4364, "step": 4079 }, { "epoch": 0.11257618034262616, "grad_norm": 0.002926155459135771, "learning_rate": 0.001, "loss": 0.3957, "step": 4080 }, { "epoch": 0.11260377254369053, "grad_norm": 0.0026046691928058863, "learning_rate": 0.001, "loss": 0.396, "step": 4081 }, { "epoch": 0.1126313647447549, "grad_norm": 0.0025377613492310047, "learning_rate": 0.001, "loss": 0.3658, "step": 4082 }, { "epoch": 0.11265895694581926, "grad_norm": 0.0027987242210656404, "learning_rate": 0.001, "loss": 0.398, "step": 4083 }, { "epoch": 0.11268654914688363, "grad_norm": 0.002613126765936613, "learning_rate": 0.001, "loss": 0.4128, "step": 4084 }, { "epoch": 0.11271414134794801, "grad_norm": 0.002449605381116271, "learning_rate": 0.001, "loss": 0.3999, "step": 4085 }, { "epoch": 0.11274173354901237, "grad_norm": 0.0033523484598845243, "learning_rate": 0.001, "loss": 0.4057, "step": 4086 }, { "epoch": 0.11276932575007674, "grad_norm": 0.003658158238977194, "learning_rate": 0.001, "loss": 0.3999, "step": 4087 }, { "epoch": 0.1127969179511411, "grad_norm": 0.0031146903056651354, "learning_rate": 0.001, "loss": 0.4306, "step": 4088 }, { "epoch": 0.11282451015220547, "grad_norm": 0.004147498402744532, "learning_rate": 0.001, "loss": 0.3737, "step": 4089 }, { "epoch": 0.11285210235326985, "grad_norm": 0.0027466421015560627, "learning_rate": 0.001, "loss": 0.4213, "step": 4090 }, { "epoch": 0.11287969455433422, "grad_norm": 0.00301600550301373, "learning_rate": 0.001, "loss": 0.3818, "step": 4091 }, { "epoch": 0.11290728675539859, "grad_norm": 0.002960977843031287, "learning_rate": 0.001, "loss": 0.4128, "step": 4092 }, { "epoch": 0.11293487895646295, "grad_norm": 0.0030630468390882015, "learning_rate": 0.001, "loss": 0.3792, "step": 4093 }, { "epoch": 0.11296247115752732, "grad_norm": 0.002365349093452096, "learning_rate": 0.001, "loss": 0.4169, "step": 4094 }, { "epoch": 0.1129900633585917, "grad_norm": 0.005711345002055168, "learning_rate": 0.001, "loss": 0.3701, "step": 4095 }, { "epoch": 0.11301765555965607, "grad_norm": 0.003600204363465309, "learning_rate": 0.001, "loss": 0.4023, "step": 4096 }, { "epoch": 0.11304524776072043, "grad_norm": 0.003578650299459696, "learning_rate": 0.001, "loss": 0.4069, "step": 4097 }, { "epoch": 0.1130728399617848, "grad_norm": 0.003018326824530959, "learning_rate": 0.001, "loss": 0.4217, "step": 4098 }, { "epoch": 0.11310043216284917, "grad_norm": 0.0023286626674234867, "learning_rate": 0.001, "loss": 0.4477, "step": 4099 }, { "epoch": 0.11312802436391355, "grad_norm": 0.0055235689505934715, "learning_rate": 0.001, "loss": 0.4097, "step": 4100 }, { "epoch": 0.11315561656497791, "grad_norm": 0.003630418796092272, "learning_rate": 0.001, "loss": 0.3977, "step": 4101 }, { "epoch": 0.11318320876604228, "grad_norm": 0.0026847817935049534, "learning_rate": 0.001, "loss": 0.3564, "step": 4102 }, { "epoch": 0.11321080096710665, "grad_norm": 0.0022519559133797884, "learning_rate": 0.001, "loss": 0.4048, "step": 4103 }, { "epoch": 0.11323839316817101, "grad_norm": 0.002188954036682844, "learning_rate": 0.001, "loss": 0.4181, "step": 4104 }, { "epoch": 0.11326598536923539, "grad_norm": 0.006972792092710733, "learning_rate": 0.001, "loss": 0.4001, "step": 4105 }, { "epoch": 0.11329357757029976, "grad_norm": 0.0025407804641872644, "learning_rate": 0.001, "loss": 0.4251, "step": 4106 }, { "epoch": 0.11332116977136412, "grad_norm": 0.006640659179538488, "learning_rate": 0.001, "loss": 0.3743, "step": 4107 }, { "epoch": 0.11334876197242849, "grad_norm": 0.0022770599462091923, "learning_rate": 0.001, "loss": 0.4122, "step": 4108 }, { "epoch": 0.11337635417349286, "grad_norm": 0.0027237252797931433, "learning_rate": 0.001, "loss": 0.3964, "step": 4109 }, { "epoch": 0.11340394637455724, "grad_norm": 0.0026092720218002796, "learning_rate": 0.001, "loss": 0.4007, "step": 4110 }, { "epoch": 0.1134315385756216, "grad_norm": 0.0025469979736953974, "learning_rate": 0.001, "loss": 0.3901, "step": 4111 }, { "epoch": 0.11345913077668597, "grad_norm": 0.004103371873497963, "learning_rate": 0.001, "loss": 0.4138, "step": 4112 }, { "epoch": 0.11348672297775034, "grad_norm": 0.002415394876152277, "learning_rate": 0.001, "loss": 0.3828, "step": 4113 }, { "epoch": 0.1135143151788147, "grad_norm": 0.0024486854672431946, "learning_rate": 0.001, "loss": 0.3623, "step": 4114 }, { "epoch": 0.11354190737987908, "grad_norm": 0.003065606579184532, "learning_rate": 0.001, "loss": 0.3931, "step": 4115 }, { "epoch": 0.11356949958094345, "grad_norm": 0.0029552297201007605, "learning_rate": 0.001, "loss": 0.3854, "step": 4116 }, { "epoch": 0.11359709178200782, "grad_norm": 0.0028423569165170193, "learning_rate": 0.001, "loss": 0.4042, "step": 4117 }, { "epoch": 0.11362468398307218, "grad_norm": 0.002471365500241518, "learning_rate": 0.001, "loss": 0.3874, "step": 4118 }, { "epoch": 0.11365227618413655, "grad_norm": 0.0026159638073295355, "learning_rate": 0.001, "loss": 0.4352, "step": 4119 }, { "epoch": 0.11367986838520092, "grad_norm": 0.00349643686786294, "learning_rate": 0.001, "loss": 0.4144, "step": 4120 }, { "epoch": 0.1137074605862653, "grad_norm": 0.003111085621640086, "learning_rate": 0.001, "loss": 0.3827, "step": 4121 }, { "epoch": 0.11373505278732966, "grad_norm": 0.002907720860093832, "learning_rate": 0.001, "loss": 0.4288, "step": 4122 }, { "epoch": 0.11376264498839403, "grad_norm": 0.004095634911209345, "learning_rate": 0.001, "loss": 0.3959, "step": 4123 }, { "epoch": 0.1137902371894584, "grad_norm": 0.0028071727138012648, "learning_rate": 0.001, "loss": 0.4061, "step": 4124 }, { "epoch": 0.11381782939052276, "grad_norm": 0.0032708507496863604, "learning_rate": 0.001, "loss": 0.3787, "step": 4125 }, { "epoch": 0.11384542159158714, "grad_norm": 0.002617691410705447, "learning_rate": 0.001, "loss": 0.4297, "step": 4126 }, { "epoch": 0.11387301379265151, "grad_norm": 0.0025609673466533422, "learning_rate": 0.001, "loss": 0.4136, "step": 4127 }, { "epoch": 0.11390060599371588, "grad_norm": 0.003452820936217904, "learning_rate": 0.001, "loss": 0.4136, "step": 4128 }, { "epoch": 0.11392819819478024, "grad_norm": 0.0029548651073127985, "learning_rate": 0.001, "loss": 0.4409, "step": 4129 }, { "epoch": 0.11395579039584461, "grad_norm": 0.002860912587493658, "learning_rate": 0.001, "loss": 0.3902, "step": 4130 }, { "epoch": 0.11398338259690899, "grad_norm": 0.0028807439375668764, "learning_rate": 0.001, "loss": 0.4236, "step": 4131 }, { "epoch": 0.11401097479797336, "grad_norm": 0.0028092057909816504, "learning_rate": 0.001, "loss": 0.3951, "step": 4132 }, { "epoch": 0.11403856699903772, "grad_norm": 0.003919110633432865, "learning_rate": 0.001, "loss": 0.4015, "step": 4133 }, { "epoch": 0.11406615920010209, "grad_norm": 0.0034238446969538927, "learning_rate": 0.001, "loss": 0.3811, "step": 4134 }, { "epoch": 0.11409375140116645, "grad_norm": 0.0033501333091408014, "learning_rate": 0.001, "loss": 0.4174, "step": 4135 }, { "epoch": 0.11412134360223083, "grad_norm": 0.002704891376197338, "learning_rate": 0.001, "loss": 0.4056, "step": 4136 }, { "epoch": 0.1141489358032952, "grad_norm": 0.003555488074198365, "learning_rate": 0.001, "loss": 0.3915, "step": 4137 }, { "epoch": 0.11417652800435957, "grad_norm": 0.0075040231458842754, "learning_rate": 0.001, "loss": 0.4176, "step": 4138 }, { "epoch": 0.11420412020542393, "grad_norm": 0.008047969080507755, "learning_rate": 0.001, "loss": 0.4179, "step": 4139 }, { "epoch": 0.1142317124064883, "grad_norm": 0.003022789489477873, "learning_rate": 0.001, "loss": 0.4296, "step": 4140 }, { "epoch": 0.11425930460755268, "grad_norm": 0.0042315032333135605, "learning_rate": 0.001, "loss": 0.4537, "step": 4141 }, { "epoch": 0.11428689680861705, "grad_norm": 0.0028275889344513416, "learning_rate": 0.001, "loss": 0.449, "step": 4142 }, { "epoch": 0.11431448900968141, "grad_norm": 0.002496670465916395, "learning_rate": 0.001, "loss": 0.3984, "step": 4143 }, { "epoch": 0.11434208121074578, "grad_norm": 0.004964656662195921, "learning_rate": 0.001, "loss": 0.3953, "step": 4144 }, { "epoch": 0.11436967341181015, "grad_norm": 0.0024401163682341576, "learning_rate": 0.001, "loss": 0.4282, "step": 4145 }, { "epoch": 0.11439726561287453, "grad_norm": 0.004458567593246698, "learning_rate": 0.001, "loss": 0.3921, "step": 4146 }, { "epoch": 0.1144248578139389, "grad_norm": 0.002872915705665946, "learning_rate": 0.001, "loss": 0.4262, "step": 4147 }, { "epoch": 0.11445245001500326, "grad_norm": 0.004791326820850372, "learning_rate": 0.001, "loss": 0.4012, "step": 4148 }, { "epoch": 0.11448004221606763, "grad_norm": 0.0037970049306750298, "learning_rate": 0.001, "loss": 0.381, "step": 4149 }, { "epoch": 0.11450763441713199, "grad_norm": 0.0025677571538835764, "learning_rate": 0.001, "loss": 0.4225, "step": 4150 }, { "epoch": 0.11453522661819637, "grad_norm": 0.0031208605505526066, "learning_rate": 0.001, "loss": 0.4148, "step": 4151 }, { "epoch": 0.11456281881926074, "grad_norm": 0.0031444996129721403, "learning_rate": 0.001, "loss": 0.3881, "step": 4152 }, { "epoch": 0.1145904110203251, "grad_norm": 0.0035791201516985893, "learning_rate": 0.001, "loss": 0.4098, "step": 4153 }, { "epoch": 0.11461800322138947, "grad_norm": 0.0027255616150796413, "learning_rate": 0.001, "loss": 0.3751, "step": 4154 }, { "epoch": 0.11464559542245384, "grad_norm": 0.003225408960133791, "learning_rate": 0.001, "loss": 0.374, "step": 4155 }, { "epoch": 0.11467318762351822, "grad_norm": 0.0035840212367475033, "learning_rate": 0.001, "loss": 0.3518, "step": 4156 }, { "epoch": 0.11470077982458259, "grad_norm": 0.0035705710761249065, "learning_rate": 0.001, "loss": 0.3689, "step": 4157 }, { "epoch": 0.11472837202564695, "grad_norm": 0.0029657899867743254, "learning_rate": 0.001, "loss": 0.4079, "step": 4158 }, { "epoch": 0.11475596422671132, "grad_norm": 0.003734529484063387, "learning_rate": 0.001, "loss": 0.3859, "step": 4159 }, { "epoch": 0.11478355642777568, "grad_norm": 0.00309072551317513, "learning_rate": 0.001, "loss": 0.3934, "step": 4160 }, { "epoch": 0.11481114862884007, "grad_norm": 0.004012387245893478, "learning_rate": 0.001, "loss": 0.3629, "step": 4161 }, { "epoch": 0.11483874082990443, "grad_norm": 0.002934156684204936, "learning_rate": 0.001, "loss": 0.4, "step": 4162 }, { "epoch": 0.1148663330309688, "grad_norm": 0.004066895227879286, "learning_rate": 0.001, "loss": 0.4157, "step": 4163 }, { "epoch": 0.11489392523203316, "grad_norm": 0.004248685669153929, "learning_rate": 0.001, "loss": 0.4092, "step": 4164 }, { "epoch": 0.11492151743309753, "grad_norm": 0.0024275535251945257, "learning_rate": 0.001, "loss": 0.4446, "step": 4165 }, { "epoch": 0.1149491096341619, "grad_norm": 0.0025079327169805765, "learning_rate": 0.001, "loss": 0.4163, "step": 4166 }, { "epoch": 0.11497670183522628, "grad_norm": 0.0024487797636538744, "learning_rate": 0.001, "loss": 0.3915, "step": 4167 }, { "epoch": 0.11500429403629064, "grad_norm": 0.002970887813717127, "learning_rate": 0.001, "loss": 0.4153, "step": 4168 }, { "epoch": 0.11503188623735501, "grad_norm": 0.0025243964046239853, "learning_rate": 0.001, "loss": 0.3893, "step": 4169 }, { "epoch": 0.11505947843841938, "grad_norm": 0.004984840750694275, "learning_rate": 0.001, "loss": 0.3893, "step": 4170 }, { "epoch": 0.11508707063948374, "grad_norm": 0.0044680144637823105, "learning_rate": 0.001, "loss": 0.3893, "step": 4171 }, { "epoch": 0.11511466284054812, "grad_norm": 0.0035056746564805508, "learning_rate": 0.001, "loss": 0.3963, "step": 4172 }, { "epoch": 0.11514225504161249, "grad_norm": 0.0023045954294502735, "learning_rate": 0.001, "loss": 0.4143, "step": 4173 }, { "epoch": 0.11516984724267686, "grad_norm": 0.0029870544094592333, "learning_rate": 0.001, "loss": 0.3857, "step": 4174 }, { "epoch": 0.11519743944374122, "grad_norm": 0.002851466415449977, "learning_rate": 0.001, "loss": 0.4221, "step": 4175 }, { "epoch": 0.11522503164480559, "grad_norm": 0.0037020803429186344, "learning_rate": 0.001, "loss": 0.3927, "step": 4176 }, { "epoch": 0.11525262384586997, "grad_norm": 0.0025792547967284918, "learning_rate": 0.001, "loss": 0.4076, "step": 4177 }, { "epoch": 0.11528021604693434, "grad_norm": 0.005641425959765911, "learning_rate": 0.001, "loss": 0.3746, "step": 4178 }, { "epoch": 0.1153078082479987, "grad_norm": 0.006405304651707411, "learning_rate": 0.001, "loss": 0.4176, "step": 4179 }, { "epoch": 0.11533540044906307, "grad_norm": 0.003738192841410637, "learning_rate": 0.001, "loss": 0.4232, "step": 4180 }, { "epoch": 0.11536299265012744, "grad_norm": 0.002722861710935831, "learning_rate": 0.001, "loss": 0.3712, "step": 4181 }, { "epoch": 0.11539058485119182, "grad_norm": 0.003077869303524494, "learning_rate": 0.001, "loss": 0.3665, "step": 4182 }, { "epoch": 0.11541817705225618, "grad_norm": 0.0023466874845325947, "learning_rate": 0.001, "loss": 0.391, "step": 4183 }, { "epoch": 0.11544576925332055, "grad_norm": 0.0029894413892179728, "learning_rate": 0.001, "loss": 0.4229, "step": 4184 }, { "epoch": 0.11547336145438492, "grad_norm": 0.005931466352194548, "learning_rate": 0.001, "loss": 0.4229, "step": 4185 }, { "epoch": 0.11550095365544928, "grad_norm": 0.002602703869342804, "learning_rate": 0.001, "loss": 0.4107, "step": 4186 }, { "epoch": 0.11552854585651366, "grad_norm": 0.00304593937471509, "learning_rate": 0.001, "loss": 0.3973, "step": 4187 }, { "epoch": 0.11555613805757803, "grad_norm": 0.002939543453976512, "learning_rate": 0.001, "loss": 0.3698, "step": 4188 }, { "epoch": 0.1155837302586424, "grad_norm": 0.0026717365253716707, "learning_rate": 0.001, "loss": 0.4273, "step": 4189 }, { "epoch": 0.11561132245970676, "grad_norm": 0.0039357393980026245, "learning_rate": 0.001, "loss": 0.3866, "step": 4190 }, { "epoch": 0.11563891466077113, "grad_norm": 0.0033047099132090807, "learning_rate": 0.001, "loss": 0.3634, "step": 4191 }, { "epoch": 0.11566650686183551, "grad_norm": 0.00248891394585371, "learning_rate": 0.001, "loss": 0.4038, "step": 4192 }, { "epoch": 0.11569409906289987, "grad_norm": 0.003228937741369009, "learning_rate": 0.001, "loss": 0.4254, "step": 4193 }, { "epoch": 0.11572169126396424, "grad_norm": 0.004180791787803173, "learning_rate": 0.001, "loss": 0.389, "step": 4194 }, { "epoch": 0.11574928346502861, "grad_norm": 0.0026013990864157677, "learning_rate": 0.001, "loss": 0.396, "step": 4195 }, { "epoch": 0.11577687566609297, "grad_norm": 0.003035642672330141, "learning_rate": 0.001, "loss": 0.3863, "step": 4196 }, { "epoch": 0.11580446786715735, "grad_norm": 0.004497555084526539, "learning_rate": 0.001, "loss": 0.426, "step": 4197 }, { "epoch": 0.11583206006822172, "grad_norm": 0.0024611612316221, "learning_rate": 0.001, "loss": 0.3838, "step": 4198 }, { "epoch": 0.11585965226928609, "grad_norm": 0.002211876679211855, "learning_rate": 0.001, "loss": 0.4061, "step": 4199 }, { "epoch": 0.11588724447035045, "grad_norm": 0.0038244640454649925, "learning_rate": 0.001, "loss": 0.367, "step": 4200 }, { "epoch": 0.11591483667141482, "grad_norm": 0.002824941882863641, "learning_rate": 0.001, "loss": 0.3977, "step": 4201 }, { "epoch": 0.1159424288724792, "grad_norm": 0.00297146150842309, "learning_rate": 0.001, "loss": 0.3969, "step": 4202 }, { "epoch": 0.11597002107354357, "grad_norm": 0.003329311031848192, "learning_rate": 0.001, "loss": 0.4067, "step": 4203 }, { "epoch": 0.11599761327460793, "grad_norm": 0.0036552376113831997, "learning_rate": 0.001, "loss": 0.3593, "step": 4204 }, { "epoch": 0.1160252054756723, "grad_norm": 0.003157126484438777, "learning_rate": 0.001, "loss": 0.3867, "step": 4205 }, { "epoch": 0.11605279767673667, "grad_norm": 0.0030856141820549965, "learning_rate": 0.001, "loss": 0.3612, "step": 4206 }, { "epoch": 0.11608038987780105, "grad_norm": 0.0030666659586131573, "learning_rate": 0.001, "loss": 0.4336, "step": 4207 }, { "epoch": 0.11610798207886541, "grad_norm": 0.00340241938829422, "learning_rate": 0.001, "loss": 0.3949, "step": 4208 }, { "epoch": 0.11613557427992978, "grad_norm": 0.005646420642733574, "learning_rate": 0.001, "loss": 0.407, "step": 4209 }, { "epoch": 0.11616316648099415, "grad_norm": 0.002865233225747943, "learning_rate": 0.001, "loss": 0.4253, "step": 4210 }, { "epoch": 0.11619075868205851, "grad_norm": 0.006409808062016964, "learning_rate": 0.001, "loss": 0.3962, "step": 4211 }, { "epoch": 0.11621835088312288, "grad_norm": 0.0022089029662311077, "learning_rate": 0.001, "loss": 0.4485, "step": 4212 }, { "epoch": 0.11624594308418726, "grad_norm": 0.002593854209408164, "learning_rate": 0.001, "loss": 0.4342, "step": 4213 }, { "epoch": 0.11627353528525163, "grad_norm": 0.004724476020783186, "learning_rate": 0.001, "loss": 0.3933, "step": 4214 }, { "epoch": 0.11630112748631599, "grad_norm": 0.0066048745065927505, "learning_rate": 0.001, "loss": 0.3659, "step": 4215 }, { "epoch": 0.11632871968738036, "grad_norm": 0.004728097002953291, "learning_rate": 0.001, "loss": 0.3812, "step": 4216 }, { "epoch": 0.11635631188844472, "grad_norm": 0.003980184905230999, "learning_rate": 0.001, "loss": 0.4053, "step": 4217 }, { "epoch": 0.1163839040895091, "grad_norm": 0.002691940637305379, "learning_rate": 0.001, "loss": 0.384, "step": 4218 }, { "epoch": 0.11641149629057347, "grad_norm": 0.004341395106166601, "learning_rate": 0.001, "loss": 0.3608, "step": 4219 }, { "epoch": 0.11643908849163784, "grad_norm": 0.0023949614260345697, "learning_rate": 0.001, "loss": 0.3989, "step": 4220 }, { "epoch": 0.1164666806927022, "grad_norm": 0.0021929224021732807, "learning_rate": 0.001, "loss": 0.3927, "step": 4221 }, { "epoch": 0.11649427289376657, "grad_norm": 0.002848616801202297, "learning_rate": 0.001, "loss": 0.4129, "step": 4222 }, { "epoch": 0.11652186509483095, "grad_norm": 0.0023948801681399345, "learning_rate": 0.001, "loss": 0.3991, "step": 4223 }, { "epoch": 0.11654945729589532, "grad_norm": 0.0029203318990767, "learning_rate": 0.001, "loss": 0.3962, "step": 4224 }, { "epoch": 0.11657704949695968, "grad_norm": 0.0027072993107140064, "learning_rate": 0.001, "loss": 0.4326, "step": 4225 }, { "epoch": 0.11660464169802405, "grad_norm": 0.007629503961652517, "learning_rate": 0.001, "loss": 0.4366, "step": 4226 }, { "epoch": 0.11663223389908842, "grad_norm": 0.004317782819271088, "learning_rate": 0.001, "loss": 0.4254, "step": 4227 }, { "epoch": 0.1166598261001528, "grad_norm": 0.004796279594302177, "learning_rate": 0.001, "loss": 0.4165, "step": 4228 }, { "epoch": 0.11668741830121716, "grad_norm": 0.0048171901144087315, "learning_rate": 0.001, "loss": 0.4078, "step": 4229 }, { "epoch": 0.11671501050228153, "grad_norm": 0.005509024020284414, "learning_rate": 0.001, "loss": 0.3921, "step": 4230 }, { "epoch": 0.1167426027033459, "grad_norm": 0.002434986876323819, "learning_rate": 0.001, "loss": 0.3991, "step": 4231 }, { "epoch": 0.11677019490441026, "grad_norm": 0.0024681081995368004, "learning_rate": 0.001, "loss": 0.3868, "step": 4232 }, { "epoch": 0.11679778710547464, "grad_norm": 0.0029520918615162373, "learning_rate": 0.001, "loss": 0.4171, "step": 4233 }, { "epoch": 0.11682537930653901, "grad_norm": 0.0039764223620295525, "learning_rate": 0.001, "loss": 0.4279, "step": 4234 }, { "epoch": 0.11685297150760338, "grad_norm": 0.0033561012241989374, "learning_rate": 0.001, "loss": 0.3801, "step": 4235 }, { "epoch": 0.11688056370866774, "grad_norm": 0.002082569058984518, "learning_rate": 0.001, "loss": 0.4333, "step": 4236 }, { "epoch": 0.11690815590973211, "grad_norm": 0.002162642776966095, "learning_rate": 0.001, "loss": 0.3956, "step": 4237 }, { "epoch": 0.11693574811079649, "grad_norm": 0.004019891377538443, "learning_rate": 0.001, "loss": 0.3855, "step": 4238 }, { "epoch": 0.11696334031186086, "grad_norm": 0.0039974479004740715, "learning_rate": 0.001, "loss": 0.392, "step": 4239 }, { "epoch": 0.11699093251292522, "grad_norm": 0.0032143592834472656, "learning_rate": 0.001, "loss": 0.3754, "step": 4240 }, { "epoch": 0.11701852471398959, "grad_norm": 0.0021860511042177677, "learning_rate": 0.001, "loss": 0.4301, "step": 4241 }, { "epoch": 0.11704611691505395, "grad_norm": 0.0034913863055408, "learning_rate": 0.001, "loss": 0.4232, "step": 4242 }, { "epoch": 0.11707370911611834, "grad_norm": 0.002544558374211192, "learning_rate": 0.001, "loss": 0.4308, "step": 4243 }, { "epoch": 0.1171013013171827, "grad_norm": 0.004531691782176495, "learning_rate": 0.001, "loss": 0.4217, "step": 4244 }, { "epoch": 0.11712889351824707, "grad_norm": 0.002380553400143981, "learning_rate": 0.001, "loss": 0.3713, "step": 4245 }, { "epoch": 0.11715648571931143, "grad_norm": 0.003138060914352536, "learning_rate": 0.001, "loss": 0.4387, "step": 4246 }, { "epoch": 0.1171840779203758, "grad_norm": 0.0038936040364205837, "learning_rate": 0.001, "loss": 0.3675, "step": 4247 }, { "epoch": 0.11721167012144018, "grad_norm": 0.0034198390785604715, "learning_rate": 0.001, "loss": 0.4041, "step": 4248 }, { "epoch": 0.11723926232250455, "grad_norm": 0.004554110579192638, "learning_rate": 0.001, "loss": 0.4036, "step": 4249 }, { "epoch": 0.11726685452356891, "grad_norm": 0.002692369045689702, "learning_rate": 0.001, "loss": 0.3955, "step": 4250 }, { "epoch": 0.11729444672463328, "grad_norm": 0.0031687277369201183, "learning_rate": 0.001, "loss": 0.3685, "step": 4251 }, { "epoch": 0.11732203892569765, "grad_norm": 0.0030465414747595787, "learning_rate": 0.001, "loss": 0.4197, "step": 4252 }, { "epoch": 0.11734963112676203, "grad_norm": 0.0028786477632820606, "learning_rate": 0.001, "loss": 0.3941, "step": 4253 }, { "epoch": 0.1173772233278264, "grad_norm": 0.0026728706434369087, "learning_rate": 0.001, "loss": 0.4235, "step": 4254 }, { "epoch": 0.11740481552889076, "grad_norm": 0.0030022338032722473, "learning_rate": 0.001, "loss": 0.407, "step": 4255 }, { "epoch": 0.11743240772995513, "grad_norm": 0.004039818421006203, "learning_rate": 0.001, "loss": 0.4007, "step": 4256 }, { "epoch": 0.1174599999310195, "grad_norm": 0.0031344417948275805, "learning_rate": 0.001, "loss": 0.4295, "step": 4257 }, { "epoch": 0.11748759213208387, "grad_norm": 0.0020141347777098417, "learning_rate": 0.001, "loss": 0.4067, "step": 4258 }, { "epoch": 0.11751518433314824, "grad_norm": 0.003466543275862932, "learning_rate": 0.001, "loss": 0.3938, "step": 4259 }, { "epoch": 0.1175427765342126, "grad_norm": 0.006833492312580347, "learning_rate": 0.001, "loss": 0.4395, "step": 4260 }, { "epoch": 0.11757036873527697, "grad_norm": 0.002628098940476775, "learning_rate": 0.001, "loss": 0.4086, "step": 4261 }, { "epoch": 0.11759796093634134, "grad_norm": 0.0029284025076776743, "learning_rate": 0.001, "loss": 0.3746, "step": 4262 }, { "epoch": 0.1176255531374057, "grad_norm": 0.002273330232128501, "learning_rate": 0.001, "loss": 0.4022, "step": 4263 }, { "epoch": 0.11765314533847009, "grad_norm": 0.004239256959408522, "learning_rate": 0.001, "loss": 0.409, "step": 4264 }, { "epoch": 0.11768073753953445, "grad_norm": 0.003706045914441347, "learning_rate": 0.001, "loss": 0.3751, "step": 4265 }, { "epoch": 0.11770832974059882, "grad_norm": 0.004043275490403175, "learning_rate": 0.001, "loss": 0.3808, "step": 4266 }, { "epoch": 0.11773592194166319, "grad_norm": 0.00288589159026742, "learning_rate": 0.001, "loss": 0.3952, "step": 4267 }, { "epoch": 0.11776351414272755, "grad_norm": 0.002828913275152445, "learning_rate": 0.001, "loss": 0.3861, "step": 4268 }, { "epoch": 0.11779110634379193, "grad_norm": 0.0026987362653017044, "learning_rate": 0.001, "loss": 0.4127, "step": 4269 }, { "epoch": 0.1178186985448563, "grad_norm": 0.004176660440862179, "learning_rate": 0.001, "loss": 0.3911, "step": 4270 }, { "epoch": 0.11784629074592066, "grad_norm": 0.0025856473948806524, "learning_rate": 0.001, "loss": 0.4223, "step": 4271 }, { "epoch": 0.11787388294698503, "grad_norm": 0.002688355278223753, "learning_rate": 0.001, "loss": 0.4427, "step": 4272 }, { "epoch": 0.1179014751480494, "grad_norm": 0.0035168598406016827, "learning_rate": 0.001, "loss": 0.4053, "step": 4273 }, { "epoch": 0.11792906734911378, "grad_norm": 0.00464267935603857, "learning_rate": 0.001, "loss": 0.3594, "step": 4274 }, { "epoch": 0.11795665955017814, "grad_norm": 0.002608151640743017, "learning_rate": 0.001, "loss": 0.3941, "step": 4275 }, { "epoch": 0.11798425175124251, "grad_norm": 0.003174230456352234, "learning_rate": 0.001, "loss": 0.3583, "step": 4276 }, { "epoch": 0.11801184395230688, "grad_norm": 0.005926152691245079, "learning_rate": 0.001, "loss": 0.4115, "step": 4277 }, { "epoch": 0.11803943615337124, "grad_norm": 0.0021453346125781536, "learning_rate": 0.001, "loss": 0.3648, "step": 4278 }, { "epoch": 0.11806702835443562, "grad_norm": 0.003014147747308016, "learning_rate": 0.001, "loss": 0.4007, "step": 4279 }, { "epoch": 0.11809462055549999, "grad_norm": 0.007064585108309984, "learning_rate": 0.001, "loss": 0.3441, "step": 4280 }, { "epoch": 0.11812221275656436, "grad_norm": 0.004232319537550211, "learning_rate": 0.001, "loss": 0.4019, "step": 4281 }, { "epoch": 0.11814980495762872, "grad_norm": 0.0022875110153108835, "learning_rate": 0.001, "loss": 0.3712, "step": 4282 }, { "epoch": 0.11817739715869309, "grad_norm": 0.0034797275438904762, "learning_rate": 0.001, "loss": 0.4008, "step": 4283 }, { "epoch": 0.11820498935975747, "grad_norm": 0.0038444402161985636, "learning_rate": 0.001, "loss": 0.3889, "step": 4284 }, { "epoch": 0.11823258156082184, "grad_norm": 0.004799429327249527, "learning_rate": 0.001, "loss": 0.4482, "step": 4285 }, { "epoch": 0.1182601737618862, "grad_norm": 0.0031683624256402254, "learning_rate": 0.001, "loss": 0.4242, "step": 4286 }, { "epoch": 0.11828776596295057, "grad_norm": 0.002779352478682995, "learning_rate": 0.001, "loss": 0.4007, "step": 4287 }, { "epoch": 0.11831535816401494, "grad_norm": 0.00301496684551239, "learning_rate": 0.001, "loss": 0.452, "step": 4288 }, { "epoch": 0.11834295036507932, "grad_norm": 0.0021808287128806114, "learning_rate": 0.001, "loss": 0.4234, "step": 4289 }, { "epoch": 0.11837054256614368, "grad_norm": 0.002374732168391347, "learning_rate": 0.001, "loss": 0.4414, "step": 4290 }, { "epoch": 0.11839813476720805, "grad_norm": 0.0025609827134758234, "learning_rate": 0.001, "loss": 0.41, "step": 4291 }, { "epoch": 0.11842572696827242, "grad_norm": 0.0029708382207900286, "learning_rate": 0.001, "loss": 0.3762, "step": 4292 }, { "epoch": 0.11845331916933678, "grad_norm": 0.002453922526910901, "learning_rate": 0.001, "loss": 0.3997, "step": 4293 }, { "epoch": 0.11848091137040116, "grad_norm": 0.003155443584546447, "learning_rate": 0.001, "loss": 0.4032, "step": 4294 }, { "epoch": 0.11850850357146553, "grad_norm": 0.0028496377635747194, "learning_rate": 0.001, "loss": 0.4215, "step": 4295 }, { "epoch": 0.1185360957725299, "grad_norm": 0.0024521294981241226, "learning_rate": 0.001, "loss": 0.4255, "step": 4296 }, { "epoch": 0.11856368797359426, "grad_norm": 0.002795828739181161, "learning_rate": 0.001, "loss": 0.4039, "step": 4297 }, { "epoch": 0.11859128017465863, "grad_norm": 0.0030860670376569033, "learning_rate": 0.001, "loss": 0.3927, "step": 4298 }, { "epoch": 0.11861887237572301, "grad_norm": 0.005245378706604242, "learning_rate": 0.001, "loss": 0.4086, "step": 4299 }, { "epoch": 0.11864646457678737, "grad_norm": 0.003146842820569873, "learning_rate": 0.001, "loss": 0.3404, "step": 4300 }, { "epoch": 0.11867405677785174, "grad_norm": 0.0026854875031858683, "learning_rate": 0.001, "loss": 0.3727, "step": 4301 }, { "epoch": 0.11870164897891611, "grad_norm": 0.0026074268389493227, "learning_rate": 0.001, "loss": 0.3586, "step": 4302 }, { "epoch": 0.11872924117998047, "grad_norm": 0.0035085766576230526, "learning_rate": 0.001, "loss": 0.4453, "step": 4303 }, { "epoch": 0.11875683338104485, "grad_norm": 0.0026303378399461508, "learning_rate": 0.001, "loss": 0.3808, "step": 4304 }, { "epoch": 0.11878442558210922, "grad_norm": 0.002744373632594943, "learning_rate": 0.001, "loss": 0.3987, "step": 4305 }, { "epoch": 0.11881201778317359, "grad_norm": 0.002459439681842923, "learning_rate": 0.001, "loss": 0.424, "step": 4306 }, { "epoch": 0.11883960998423795, "grad_norm": 0.004186084493994713, "learning_rate": 0.001, "loss": 0.3886, "step": 4307 }, { "epoch": 0.11886720218530232, "grad_norm": 0.002484044060111046, "learning_rate": 0.001, "loss": 0.4072, "step": 4308 }, { "epoch": 0.11889479438636669, "grad_norm": 0.002908499911427498, "learning_rate": 0.001, "loss": 0.3802, "step": 4309 }, { "epoch": 0.11892238658743107, "grad_norm": 0.0024023684673011303, "learning_rate": 0.001, "loss": 0.3796, "step": 4310 }, { "epoch": 0.11894997878849543, "grad_norm": 0.0033678016625344753, "learning_rate": 0.001, "loss": 0.3906, "step": 4311 }, { "epoch": 0.1189775709895598, "grad_norm": 0.0029683737084269524, "learning_rate": 0.001, "loss": 0.3552, "step": 4312 }, { "epoch": 0.11900516319062417, "grad_norm": 0.0027305546682327986, "learning_rate": 0.001, "loss": 0.422, "step": 4313 }, { "epoch": 0.11903275539168853, "grad_norm": 0.002915582386776805, "learning_rate": 0.001, "loss": 0.4102, "step": 4314 }, { "epoch": 0.11906034759275291, "grad_norm": 0.002866593888029456, "learning_rate": 0.001, "loss": 0.3765, "step": 4315 }, { "epoch": 0.11908793979381728, "grad_norm": 0.0024439122062176466, "learning_rate": 0.001, "loss": 0.4118, "step": 4316 }, { "epoch": 0.11911553199488165, "grad_norm": 0.003114825114607811, "learning_rate": 0.001, "loss": 0.3869, "step": 4317 }, { "epoch": 0.11914312419594601, "grad_norm": 0.005244606640189886, "learning_rate": 0.001, "loss": 0.3832, "step": 4318 }, { "epoch": 0.11917071639701038, "grad_norm": 0.0027887518517673016, "learning_rate": 0.001, "loss": 0.3903, "step": 4319 }, { "epoch": 0.11919830859807476, "grad_norm": 0.0033839023672044277, "learning_rate": 0.001, "loss": 0.3994, "step": 4320 }, { "epoch": 0.11922590079913913, "grad_norm": 0.00488735968247056, "learning_rate": 0.001, "loss": 0.4666, "step": 4321 }, { "epoch": 0.11925349300020349, "grad_norm": 0.00358974770642817, "learning_rate": 0.001, "loss": 0.3924, "step": 4322 }, { "epoch": 0.11928108520126786, "grad_norm": 0.004664520733058453, "learning_rate": 0.001, "loss": 0.4276, "step": 4323 }, { "epoch": 0.11930867740233222, "grad_norm": 0.0029046619310975075, "learning_rate": 0.001, "loss": 0.4279, "step": 4324 }, { "epoch": 0.1193362696033966, "grad_norm": 0.005748322233557701, "learning_rate": 0.001, "loss": 0.3928, "step": 4325 }, { "epoch": 0.11936386180446097, "grad_norm": 0.004792370367795229, "learning_rate": 0.001, "loss": 0.3951, "step": 4326 }, { "epoch": 0.11939145400552534, "grad_norm": 0.002347557572647929, "learning_rate": 0.001, "loss": 0.3878, "step": 4327 }, { "epoch": 0.1194190462065897, "grad_norm": 0.009171836078166962, "learning_rate": 0.001, "loss": 0.3932, "step": 4328 }, { "epoch": 0.11944663840765407, "grad_norm": 0.0038018315099179745, "learning_rate": 0.001, "loss": 0.3935, "step": 4329 }, { "epoch": 0.11947423060871845, "grad_norm": 0.010487204417586327, "learning_rate": 0.001, "loss": 0.4135, "step": 4330 }, { "epoch": 0.11950182280978282, "grad_norm": 0.0031547516118735075, "learning_rate": 0.001, "loss": 0.4237, "step": 4331 }, { "epoch": 0.11952941501084718, "grad_norm": 0.008287927135825157, "learning_rate": 0.001, "loss": 0.4101, "step": 4332 }, { "epoch": 0.11955700721191155, "grad_norm": 0.0035811339039355516, "learning_rate": 0.001, "loss": 0.4101, "step": 4333 }, { "epoch": 0.11958459941297592, "grad_norm": 0.0023166469763964415, "learning_rate": 0.001, "loss": 0.4415, "step": 4334 }, { "epoch": 0.1196121916140403, "grad_norm": 0.005073962267488241, "learning_rate": 0.001, "loss": 0.4063, "step": 4335 }, { "epoch": 0.11963978381510466, "grad_norm": 0.003093453822657466, "learning_rate": 0.001, "loss": 0.3567, "step": 4336 }, { "epoch": 0.11966737601616903, "grad_norm": 0.002334992168471217, "learning_rate": 0.001, "loss": 0.4305, "step": 4337 }, { "epoch": 0.1196949682172334, "grad_norm": 0.0023138297256082296, "learning_rate": 0.001, "loss": 0.4247, "step": 4338 }, { "epoch": 0.11972256041829776, "grad_norm": 0.002155238064005971, "learning_rate": 0.001, "loss": 0.4446, "step": 4339 }, { "epoch": 0.11975015261936214, "grad_norm": 0.004164060112088919, "learning_rate": 0.001, "loss": 0.4138, "step": 4340 }, { "epoch": 0.11977774482042651, "grad_norm": 0.0026546409353613853, "learning_rate": 0.001, "loss": 0.3838, "step": 4341 }, { "epoch": 0.11980533702149088, "grad_norm": 0.004223043564707041, "learning_rate": 0.001, "loss": 0.4106, "step": 4342 }, { "epoch": 0.11983292922255524, "grad_norm": 0.002268544165417552, "learning_rate": 0.001, "loss": 0.4368, "step": 4343 }, { "epoch": 0.11986052142361961, "grad_norm": 0.0023410958237946033, "learning_rate": 0.001, "loss": 0.439, "step": 4344 }, { "epoch": 0.11988811362468399, "grad_norm": 0.003867893014103174, "learning_rate": 0.001, "loss": 0.399, "step": 4345 }, { "epoch": 0.11991570582574836, "grad_norm": 0.0023783824872225523, "learning_rate": 0.001, "loss": 0.4041, "step": 4346 }, { "epoch": 0.11994329802681272, "grad_norm": 0.002548053627833724, "learning_rate": 0.001, "loss": 0.4261, "step": 4347 }, { "epoch": 0.11997089022787709, "grad_norm": 0.0028831157833337784, "learning_rate": 0.001, "loss": 0.3841, "step": 4348 }, { "epoch": 0.11999848242894146, "grad_norm": 0.0029863922391086817, "learning_rate": 0.001, "loss": 0.3918, "step": 4349 }, { "epoch": 0.12002607463000584, "grad_norm": 0.002305314177647233, "learning_rate": 0.001, "loss": 0.3892, "step": 4350 }, { "epoch": 0.1200536668310702, "grad_norm": 0.006781649775803089, "learning_rate": 0.001, "loss": 0.4038, "step": 4351 }, { "epoch": 0.12008125903213457, "grad_norm": 0.0042722891084849834, "learning_rate": 0.001, "loss": 0.3885, "step": 4352 }, { "epoch": 0.12010885123319893, "grad_norm": 0.0033779507502913475, "learning_rate": 0.001, "loss": 0.3611, "step": 4353 }, { "epoch": 0.1201364434342633, "grad_norm": 0.002541282679885626, "learning_rate": 0.001, "loss": 0.4365, "step": 4354 }, { "epoch": 0.12016403563532767, "grad_norm": 0.002650998765602708, "learning_rate": 0.001, "loss": 0.3754, "step": 4355 }, { "epoch": 0.12019162783639205, "grad_norm": 0.002635399578139186, "learning_rate": 0.001, "loss": 0.4553, "step": 4356 }, { "epoch": 0.12021922003745641, "grad_norm": 0.0034205662086606026, "learning_rate": 0.001, "loss": 0.3805, "step": 4357 }, { "epoch": 0.12024681223852078, "grad_norm": 0.0041115800850093365, "learning_rate": 0.001, "loss": 0.4095, "step": 4358 }, { "epoch": 0.12027440443958515, "grad_norm": 0.003525017062202096, "learning_rate": 0.001, "loss": 0.4103, "step": 4359 }, { "epoch": 0.12030199664064951, "grad_norm": 0.003658512607216835, "learning_rate": 0.001, "loss": 0.4322, "step": 4360 }, { "epoch": 0.1203295888417139, "grad_norm": 0.00296131893992424, "learning_rate": 0.001, "loss": 0.3949, "step": 4361 }, { "epoch": 0.12035718104277826, "grad_norm": 0.0035319009330123663, "learning_rate": 0.001, "loss": 0.4197, "step": 4362 }, { "epoch": 0.12038477324384263, "grad_norm": 0.0023319576866924763, "learning_rate": 0.001, "loss": 0.4042, "step": 4363 }, { "epoch": 0.120412365444907, "grad_norm": 0.002195805311203003, "learning_rate": 0.001, "loss": 0.4338, "step": 4364 }, { "epoch": 0.12043995764597136, "grad_norm": 0.002451231935992837, "learning_rate": 0.001, "loss": 0.3916, "step": 4365 }, { "epoch": 0.12046754984703574, "grad_norm": 0.00870354101061821, "learning_rate": 0.001, "loss": 0.3913, "step": 4366 }, { "epoch": 0.1204951420481001, "grad_norm": 0.002587871393188834, "learning_rate": 0.001, "loss": 0.4032, "step": 4367 }, { "epoch": 0.12052273424916447, "grad_norm": 0.004466624464839697, "learning_rate": 0.001, "loss": 0.3526, "step": 4368 }, { "epoch": 0.12055032645022884, "grad_norm": 0.004007890820503235, "learning_rate": 0.001, "loss": 0.3694, "step": 4369 }, { "epoch": 0.1205779186512932, "grad_norm": 0.015531973913311958, "learning_rate": 0.001, "loss": 0.4283, "step": 4370 }, { "epoch": 0.12060551085235759, "grad_norm": 0.008623013272881508, "learning_rate": 0.001, "loss": 0.4336, "step": 4371 }, { "epoch": 0.12063310305342195, "grad_norm": 0.006909455172717571, "learning_rate": 0.001, "loss": 0.3871, "step": 4372 }, { "epoch": 0.12066069525448632, "grad_norm": 0.009513488039374352, "learning_rate": 0.001, "loss": 0.3819, "step": 4373 }, { "epoch": 0.12068828745555069, "grad_norm": 0.003246552310883999, "learning_rate": 0.001, "loss": 0.4136, "step": 4374 }, { "epoch": 0.12071587965661505, "grad_norm": 0.00548326363787055, "learning_rate": 0.001, "loss": 0.3935, "step": 4375 }, { "epoch": 0.12074347185767943, "grad_norm": 0.004080353304743767, "learning_rate": 0.001, "loss": 0.4058, "step": 4376 }, { "epoch": 0.1207710640587438, "grad_norm": 0.004176739137619734, "learning_rate": 0.001, "loss": 0.3706, "step": 4377 }, { "epoch": 0.12079865625980817, "grad_norm": 0.002543453825637698, "learning_rate": 0.001, "loss": 0.4096, "step": 4378 }, { "epoch": 0.12082624846087253, "grad_norm": 0.0027144979685544968, "learning_rate": 0.001, "loss": 0.4159, "step": 4379 }, { "epoch": 0.1208538406619369, "grad_norm": 0.0035318818408995867, "learning_rate": 0.001, "loss": 0.3599, "step": 4380 }, { "epoch": 0.12088143286300128, "grad_norm": 0.005954810418188572, "learning_rate": 0.001, "loss": 0.399, "step": 4381 }, { "epoch": 0.12090902506406564, "grad_norm": 0.00457935081794858, "learning_rate": 0.001, "loss": 0.4323, "step": 4382 }, { "epoch": 0.12093661726513001, "grad_norm": 0.004335826262831688, "learning_rate": 0.001, "loss": 0.3982, "step": 4383 }, { "epoch": 0.12096420946619438, "grad_norm": 0.0032937126234173775, "learning_rate": 0.001, "loss": 0.4219, "step": 4384 }, { "epoch": 0.12099180166725874, "grad_norm": 0.0024808261077851057, "learning_rate": 0.001, "loss": 0.4096, "step": 4385 }, { "epoch": 0.12101939386832312, "grad_norm": 0.002847994677722454, "learning_rate": 0.001, "loss": 0.423, "step": 4386 }, { "epoch": 0.12104698606938749, "grad_norm": 0.002378343604505062, "learning_rate": 0.001, "loss": 0.4155, "step": 4387 }, { "epoch": 0.12107457827045186, "grad_norm": 0.002491983585059643, "learning_rate": 0.001, "loss": 0.4069, "step": 4388 }, { "epoch": 0.12110217047151622, "grad_norm": 0.002309272298589349, "learning_rate": 0.001, "loss": 0.4009, "step": 4389 }, { "epoch": 0.12112976267258059, "grad_norm": 0.0028091350104659796, "learning_rate": 0.001, "loss": 0.3731, "step": 4390 }, { "epoch": 0.12115735487364497, "grad_norm": 0.00367850624024868, "learning_rate": 0.001, "loss": 0.4191, "step": 4391 }, { "epoch": 0.12118494707470934, "grad_norm": 0.003562155645340681, "learning_rate": 0.001, "loss": 0.4017, "step": 4392 }, { "epoch": 0.1212125392757737, "grad_norm": 0.0025711546186357737, "learning_rate": 0.001, "loss": 0.3541, "step": 4393 }, { "epoch": 0.12124013147683807, "grad_norm": 0.003351216670125723, "learning_rate": 0.001, "loss": 0.3932, "step": 4394 }, { "epoch": 0.12126772367790244, "grad_norm": 0.002373376628383994, "learning_rate": 0.001, "loss": 0.4049, "step": 4395 }, { "epoch": 0.12129531587896682, "grad_norm": 0.0035600061528384686, "learning_rate": 0.001, "loss": 0.4044, "step": 4396 }, { "epoch": 0.12132290808003118, "grad_norm": 0.0033355422783643007, "learning_rate": 0.001, "loss": 0.3438, "step": 4397 }, { "epoch": 0.12135050028109555, "grad_norm": 0.004041003528982401, "learning_rate": 0.001, "loss": 0.3776, "step": 4398 }, { "epoch": 0.12137809248215992, "grad_norm": 0.002868924057111144, "learning_rate": 0.001, "loss": 0.3742, "step": 4399 }, { "epoch": 0.12140568468322428, "grad_norm": 0.003549615852534771, "learning_rate": 0.001, "loss": 0.3907, "step": 4400 }, { "epoch": 0.12143327688428865, "grad_norm": 0.0034831890370696783, "learning_rate": 0.001, "loss": 0.4147, "step": 4401 }, { "epoch": 0.12146086908535303, "grad_norm": 0.0029312497936189175, "learning_rate": 0.001, "loss": 0.4079, "step": 4402 }, { "epoch": 0.1214884612864174, "grad_norm": 0.0037916668225079775, "learning_rate": 0.001, "loss": 0.4078, "step": 4403 }, { "epoch": 0.12151605348748176, "grad_norm": 0.0031889586243778467, "learning_rate": 0.001, "loss": 0.4114, "step": 4404 }, { "epoch": 0.12154364568854613, "grad_norm": 0.003701084526255727, "learning_rate": 0.001, "loss": 0.363, "step": 4405 }, { "epoch": 0.1215712378896105, "grad_norm": 0.00297721428796649, "learning_rate": 0.001, "loss": 0.3948, "step": 4406 }, { "epoch": 0.12159883009067488, "grad_norm": 0.0032696991693228483, "learning_rate": 0.001, "loss": 0.3994, "step": 4407 }, { "epoch": 0.12162642229173924, "grad_norm": 0.0023237040732055902, "learning_rate": 0.001, "loss": 0.4037, "step": 4408 }, { "epoch": 0.12165401449280361, "grad_norm": 0.0023669737856835127, "learning_rate": 0.001, "loss": 0.396, "step": 4409 }, { "epoch": 0.12168160669386797, "grad_norm": 0.0029125306755304337, "learning_rate": 0.001, "loss": 0.3878, "step": 4410 }, { "epoch": 0.12170919889493234, "grad_norm": 0.003643943928182125, "learning_rate": 0.001, "loss": 0.4312, "step": 4411 }, { "epoch": 0.12173679109599672, "grad_norm": 0.002598782768473029, "learning_rate": 0.001, "loss": 0.4217, "step": 4412 }, { "epoch": 0.12176438329706109, "grad_norm": 0.00362451933324337, "learning_rate": 0.001, "loss": 0.3839, "step": 4413 }, { "epoch": 0.12179197549812545, "grad_norm": 0.0032412779983133078, "learning_rate": 0.001, "loss": 0.371, "step": 4414 }, { "epoch": 0.12181956769918982, "grad_norm": 0.0031498463358730078, "learning_rate": 0.001, "loss": 0.3834, "step": 4415 }, { "epoch": 0.12184715990025419, "grad_norm": 0.0026793426368385553, "learning_rate": 0.001, "loss": 0.4164, "step": 4416 }, { "epoch": 0.12187475210131857, "grad_norm": 0.002742303302511573, "learning_rate": 0.001, "loss": 0.3773, "step": 4417 }, { "epoch": 0.12190234430238293, "grad_norm": 0.013603371568024158, "learning_rate": 0.001, "loss": 0.3664, "step": 4418 }, { "epoch": 0.1219299365034473, "grad_norm": 0.002509272890165448, "learning_rate": 0.001, "loss": 0.3634, "step": 4419 }, { "epoch": 0.12195752870451167, "grad_norm": 0.0029235216788947582, "learning_rate": 0.001, "loss": 0.3969, "step": 4420 }, { "epoch": 0.12198512090557603, "grad_norm": 0.003002134384587407, "learning_rate": 0.001, "loss": 0.4022, "step": 4421 }, { "epoch": 0.12201271310664041, "grad_norm": 0.0032670635264366865, "learning_rate": 0.001, "loss": 0.4005, "step": 4422 }, { "epoch": 0.12204030530770478, "grad_norm": 0.0023868351709097624, "learning_rate": 0.001, "loss": 0.3716, "step": 4423 }, { "epoch": 0.12206789750876915, "grad_norm": 0.004251338541507721, "learning_rate": 0.001, "loss": 0.3969, "step": 4424 }, { "epoch": 0.12209548970983351, "grad_norm": 0.011102253571152687, "learning_rate": 0.001, "loss": 0.3887, "step": 4425 }, { "epoch": 0.12212308191089788, "grad_norm": 0.0027349465526640415, "learning_rate": 0.001, "loss": 0.3824, "step": 4426 }, { "epoch": 0.12215067411196226, "grad_norm": 0.0032276634592562914, "learning_rate": 0.001, "loss": 0.4151, "step": 4427 }, { "epoch": 0.12217826631302663, "grad_norm": 0.0035047761630266905, "learning_rate": 0.001, "loss": 0.3754, "step": 4428 }, { "epoch": 0.12220585851409099, "grad_norm": 0.0022022626362740993, "learning_rate": 0.001, "loss": 0.4513, "step": 4429 }, { "epoch": 0.12223345071515536, "grad_norm": 0.0028917898889631033, "learning_rate": 0.001, "loss": 0.4174, "step": 4430 }, { "epoch": 0.12226104291621973, "grad_norm": 0.0040835002437233925, "learning_rate": 0.001, "loss": 0.3589, "step": 4431 }, { "epoch": 0.1222886351172841, "grad_norm": 0.0042761219665408134, "learning_rate": 0.001, "loss": 0.4104, "step": 4432 }, { "epoch": 0.12231622731834847, "grad_norm": 0.0031644178088754416, "learning_rate": 0.001, "loss": 0.4044, "step": 4433 }, { "epoch": 0.12234381951941284, "grad_norm": 0.005158254411071539, "learning_rate": 0.001, "loss": 0.387, "step": 4434 }, { "epoch": 0.1223714117204772, "grad_norm": 0.00329927378334105, "learning_rate": 0.001, "loss": 0.3385, "step": 4435 }, { "epoch": 0.12239900392154157, "grad_norm": 0.0028820266015827656, "learning_rate": 0.001, "loss": 0.3833, "step": 4436 }, { "epoch": 0.12242659612260595, "grad_norm": 0.0035580925177782774, "learning_rate": 0.001, "loss": 0.4017, "step": 4437 }, { "epoch": 0.12245418832367032, "grad_norm": 0.0028019326273351908, "learning_rate": 0.001, "loss": 0.3772, "step": 4438 }, { "epoch": 0.12248178052473468, "grad_norm": 0.002797899767756462, "learning_rate": 0.001, "loss": 0.4142, "step": 4439 }, { "epoch": 0.12250937272579905, "grad_norm": 0.002947175409644842, "learning_rate": 0.001, "loss": 0.3884, "step": 4440 }, { "epoch": 0.12253696492686342, "grad_norm": 0.003999659325927496, "learning_rate": 0.001, "loss": 0.397, "step": 4441 }, { "epoch": 0.1225645571279278, "grad_norm": 0.0022317490074783564, "learning_rate": 0.001, "loss": 0.4191, "step": 4442 }, { "epoch": 0.12259214932899216, "grad_norm": 0.01325872354209423, "learning_rate": 0.001, "loss": 0.397, "step": 4443 }, { "epoch": 0.12261974153005653, "grad_norm": 0.0043718283995985985, "learning_rate": 0.001, "loss": 0.4027, "step": 4444 }, { "epoch": 0.1226473337311209, "grad_norm": 0.003364040283486247, "learning_rate": 0.001, "loss": 0.423, "step": 4445 }, { "epoch": 0.12267492593218526, "grad_norm": 0.0032444903627038, "learning_rate": 0.001, "loss": 0.3935, "step": 4446 }, { "epoch": 0.12270251813324963, "grad_norm": 0.004509296268224716, "learning_rate": 0.001, "loss": 0.4078, "step": 4447 }, { "epoch": 0.12273011033431401, "grad_norm": 0.004130146466195583, "learning_rate": 0.001, "loss": 0.3735, "step": 4448 }, { "epoch": 0.12275770253537838, "grad_norm": 0.003644294338300824, "learning_rate": 0.001, "loss": 0.4376, "step": 4449 }, { "epoch": 0.12278529473644274, "grad_norm": 0.003993290476500988, "learning_rate": 0.001, "loss": 0.3884, "step": 4450 }, { "epoch": 0.12281288693750711, "grad_norm": 0.0026422853115946054, "learning_rate": 0.001, "loss": 0.4278, "step": 4451 }, { "epoch": 0.12284047913857148, "grad_norm": 0.0023565879091620445, "learning_rate": 0.001, "loss": 0.4267, "step": 4452 }, { "epoch": 0.12286807133963586, "grad_norm": 0.002863645553588867, "learning_rate": 0.001, "loss": 0.3796, "step": 4453 }, { "epoch": 0.12289566354070022, "grad_norm": 0.0022721088025718927, "learning_rate": 0.001, "loss": 0.4075, "step": 4454 }, { "epoch": 0.12292325574176459, "grad_norm": 0.0033876546658575535, "learning_rate": 0.001, "loss": 0.3802, "step": 4455 }, { "epoch": 0.12295084794282896, "grad_norm": 0.0030466553289443254, "learning_rate": 0.001, "loss": 0.4106, "step": 4456 }, { "epoch": 0.12297844014389332, "grad_norm": 0.005601429846137762, "learning_rate": 0.001, "loss": 0.4128, "step": 4457 }, { "epoch": 0.1230060323449577, "grad_norm": 0.0028402013704180717, "learning_rate": 0.001, "loss": 0.4016, "step": 4458 }, { "epoch": 0.12303362454602207, "grad_norm": 0.004646173678338528, "learning_rate": 0.001, "loss": 0.3447, "step": 4459 }, { "epoch": 0.12306121674708644, "grad_norm": 0.002111830050125718, "learning_rate": 0.001, "loss": 0.4089, "step": 4460 }, { "epoch": 0.1230888089481508, "grad_norm": 0.0025454754941165447, "learning_rate": 0.001, "loss": 0.3967, "step": 4461 }, { "epoch": 0.12311640114921517, "grad_norm": 0.002491764025762677, "learning_rate": 0.001, "loss": 0.3989, "step": 4462 }, { "epoch": 0.12314399335027955, "grad_norm": 0.005627894774079323, "learning_rate": 0.001, "loss": 0.389, "step": 4463 }, { "epoch": 0.12317158555134392, "grad_norm": 0.004091967828571796, "learning_rate": 0.001, "loss": 0.4273, "step": 4464 }, { "epoch": 0.12319917775240828, "grad_norm": 0.0029696535784751177, "learning_rate": 0.001, "loss": 0.3962, "step": 4465 }, { "epoch": 0.12322676995347265, "grad_norm": 0.004290423821657896, "learning_rate": 0.001, "loss": 0.3906, "step": 4466 }, { "epoch": 0.12325436215453701, "grad_norm": 0.002825783099979162, "learning_rate": 0.001, "loss": 0.3961, "step": 4467 }, { "epoch": 0.1232819543556014, "grad_norm": 0.002694531576707959, "learning_rate": 0.001, "loss": 0.3918, "step": 4468 }, { "epoch": 0.12330954655666576, "grad_norm": 0.002853821264579892, "learning_rate": 0.001, "loss": 0.3638, "step": 4469 }, { "epoch": 0.12333713875773013, "grad_norm": 0.0025199069641530514, "learning_rate": 0.001, "loss": 0.384, "step": 4470 }, { "epoch": 0.1233647309587945, "grad_norm": 0.003955396823585033, "learning_rate": 0.001, "loss": 0.3755, "step": 4471 }, { "epoch": 0.12339232315985886, "grad_norm": 0.0027513199020177126, "learning_rate": 0.001, "loss": 0.3981, "step": 4472 }, { "epoch": 0.12341991536092324, "grad_norm": 0.004522261209785938, "learning_rate": 0.001, "loss": 0.4623, "step": 4473 }, { "epoch": 0.12344750756198761, "grad_norm": 0.003415697254240513, "learning_rate": 0.001, "loss": 0.3978, "step": 4474 }, { "epoch": 0.12347509976305197, "grad_norm": 0.0028595994226634502, "learning_rate": 0.001, "loss": 0.3896, "step": 4475 }, { "epoch": 0.12350269196411634, "grad_norm": 0.0041776299476623535, "learning_rate": 0.001, "loss": 0.4008, "step": 4476 }, { "epoch": 0.1235302841651807, "grad_norm": 0.002562503796070814, "learning_rate": 0.001, "loss": 0.3921, "step": 4477 }, { "epoch": 0.12355787636624509, "grad_norm": 0.0029480233788490295, "learning_rate": 0.001, "loss": 0.402, "step": 4478 }, { "epoch": 0.12358546856730945, "grad_norm": 0.003219869453459978, "learning_rate": 0.001, "loss": 0.4032, "step": 4479 }, { "epoch": 0.12361306076837382, "grad_norm": 0.002801501424983144, "learning_rate": 0.001, "loss": 0.3823, "step": 4480 }, { "epoch": 0.12364065296943819, "grad_norm": 0.004376853816211224, "learning_rate": 0.001, "loss": 0.3591, "step": 4481 }, { "epoch": 0.12366824517050255, "grad_norm": 0.0027625143993645906, "learning_rate": 0.001, "loss": 0.3813, "step": 4482 }, { "epoch": 0.12369583737156693, "grad_norm": 0.0024436269886791706, "learning_rate": 0.001, "loss": 0.4358, "step": 4483 }, { "epoch": 0.1237234295726313, "grad_norm": 0.0035639768466353416, "learning_rate": 0.001, "loss": 0.3796, "step": 4484 }, { "epoch": 0.12375102177369567, "grad_norm": 0.0029148710891604424, "learning_rate": 0.001, "loss": 0.3849, "step": 4485 }, { "epoch": 0.12377861397476003, "grad_norm": 0.0025241354014724493, "learning_rate": 0.001, "loss": 0.3841, "step": 4486 }, { "epoch": 0.1238062061758244, "grad_norm": 0.003036417765542865, "learning_rate": 0.001, "loss": 0.398, "step": 4487 }, { "epoch": 0.12383379837688878, "grad_norm": 0.002688635839149356, "learning_rate": 0.001, "loss": 0.3572, "step": 4488 }, { "epoch": 0.12386139057795315, "grad_norm": 0.0027762525714933872, "learning_rate": 0.001, "loss": 0.4021, "step": 4489 }, { "epoch": 0.12388898277901751, "grad_norm": 0.0030563059262931347, "learning_rate": 0.001, "loss": 0.3528, "step": 4490 }, { "epoch": 0.12391657498008188, "grad_norm": 0.006547185592353344, "learning_rate": 0.001, "loss": 0.3975, "step": 4491 }, { "epoch": 0.12394416718114624, "grad_norm": 0.0036082088481634855, "learning_rate": 0.001, "loss": 0.3909, "step": 4492 }, { "epoch": 0.12397175938221063, "grad_norm": 0.007952166721224785, "learning_rate": 0.001, "loss": 0.3944, "step": 4493 }, { "epoch": 0.12399935158327499, "grad_norm": 0.0032326721120625734, "learning_rate": 0.001, "loss": 0.3994, "step": 4494 }, { "epoch": 0.12402694378433936, "grad_norm": 0.003287110012024641, "learning_rate": 0.001, "loss": 0.3341, "step": 4495 }, { "epoch": 0.12405453598540372, "grad_norm": 0.0022940190974622965, "learning_rate": 0.001, "loss": 0.3939, "step": 4496 }, { "epoch": 0.12408212818646809, "grad_norm": 0.007416036911308765, "learning_rate": 0.001, "loss": 0.404, "step": 4497 }, { "epoch": 0.12410972038753246, "grad_norm": 0.002889924682676792, "learning_rate": 0.001, "loss": 0.3884, "step": 4498 }, { "epoch": 0.12413731258859684, "grad_norm": 0.002148964209482074, "learning_rate": 0.001, "loss": 0.3988, "step": 4499 }, { "epoch": 0.1241649047896612, "grad_norm": 0.004982593934983015, "learning_rate": 0.001, "loss": 0.4168, "step": 4500 }, { "epoch": 0.1241649047896612, "eval_runtime": 24.5339, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.163, "step": 4500 }, { "epoch": 0.12419249699072557, "grad_norm": 0.004382569808512926, "learning_rate": 0.001, "loss": 0.4112, "step": 4501 }, { "epoch": 0.12422008919178994, "grad_norm": 0.003418430220335722, "learning_rate": 0.001, "loss": 0.4415, "step": 4502 }, { "epoch": 0.1242476813928543, "grad_norm": 0.003198147751390934, "learning_rate": 0.001, "loss": 0.3938, "step": 4503 }, { "epoch": 0.12427527359391868, "grad_norm": 0.00246452703140676, "learning_rate": 0.001, "loss": 0.3847, "step": 4504 }, { "epoch": 0.12430286579498305, "grad_norm": 0.0038448646664619446, "learning_rate": 0.001, "loss": 0.3798, "step": 4505 }, { "epoch": 0.12433045799604742, "grad_norm": 0.0039044911973178387, "learning_rate": 0.001, "loss": 0.3727, "step": 4506 }, { "epoch": 0.12435805019711178, "grad_norm": 0.003824865445494652, "learning_rate": 0.001, "loss": 0.3824, "step": 4507 }, { "epoch": 0.12438564239817615, "grad_norm": 0.003275007475167513, "learning_rate": 0.001, "loss": 0.4156, "step": 4508 }, { "epoch": 0.12441323459924053, "grad_norm": 0.00457676500082016, "learning_rate": 0.001, "loss": 0.3967, "step": 4509 }, { "epoch": 0.1244408268003049, "grad_norm": 0.004399159457534552, "learning_rate": 0.001, "loss": 0.4016, "step": 4510 }, { "epoch": 0.12446841900136926, "grad_norm": 0.0027072380762547255, "learning_rate": 0.001, "loss": 0.4299, "step": 4511 }, { "epoch": 0.12449601120243363, "grad_norm": 0.0025107194669544697, "learning_rate": 0.001, "loss": 0.3946, "step": 4512 }, { "epoch": 0.124523603403498, "grad_norm": 0.004047545604407787, "learning_rate": 0.001, "loss": 0.3901, "step": 4513 }, { "epoch": 0.12455119560456238, "grad_norm": 0.002424485282972455, "learning_rate": 0.001, "loss": 0.4101, "step": 4514 }, { "epoch": 0.12457878780562674, "grad_norm": 0.00391024025157094, "learning_rate": 0.001, "loss": 0.3942, "step": 4515 }, { "epoch": 0.12460638000669111, "grad_norm": 0.0026012531016021967, "learning_rate": 0.001, "loss": 0.4127, "step": 4516 }, { "epoch": 0.12463397220775548, "grad_norm": 0.0023119868710637093, "learning_rate": 0.001, "loss": 0.4309, "step": 4517 }, { "epoch": 0.12466156440881984, "grad_norm": 0.0023387623950839043, "learning_rate": 0.001, "loss": 0.4148, "step": 4518 }, { "epoch": 0.12468915660988422, "grad_norm": 0.0031953216530382633, "learning_rate": 0.001, "loss": 0.394, "step": 4519 }, { "epoch": 0.12471674881094859, "grad_norm": 0.002653286559507251, "learning_rate": 0.001, "loss": 0.3617, "step": 4520 }, { "epoch": 0.12474434101201295, "grad_norm": 0.006683747284114361, "learning_rate": 0.001, "loss": 0.3744, "step": 4521 }, { "epoch": 0.12477193321307732, "grad_norm": 0.002483023563399911, "learning_rate": 0.001, "loss": 0.3819, "step": 4522 }, { "epoch": 0.12479952541414169, "grad_norm": 0.004147149156779051, "learning_rate": 0.001, "loss": 0.4002, "step": 4523 }, { "epoch": 0.12482711761520607, "grad_norm": 0.0026530877221375704, "learning_rate": 0.001, "loss": 0.3871, "step": 4524 }, { "epoch": 0.12485470981627043, "grad_norm": 0.003452074248343706, "learning_rate": 0.001, "loss": 0.3931, "step": 4525 }, { "epoch": 0.1248823020173348, "grad_norm": 0.0028085915837436914, "learning_rate": 0.001, "loss": 0.3797, "step": 4526 }, { "epoch": 0.12490989421839917, "grad_norm": 0.0034218342043459415, "learning_rate": 0.001, "loss": 0.4248, "step": 4527 }, { "epoch": 0.12493748641946353, "grad_norm": 0.007770628668367863, "learning_rate": 0.001, "loss": 0.3624, "step": 4528 }, { "epoch": 0.12496507862052791, "grad_norm": 0.0029227614868432283, "learning_rate": 0.001, "loss": 0.4236, "step": 4529 }, { "epoch": 0.12499267082159228, "grad_norm": 0.0030319190118461847, "learning_rate": 0.001, "loss": 0.3845, "step": 4530 }, { "epoch": 0.12502026302265665, "grad_norm": 0.002946640131995082, "learning_rate": 0.001, "loss": 0.3868, "step": 4531 }, { "epoch": 0.12504785522372103, "grad_norm": 0.003349126083776355, "learning_rate": 0.001, "loss": 0.4028, "step": 4532 }, { "epoch": 0.12507544742478538, "grad_norm": 0.0028201621025800705, "learning_rate": 0.001, "loss": 0.4228, "step": 4533 }, { "epoch": 0.12510303962584976, "grad_norm": 0.002343755681067705, "learning_rate": 0.001, "loss": 0.4049, "step": 4534 }, { "epoch": 0.1251306318269141, "grad_norm": 0.004897142760455608, "learning_rate": 0.001, "loss": 0.4179, "step": 4535 }, { "epoch": 0.1251582240279785, "grad_norm": 0.003420259803533554, "learning_rate": 0.001, "loss": 0.3931, "step": 4536 }, { "epoch": 0.12518581622904287, "grad_norm": 0.005415117833763361, "learning_rate": 0.001, "loss": 0.3938, "step": 4537 }, { "epoch": 0.12521340843010723, "grad_norm": 0.009312170557677746, "learning_rate": 0.001, "loss": 0.4319, "step": 4538 }, { "epoch": 0.1252410006311716, "grad_norm": 0.0031622762326151133, "learning_rate": 0.001, "loss": 0.404, "step": 4539 }, { "epoch": 0.12526859283223596, "grad_norm": 0.0035899661015719175, "learning_rate": 0.001, "loss": 0.4331, "step": 4540 }, { "epoch": 0.12529618503330034, "grad_norm": 0.005790769122540951, "learning_rate": 0.001, "loss": 0.3559, "step": 4541 }, { "epoch": 0.12532377723436472, "grad_norm": 0.003316226415336132, "learning_rate": 0.001, "loss": 0.3989, "step": 4542 }, { "epoch": 0.12535136943542907, "grad_norm": 0.003180850762873888, "learning_rate": 0.001, "loss": 0.3986, "step": 4543 }, { "epoch": 0.12537896163649345, "grad_norm": 0.0034962312784045935, "learning_rate": 0.001, "loss": 0.4058, "step": 4544 }, { "epoch": 0.1254065538375578, "grad_norm": 0.0036736545152962208, "learning_rate": 0.001, "loss": 0.4017, "step": 4545 }, { "epoch": 0.12543414603862219, "grad_norm": 0.0032101564574986696, "learning_rate": 0.001, "loss": 0.4218, "step": 4546 }, { "epoch": 0.12546173823968657, "grad_norm": 0.0029373078141361475, "learning_rate": 0.001, "loss": 0.4029, "step": 4547 }, { "epoch": 0.12548933044075092, "grad_norm": 0.0039803143590688705, "learning_rate": 0.001, "loss": 0.4196, "step": 4548 }, { "epoch": 0.1255169226418153, "grad_norm": 0.006120176054537296, "learning_rate": 0.001, "loss": 0.3971, "step": 4549 }, { "epoch": 0.12554451484287965, "grad_norm": 0.004431293345987797, "learning_rate": 0.001, "loss": 0.4117, "step": 4550 }, { "epoch": 0.12557210704394403, "grad_norm": 0.0034864030312746763, "learning_rate": 0.001, "loss": 0.4042, "step": 4551 }, { "epoch": 0.1255996992450084, "grad_norm": 0.003251910675317049, "learning_rate": 0.001, "loss": 0.3791, "step": 4552 }, { "epoch": 0.12562729144607276, "grad_norm": 0.0032173239160329103, "learning_rate": 0.001, "loss": 0.358, "step": 4553 }, { "epoch": 0.12565488364713714, "grad_norm": 0.002825138159096241, "learning_rate": 0.001, "loss": 0.3771, "step": 4554 }, { "epoch": 0.1256824758482015, "grad_norm": 0.0024809478782117367, "learning_rate": 0.001, "loss": 0.4102, "step": 4555 }, { "epoch": 0.12571006804926588, "grad_norm": 0.0038729074876755476, "learning_rate": 0.001, "loss": 0.3765, "step": 4556 }, { "epoch": 0.12573766025033026, "grad_norm": 0.0027885730378329754, "learning_rate": 0.001, "loss": 0.4325, "step": 4557 }, { "epoch": 0.1257652524513946, "grad_norm": 0.002833339385688305, "learning_rate": 0.001, "loss": 0.4231, "step": 4558 }, { "epoch": 0.125792844652459, "grad_norm": 0.00270766019821167, "learning_rate": 0.001, "loss": 0.3694, "step": 4559 }, { "epoch": 0.12582043685352334, "grad_norm": 0.0024998581502586603, "learning_rate": 0.001, "loss": 0.3947, "step": 4560 }, { "epoch": 0.12584802905458772, "grad_norm": 0.010215898975729942, "learning_rate": 0.001, "loss": 0.359, "step": 4561 }, { "epoch": 0.1258756212556521, "grad_norm": 0.015013232827186584, "learning_rate": 0.001, "loss": 0.3853, "step": 4562 }, { "epoch": 0.12590321345671646, "grad_norm": 0.010075882077217102, "learning_rate": 0.001, "loss": 0.4007, "step": 4563 }, { "epoch": 0.12593080565778084, "grad_norm": 0.02185934968292713, "learning_rate": 0.001, "loss": 0.3701, "step": 4564 }, { "epoch": 0.1259583978588452, "grad_norm": 0.003104067873209715, "learning_rate": 0.001, "loss": 0.4061, "step": 4565 }, { "epoch": 0.12598599005990957, "grad_norm": 0.0032787187956273556, "learning_rate": 0.001, "loss": 0.397, "step": 4566 }, { "epoch": 0.12601358226097392, "grad_norm": 0.002555015729740262, "learning_rate": 0.001, "loss": 0.4352, "step": 4567 }, { "epoch": 0.1260411744620383, "grad_norm": 0.0023251352831721306, "learning_rate": 0.001, "loss": 0.4093, "step": 4568 }, { "epoch": 0.12606876666310268, "grad_norm": 0.0028923735953867435, "learning_rate": 0.001, "loss": 0.4166, "step": 4569 }, { "epoch": 0.12609635886416704, "grad_norm": 0.003060357179492712, "learning_rate": 0.001, "loss": 0.388, "step": 4570 }, { "epoch": 0.12612395106523142, "grad_norm": 0.0036045622546225786, "learning_rate": 0.001, "loss": 0.3544, "step": 4571 }, { "epoch": 0.12615154326629577, "grad_norm": 0.002739776624366641, "learning_rate": 0.001, "loss": 0.4174, "step": 4572 }, { "epoch": 0.12617913546736015, "grad_norm": 0.0037477100268006325, "learning_rate": 0.001, "loss": 0.4281, "step": 4573 }, { "epoch": 0.12620672766842453, "grad_norm": 0.002796964254230261, "learning_rate": 0.001, "loss": 0.3802, "step": 4574 }, { "epoch": 0.12623431986948888, "grad_norm": 0.0034001306630671024, "learning_rate": 0.001, "loss": 0.4039, "step": 4575 }, { "epoch": 0.12626191207055326, "grad_norm": 0.004743472672998905, "learning_rate": 0.001, "loss": 0.4293, "step": 4576 }, { "epoch": 0.12628950427161761, "grad_norm": 0.0023272617254406214, "learning_rate": 0.001, "loss": 0.3782, "step": 4577 }, { "epoch": 0.126317096472682, "grad_norm": 0.0036957256961613894, "learning_rate": 0.001, "loss": 0.3981, "step": 4578 }, { "epoch": 0.12634468867374637, "grad_norm": 0.002441568998619914, "learning_rate": 0.001, "loss": 0.4053, "step": 4579 }, { "epoch": 0.12637228087481073, "grad_norm": 0.003656483720988035, "learning_rate": 0.001, "loss": 0.3826, "step": 4580 }, { "epoch": 0.1263998730758751, "grad_norm": 0.005753074306994677, "learning_rate": 0.001, "loss": 0.3647, "step": 4581 }, { "epoch": 0.12642746527693946, "grad_norm": 0.005195307079702616, "learning_rate": 0.001, "loss": 0.4113, "step": 4582 }, { "epoch": 0.12645505747800384, "grad_norm": 0.003572377609089017, "learning_rate": 0.001, "loss": 0.4046, "step": 4583 }, { "epoch": 0.12648264967906822, "grad_norm": 0.006414738483726978, "learning_rate": 0.001, "loss": 0.3894, "step": 4584 }, { "epoch": 0.12651024188013257, "grad_norm": 0.003181818872690201, "learning_rate": 0.001, "loss": 0.376, "step": 4585 }, { "epoch": 0.12653783408119695, "grad_norm": 0.0062148310244083405, "learning_rate": 0.001, "loss": 0.3956, "step": 4586 }, { "epoch": 0.1265654262822613, "grad_norm": 0.00339969783090055, "learning_rate": 0.001, "loss": 0.4389, "step": 4587 }, { "epoch": 0.1265930184833257, "grad_norm": 0.003279429627582431, "learning_rate": 0.001, "loss": 0.415, "step": 4588 }, { "epoch": 0.12662061068439007, "grad_norm": 0.003928397316485643, "learning_rate": 0.001, "loss": 0.4015, "step": 4589 }, { "epoch": 0.12664820288545442, "grad_norm": 0.0071240440011024475, "learning_rate": 0.001, "loss": 0.371, "step": 4590 }, { "epoch": 0.1266757950865188, "grad_norm": 0.0035491851158440113, "learning_rate": 0.001, "loss": 0.3887, "step": 4591 }, { "epoch": 0.12670338728758315, "grad_norm": 0.003695262363180518, "learning_rate": 0.001, "loss": 0.3705, "step": 4592 }, { "epoch": 0.12673097948864753, "grad_norm": 0.002730879234150052, "learning_rate": 0.001, "loss": 0.4148, "step": 4593 }, { "epoch": 0.1267585716897119, "grad_norm": 0.0026994007639586926, "learning_rate": 0.001, "loss": 0.4262, "step": 4594 }, { "epoch": 0.12678616389077627, "grad_norm": 0.0025865414645522833, "learning_rate": 0.001, "loss": 0.4126, "step": 4595 }, { "epoch": 0.12681375609184065, "grad_norm": 0.003949947189539671, "learning_rate": 0.001, "loss": 0.3755, "step": 4596 }, { "epoch": 0.126841348292905, "grad_norm": 0.0021580797620117664, "learning_rate": 0.001, "loss": 0.3898, "step": 4597 }, { "epoch": 0.12686894049396938, "grad_norm": 0.003605265635997057, "learning_rate": 0.001, "loss": 0.3938, "step": 4598 }, { "epoch": 0.12689653269503376, "grad_norm": 0.0027530419174581766, "learning_rate": 0.001, "loss": 0.4002, "step": 4599 }, { "epoch": 0.1269241248960981, "grad_norm": 0.002252806443721056, "learning_rate": 0.001, "loss": 0.4241, "step": 4600 }, { "epoch": 0.1269517170971625, "grad_norm": 0.0026294796261936426, "learning_rate": 0.001, "loss": 0.4238, "step": 4601 }, { "epoch": 0.12697930929822684, "grad_norm": 0.00271353917196393, "learning_rate": 0.001, "loss": 0.412, "step": 4602 }, { "epoch": 0.12700690149929122, "grad_norm": 0.004706821870058775, "learning_rate": 0.001, "loss": 0.3532, "step": 4603 }, { "epoch": 0.1270344937003556, "grad_norm": 0.0026412513107061386, "learning_rate": 0.001, "loss": 0.4048, "step": 4604 }, { "epoch": 0.12706208590141996, "grad_norm": 0.00251567829400301, "learning_rate": 0.001, "loss": 0.3959, "step": 4605 }, { "epoch": 0.12708967810248434, "grad_norm": 0.003459150902926922, "learning_rate": 0.001, "loss": 0.4001, "step": 4606 }, { "epoch": 0.1271172703035487, "grad_norm": 0.002293146215379238, "learning_rate": 0.001, "loss": 0.3427, "step": 4607 }, { "epoch": 0.12714486250461307, "grad_norm": 0.0029938959050923586, "learning_rate": 0.001, "loss": 0.3961, "step": 4608 }, { "epoch": 0.12717245470567745, "grad_norm": 0.0036902104038745165, "learning_rate": 0.001, "loss": 0.3973, "step": 4609 }, { "epoch": 0.1272000469067418, "grad_norm": 0.004806086421012878, "learning_rate": 0.001, "loss": 0.4121, "step": 4610 }, { "epoch": 0.12722763910780618, "grad_norm": 0.002169287297874689, "learning_rate": 0.001, "loss": 0.4396, "step": 4611 }, { "epoch": 0.12725523130887054, "grad_norm": 0.0030692543368786573, "learning_rate": 0.001, "loss": 0.3516, "step": 4612 }, { "epoch": 0.12728282350993492, "grad_norm": 0.004069609101861715, "learning_rate": 0.001, "loss": 0.4112, "step": 4613 }, { "epoch": 0.1273104157109993, "grad_norm": 0.0051890406757593155, "learning_rate": 0.001, "loss": 0.3719, "step": 4614 }, { "epoch": 0.12733800791206365, "grad_norm": 0.002785927150398493, "learning_rate": 0.001, "loss": 0.3835, "step": 4615 }, { "epoch": 0.12736560011312803, "grad_norm": 0.0058522410690784454, "learning_rate": 0.001, "loss": 0.417, "step": 4616 }, { "epoch": 0.12739319231419238, "grad_norm": 0.0026462904643267393, "learning_rate": 0.001, "loss": 0.4455, "step": 4617 }, { "epoch": 0.12742078451525676, "grad_norm": 0.012372707948088646, "learning_rate": 0.001, "loss": 0.3911, "step": 4618 }, { "epoch": 0.12744837671632114, "grad_norm": 0.003128821961581707, "learning_rate": 0.001, "loss": 0.4198, "step": 4619 }, { "epoch": 0.1274759689173855, "grad_norm": 0.004159901756793261, "learning_rate": 0.001, "loss": 0.4282, "step": 4620 }, { "epoch": 0.12750356111844988, "grad_norm": 0.00412391172721982, "learning_rate": 0.001, "loss": 0.3741, "step": 4621 }, { "epoch": 0.12753115331951423, "grad_norm": 0.002524176612496376, "learning_rate": 0.001, "loss": 0.4158, "step": 4622 }, { "epoch": 0.1275587455205786, "grad_norm": 0.004930204711854458, "learning_rate": 0.001, "loss": 0.3922, "step": 4623 }, { "epoch": 0.127586337721643, "grad_norm": 0.003738861531019211, "learning_rate": 0.001, "loss": 0.3992, "step": 4624 }, { "epoch": 0.12761392992270734, "grad_norm": 0.0171899925917387, "learning_rate": 0.001, "loss": 0.391, "step": 4625 }, { "epoch": 0.12764152212377172, "grad_norm": 0.018499117344617844, "learning_rate": 0.001, "loss": 0.3724, "step": 4626 }, { "epoch": 0.12766911432483607, "grad_norm": 0.0035764595959335566, "learning_rate": 0.001, "loss": 0.3737, "step": 4627 }, { "epoch": 0.12769670652590046, "grad_norm": 0.00345508917234838, "learning_rate": 0.001, "loss": 0.3695, "step": 4628 }, { "epoch": 0.12772429872696484, "grad_norm": 0.0037645609118044376, "learning_rate": 0.001, "loss": 0.3914, "step": 4629 }, { "epoch": 0.1277518909280292, "grad_norm": 0.0043193078599870205, "learning_rate": 0.001, "loss": 0.3713, "step": 4630 }, { "epoch": 0.12777948312909357, "grad_norm": 0.0038787908852100372, "learning_rate": 0.001, "loss": 0.4257, "step": 4631 }, { "epoch": 0.12780707533015792, "grad_norm": 0.007828318513929844, "learning_rate": 0.001, "loss": 0.4126, "step": 4632 }, { "epoch": 0.1278346675312223, "grad_norm": 0.0029059904627501965, "learning_rate": 0.001, "loss": 0.4446, "step": 4633 }, { "epoch": 0.12786225973228668, "grad_norm": 0.005105176474899054, "learning_rate": 0.001, "loss": 0.3566, "step": 4634 }, { "epoch": 0.12788985193335103, "grad_norm": 0.005164528265595436, "learning_rate": 0.001, "loss": 0.4067, "step": 4635 }, { "epoch": 0.12791744413441541, "grad_norm": 0.003127202857285738, "learning_rate": 0.001, "loss": 0.3882, "step": 4636 }, { "epoch": 0.12794503633547977, "grad_norm": 0.004962852690368891, "learning_rate": 0.001, "loss": 0.3678, "step": 4637 }, { "epoch": 0.12797262853654415, "grad_norm": 0.003928507212549448, "learning_rate": 0.001, "loss": 0.3994, "step": 4638 }, { "epoch": 0.12800022073760853, "grad_norm": 0.002523329108953476, "learning_rate": 0.001, "loss": 0.4012, "step": 4639 }, { "epoch": 0.12802781293867288, "grad_norm": 0.003745138179510832, "learning_rate": 0.001, "loss": 0.4143, "step": 4640 }, { "epoch": 0.12805540513973726, "grad_norm": 0.007446894887834787, "learning_rate": 0.001, "loss": 0.4105, "step": 4641 }, { "epoch": 0.1280829973408016, "grad_norm": 0.0024344183038920164, "learning_rate": 0.001, "loss": 0.4278, "step": 4642 }, { "epoch": 0.128110589541866, "grad_norm": 0.0059473030269145966, "learning_rate": 0.001, "loss": 0.4239, "step": 4643 }, { "epoch": 0.12813818174293037, "grad_norm": 0.00471229525282979, "learning_rate": 0.001, "loss": 0.3605, "step": 4644 }, { "epoch": 0.12816577394399473, "grad_norm": 0.004949926398694515, "learning_rate": 0.001, "loss": 0.375, "step": 4645 }, { "epoch": 0.1281933661450591, "grad_norm": 0.0033453593496233225, "learning_rate": 0.001, "loss": 0.3764, "step": 4646 }, { "epoch": 0.12822095834612346, "grad_norm": 0.003853026544675231, "learning_rate": 0.001, "loss": 0.4356, "step": 4647 }, { "epoch": 0.12824855054718784, "grad_norm": 0.0037174660246819258, "learning_rate": 0.001, "loss": 0.392, "step": 4648 }, { "epoch": 0.12827614274825222, "grad_norm": 0.002829108154401183, "learning_rate": 0.001, "loss": 0.414, "step": 4649 }, { "epoch": 0.12830373494931657, "grad_norm": 0.002781393239274621, "learning_rate": 0.001, "loss": 0.4145, "step": 4650 }, { "epoch": 0.12833132715038095, "grad_norm": 0.012161584571003914, "learning_rate": 0.001, "loss": 0.4238, "step": 4651 }, { "epoch": 0.1283589193514453, "grad_norm": 0.004447166807949543, "learning_rate": 0.001, "loss": 0.3697, "step": 4652 }, { "epoch": 0.12838651155250969, "grad_norm": 0.010521035641431808, "learning_rate": 0.001, "loss": 0.3953, "step": 4653 }, { "epoch": 0.12841410375357407, "grad_norm": 0.0034564679954200983, "learning_rate": 0.001, "loss": 0.4372, "step": 4654 }, { "epoch": 0.12844169595463842, "grad_norm": 0.003944997675716877, "learning_rate": 0.001, "loss": 0.4111, "step": 4655 }, { "epoch": 0.1284692881557028, "grad_norm": 0.0037498660385608673, "learning_rate": 0.001, "loss": 0.3737, "step": 4656 }, { "epoch": 0.12849688035676715, "grad_norm": 0.0032498843502253294, "learning_rate": 0.001, "loss": 0.4231, "step": 4657 }, { "epoch": 0.12852447255783153, "grad_norm": 0.007645866367965937, "learning_rate": 0.001, "loss": 0.3664, "step": 4658 }, { "epoch": 0.12855206475889588, "grad_norm": 0.006332173477858305, "learning_rate": 0.001, "loss": 0.4242, "step": 4659 }, { "epoch": 0.12857965695996026, "grad_norm": 0.004206281155347824, "learning_rate": 0.001, "loss": 0.4048, "step": 4660 }, { "epoch": 0.12860724916102464, "grad_norm": 0.0026335117872804403, "learning_rate": 0.001, "loss": 0.4098, "step": 4661 }, { "epoch": 0.128634841362089, "grad_norm": 0.004516151268035173, "learning_rate": 0.001, "loss": 0.3905, "step": 4662 }, { "epoch": 0.12866243356315338, "grad_norm": 0.0032997119706124067, "learning_rate": 0.001, "loss": 0.4237, "step": 4663 }, { "epoch": 0.12869002576421773, "grad_norm": 0.0038388820830732584, "learning_rate": 0.001, "loss": 0.4067, "step": 4664 }, { "epoch": 0.1287176179652821, "grad_norm": 0.0035206389147788286, "learning_rate": 0.001, "loss": 0.4026, "step": 4665 }, { "epoch": 0.1287452101663465, "grad_norm": 0.005415271036326885, "learning_rate": 0.001, "loss": 0.4253, "step": 4666 }, { "epoch": 0.12877280236741084, "grad_norm": 0.002756484318524599, "learning_rate": 0.001, "loss": 0.3805, "step": 4667 }, { "epoch": 0.12880039456847522, "grad_norm": 0.0028146097902208567, "learning_rate": 0.001, "loss": 0.4216, "step": 4668 }, { "epoch": 0.12882798676953958, "grad_norm": 0.004214905202388763, "learning_rate": 0.001, "loss": 0.4183, "step": 4669 }, { "epoch": 0.12885557897060396, "grad_norm": 0.00516669312492013, "learning_rate": 0.001, "loss": 0.3785, "step": 4670 }, { "epoch": 0.12888317117166834, "grad_norm": 0.0031740644481033087, "learning_rate": 0.001, "loss": 0.3944, "step": 4671 }, { "epoch": 0.1289107633727327, "grad_norm": 0.004468605387955904, "learning_rate": 0.001, "loss": 0.4262, "step": 4672 }, { "epoch": 0.12893835557379707, "grad_norm": 0.003522511338815093, "learning_rate": 0.001, "loss": 0.4624, "step": 4673 }, { "epoch": 0.12896594777486142, "grad_norm": 0.002765173325315118, "learning_rate": 0.001, "loss": 0.4136, "step": 4674 }, { "epoch": 0.1289935399759258, "grad_norm": 0.0032698458526283503, "learning_rate": 0.001, "loss": 0.3992, "step": 4675 }, { "epoch": 0.12902113217699018, "grad_norm": 0.0034274624194949865, "learning_rate": 0.001, "loss": 0.3779, "step": 4676 }, { "epoch": 0.12904872437805454, "grad_norm": 0.003263387130573392, "learning_rate": 0.001, "loss": 0.4153, "step": 4677 }, { "epoch": 0.12907631657911892, "grad_norm": 0.0030025255400687456, "learning_rate": 0.001, "loss": 0.406, "step": 4678 }, { "epoch": 0.12910390878018327, "grad_norm": 0.003843993414193392, "learning_rate": 0.001, "loss": 0.3746, "step": 4679 }, { "epoch": 0.12913150098124765, "grad_norm": 0.0030434427317231894, "learning_rate": 0.001, "loss": 0.4037, "step": 4680 }, { "epoch": 0.12915909318231203, "grad_norm": 0.0034381363075226545, "learning_rate": 0.001, "loss": 0.3714, "step": 4681 }, { "epoch": 0.12918668538337638, "grad_norm": 0.0032730584498494864, "learning_rate": 0.001, "loss": 0.388, "step": 4682 }, { "epoch": 0.12921427758444076, "grad_norm": 0.004053408745676279, "learning_rate": 0.001, "loss": 0.3626, "step": 4683 }, { "epoch": 0.12924186978550511, "grad_norm": 0.003272439120337367, "learning_rate": 0.001, "loss": 0.4149, "step": 4684 }, { "epoch": 0.1292694619865695, "grad_norm": 0.0030873471405357122, "learning_rate": 0.001, "loss": 0.3804, "step": 4685 }, { "epoch": 0.12929705418763388, "grad_norm": 0.0030633402056992054, "learning_rate": 0.001, "loss": 0.4443, "step": 4686 }, { "epoch": 0.12932464638869823, "grad_norm": 0.003019932424649596, "learning_rate": 0.001, "loss": 0.3883, "step": 4687 }, { "epoch": 0.1293522385897626, "grad_norm": 0.0027192363049834967, "learning_rate": 0.001, "loss": 0.4195, "step": 4688 }, { "epoch": 0.12937983079082696, "grad_norm": 0.003051141509786248, "learning_rate": 0.001, "loss": 0.4528, "step": 4689 }, { "epoch": 0.12940742299189134, "grad_norm": 0.004393309820443392, "learning_rate": 0.001, "loss": 0.3668, "step": 4690 }, { "epoch": 0.12943501519295572, "grad_norm": 0.004716060124337673, "learning_rate": 0.001, "loss": 0.3668, "step": 4691 }, { "epoch": 0.12946260739402007, "grad_norm": 0.0047832694835960865, "learning_rate": 0.001, "loss": 0.4004, "step": 4692 }, { "epoch": 0.12949019959508445, "grad_norm": 0.0040343874134123325, "learning_rate": 0.001, "loss": 0.4414, "step": 4693 }, { "epoch": 0.1295177917961488, "grad_norm": 0.006127424072474241, "learning_rate": 0.001, "loss": 0.4169, "step": 4694 }, { "epoch": 0.1295453839972132, "grad_norm": 0.002882004715502262, "learning_rate": 0.001, "loss": 0.4052, "step": 4695 }, { "epoch": 0.12957297619827757, "grad_norm": 0.003973776008933783, "learning_rate": 0.001, "loss": 0.393, "step": 4696 }, { "epoch": 0.12960056839934192, "grad_norm": 0.0027190132532268763, "learning_rate": 0.001, "loss": 0.4071, "step": 4697 }, { "epoch": 0.1296281606004063, "grad_norm": 0.0025696575175970793, "learning_rate": 0.001, "loss": 0.3987, "step": 4698 }, { "epoch": 0.12965575280147065, "grad_norm": 0.0032083040568977594, "learning_rate": 0.001, "loss": 0.3845, "step": 4699 }, { "epoch": 0.12968334500253503, "grad_norm": 0.0025488468818366528, "learning_rate": 0.001, "loss": 0.4216, "step": 4700 }, { "epoch": 0.1297109372035994, "grad_norm": 0.009488187730312347, "learning_rate": 0.001, "loss": 0.4063, "step": 4701 }, { "epoch": 0.12973852940466377, "grad_norm": 0.0029801761265844107, "learning_rate": 0.001, "loss": 0.4158, "step": 4702 }, { "epoch": 0.12976612160572815, "grad_norm": 0.0247980747371912, "learning_rate": 0.001, "loss": 0.3803, "step": 4703 }, { "epoch": 0.1297937138067925, "grad_norm": 0.002381372032687068, "learning_rate": 0.001, "loss": 0.4051, "step": 4704 }, { "epoch": 0.12982130600785688, "grad_norm": 0.0035768726374953985, "learning_rate": 0.001, "loss": 0.3934, "step": 4705 }, { "epoch": 0.12984889820892126, "grad_norm": 0.003190788673236966, "learning_rate": 0.001, "loss": 0.4226, "step": 4706 }, { "epoch": 0.1298764904099856, "grad_norm": 0.00331826857291162, "learning_rate": 0.001, "loss": 0.4116, "step": 4707 }, { "epoch": 0.12990408261105, "grad_norm": 0.0032035168260335922, "learning_rate": 0.001, "loss": 0.3938, "step": 4708 }, { "epoch": 0.12993167481211434, "grad_norm": 0.0037546653766185045, "learning_rate": 0.001, "loss": 0.3856, "step": 4709 }, { "epoch": 0.12995926701317873, "grad_norm": 0.0026541994884610176, "learning_rate": 0.001, "loss": 0.4312, "step": 4710 }, { "epoch": 0.1299868592142431, "grad_norm": 0.004107889253646135, "learning_rate": 0.001, "loss": 0.4092, "step": 4711 }, { "epoch": 0.13001445141530746, "grad_norm": 0.002514521824195981, "learning_rate": 0.001, "loss": 0.4137, "step": 4712 }, { "epoch": 0.13004204361637184, "grad_norm": 0.0028199944645166397, "learning_rate": 0.001, "loss": 0.3967, "step": 4713 }, { "epoch": 0.1300696358174362, "grad_norm": 0.0027020128909498453, "learning_rate": 0.001, "loss": 0.449, "step": 4714 }, { "epoch": 0.13009722801850057, "grad_norm": 0.003255255287513137, "learning_rate": 0.001, "loss": 0.4046, "step": 4715 }, { "epoch": 0.13012482021956495, "grad_norm": 0.0035547774750739336, "learning_rate": 0.001, "loss": 0.3949, "step": 4716 }, { "epoch": 0.1301524124206293, "grad_norm": 0.0025250576436519623, "learning_rate": 0.001, "loss": 0.4117, "step": 4717 }, { "epoch": 0.13018000462169368, "grad_norm": 0.002805963857099414, "learning_rate": 0.001, "loss": 0.3898, "step": 4718 }, { "epoch": 0.13020759682275804, "grad_norm": 0.005695881322026253, "learning_rate": 0.001, "loss": 0.4223, "step": 4719 }, { "epoch": 0.13023518902382242, "grad_norm": 0.002897542668506503, "learning_rate": 0.001, "loss": 0.3924, "step": 4720 }, { "epoch": 0.1302627812248868, "grad_norm": 0.0037734811194241047, "learning_rate": 0.001, "loss": 0.3864, "step": 4721 }, { "epoch": 0.13029037342595115, "grad_norm": 0.003529176115989685, "learning_rate": 0.001, "loss": 0.3992, "step": 4722 }, { "epoch": 0.13031796562701553, "grad_norm": 0.003130922093987465, "learning_rate": 0.001, "loss": 0.4175, "step": 4723 }, { "epoch": 0.13034555782807988, "grad_norm": 0.0031659335363656282, "learning_rate": 0.001, "loss": 0.4084, "step": 4724 }, { "epoch": 0.13037315002914426, "grad_norm": 0.002901850501075387, "learning_rate": 0.001, "loss": 0.3685, "step": 4725 }, { "epoch": 0.13040074223020864, "grad_norm": 0.0034328114707022905, "learning_rate": 0.001, "loss": 0.4135, "step": 4726 }, { "epoch": 0.130428334431273, "grad_norm": 0.0028659338131546974, "learning_rate": 0.001, "loss": 0.4166, "step": 4727 }, { "epoch": 0.13045592663233738, "grad_norm": 0.002500742208212614, "learning_rate": 0.001, "loss": 0.431, "step": 4728 }, { "epoch": 0.13048351883340173, "grad_norm": 0.004093059338629246, "learning_rate": 0.001, "loss": 0.4415, "step": 4729 }, { "epoch": 0.1305111110344661, "grad_norm": 0.006217313464730978, "learning_rate": 0.001, "loss": 0.4253, "step": 4730 }, { "epoch": 0.1305387032355305, "grad_norm": 0.003967654425650835, "learning_rate": 0.001, "loss": 0.398, "step": 4731 }, { "epoch": 0.13056629543659484, "grad_norm": 0.002272986341267824, "learning_rate": 0.001, "loss": 0.4046, "step": 4732 }, { "epoch": 0.13059388763765922, "grad_norm": 0.003741595894098282, "learning_rate": 0.001, "loss": 0.35, "step": 4733 }, { "epoch": 0.13062147983872358, "grad_norm": 0.004285778850317001, "learning_rate": 0.001, "loss": 0.381, "step": 4734 }, { "epoch": 0.13064907203978796, "grad_norm": 0.003829494584351778, "learning_rate": 0.001, "loss": 0.3918, "step": 4735 }, { "epoch": 0.13067666424085234, "grad_norm": 0.0037094939034432173, "learning_rate": 0.001, "loss": 0.3885, "step": 4736 }, { "epoch": 0.1307042564419167, "grad_norm": 0.005124232266098261, "learning_rate": 0.001, "loss": 0.4105, "step": 4737 }, { "epoch": 0.13073184864298107, "grad_norm": 0.0033820930402725935, "learning_rate": 0.001, "loss": 0.3564, "step": 4738 }, { "epoch": 0.13075944084404542, "grad_norm": 0.004075558390468359, "learning_rate": 0.001, "loss": 0.3771, "step": 4739 }, { "epoch": 0.1307870330451098, "grad_norm": 0.003454482415691018, "learning_rate": 0.001, "loss": 0.4064, "step": 4740 }, { "epoch": 0.13081462524617418, "grad_norm": 0.003381206886842847, "learning_rate": 0.001, "loss": 0.3995, "step": 4741 }, { "epoch": 0.13084221744723853, "grad_norm": 0.0028117720503360033, "learning_rate": 0.001, "loss": 0.3949, "step": 4742 }, { "epoch": 0.13086980964830291, "grad_norm": 0.002668531145900488, "learning_rate": 0.001, "loss": 0.3892, "step": 4743 }, { "epoch": 0.13089740184936727, "grad_norm": 0.003135831095278263, "learning_rate": 0.001, "loss": 0.4174, "step": 4744 }, { "epoch": 0.13092499405043165, "grad_norm": 0.008101669140160084, "learning_rate": 0.001, "loss": 0.3729, "step": 4745 }, { "epoch": 0.13095258625149603, "grad_norm": 0.0035032073501497507, "learning_rate": 0.001, "loss": 0.4256, "step": 4746 }, { "epoch": 0.13098017845256038, "grad_norm": 0.003943659830838442, "learning_rate": 0.001, "loss": 0.4057, "step": 4747 }, { "epoch": 0.13100777065362476, "grad_norm": 0.0026659085415303707, "learning_rate": 0.001, "loss": 0.3986, "step": 4748 }, { "epoch": 0.1310353628546891, "grad_norm": 0.004375234711915255, "learning_rate": 0.001, "loss": 0.4039, "step": 4749 }, { "epoch": 0.1310629550557535, "grad_norm": 0.0033472348004579544, "learning_rate": 0.001, "loss": 0.3845, "step": 4750 }, { "epoch": 0.13109054725681787, "grad_norm": 0.005840728525072336, "learning_rate": 0.001, "loss": 0.4264, "step": 4751 }, { "epoch": 0.13111813945788223, "grad_norm": 0.0032132677733898163, "learning_rate": 0.001, "loss": 0.4109, "step": 4752 }, { "epoch": 0.1311457316589466, "grad_norm": 0.002400551922619343, "learning_rate": 0.001, "loss": 0.446, "step": 4753 }, { "epoch": 0.13117332386001096, "grad_norm": 0.0028581595979630947, "learning_rate": 0.001, "loss": 0.4224, "step": 4754 }, { "epoch": 0.13120091606107534, "grad_norm": 0.0029292525723576546, "learning_rate": 0.001, "loss": 0.3745, "step": 4755 }, { "epoch": 0.1312285082621397, "grad_norm": 0.004841271787881851, "learning_rate": 0.001, "loss": 0.4376, "step": 4756 }, { "epoch": 0.13125610046320407, "grad_norm": 0.004950361791998148, "learning_rate": 0.001, "loss": 0.3883, "step": 4757 }, { "epoch": 0.13128369266426845, "grad_norm": 0.003650795202702284, "learning_rate": 0.001, "loss": 0.3823, "step": 4758 }, { "epoch": 0.1313112848653328, "grad_norm": 0.005590515211224556, "learning_rate": 0.001, "loss": 0.3603, "step": 4759 }, { "epoch": 0.13133887706639719, "grad_norm": 0.004784191958606243, "learning_rate": 0.001, "loss": 0.4078, "step": 4760 }, { "epoch": 0.13136646926746154, "grad_norm": 0.006027446128427982, "learning_rate": 0.001, "loss": 0.3782, "step": 4761 }, { "epoch": 0.13139406146852592, "grad_norm": 0.0037396312691271305, "learning_rate": 0.001, "loss": 0.3694, "step": 4762 }, { "epoch": 0.1314216536695903, "grad_norm": 0.0032113094348460436, "learning_rate": 0.001, "loss": 0.4085, "step": 4763 }, { "epoch": 0.13144924587065465, "grad_norm": 0.00637141102924943, "learning_rate": 0.001, "loss": 0.4146, "step": 4764 }, { "epoch": 0.13147683807171903, "grad_norm": 0.005106314085423946, "learning_rate": 0.001, "loss": 0.4162, "step": 4765 }, { "epoch": 0.13150443027278338, "grad_norm": 0.00308993854559958, "learning_rate": 0.001, "loss": 0.4092, "step": 4766 }, { "epoch": 0.13153202247384777, "grad_norm": 0.007722698617726564, "learning_rate": 0.001, "loss": 0.3741, "step": 4767 }, { "epoch": 0.13155961467491215, "grad_norm": 0.00279480149038136, "learning_rate": 0.001, "loss": 0.3943, "step": 4768 }, { "epoch": 0.1315872068759765, "grad_norm": 0.00339706614613533, "learning_rate": 0.001, "loss": 0.3949, "step": 4769 }, { "epoch": 0.13161479907704088, "grad_norm": 0.0045981621369719505, "learning_rate": 0.001, "loss": 0.4086, "step": 4770 }, { "epoch": 0.13164239127810523, "grad_norm": 0.0029820918571203947, "learning_rate": 0.001, "loss": 0.4246, "step": 4771 }, { "epoch": 0.1316699834791696, "grad_norm": 0.0026766203809529543, "learning_rate": 0.001, "loss": 0.4042, "step": 4772 }, { "epoch": 0.131697575680234, "grad_norm": 0.004069194197654724, "learning_rate": 0.001, "loss": 0.3814, "step": 4773 }, { "epoch": 0.13172516788129834, "grad_norm": 0.005836902651935816, "learning_rate": 0.001, "loss": 0.3624, "step": 4774 }, { "epoch": 0.13175276008236272, "grad_norm": 0.002671006368473172, "learning_rate": 0.001, "loss": 0.4289, "step": 4775 }, { "epoch": 0.13178035228342708, "grad_norm": 0.0025038421154022217, "learning_rate": 0.001, "loss": 0.426, "step": 4776 }, { "epoch": 0.13180794448449146, "grad_norm": 0.0062514557503163815, "learning_rate": 0.001, "loss": 0.4202, "step": 4777 }, { "epoch": 0.13183553668555584, "grad_norm": 0.005402828101068735, "learning_rate": 0.001, "loss": 0.4204, "step": 4778 }, { "epoch": 0.1318631288866202, "grad_norm": 0.005721025634557009, "learning_rate": 0.001, "loss": 0.359, "step": 4779 }, { "epoch": 0.13189072108768457, "grad_norm": 0.0032702479511499405, "learning_rate": 0.001, "loss": 0.4039, "step": 4780 }, { "epoch": 0.13191831328874892, "grad_norm": 0.0022660638205707073, "learning_rate": 0.001, "loss": 0.3813, "step": 4781 }, { "epoch": 0.1319459054898133, "grad_norm": 0.008734694682061672, "learning_rate": 0.001, "loss": 0.4201, "step": 4782 }, { "epoch": 0.13197349769087768, "grad_norm": 0.0025738070253282785, "learning_rate": 0.001, "loss": 0.4214, "step": 4783 }, { "epoch": 0.13200108989194204, "grad_norm": 0.004817556589841843, "learning_rate": 0.001, "loss": 0.4043, "step": 4784 }, { "epoch": 0.13202868209300642, "grad_norm": 0.0032889090944081545, "learning_rate": 0.001, "loss": 0.3642, "step": 4785 }, { "epoch": 0.13205627429407077, "grad_norm": 0.002885392401367426, "learning_rate": 0.001, "loss": 0.3915, "step": 4786 }, { "epoch": 0.13208386649513515, "grad_norm": 0.002848939271643758, "learning_rate": 0.001, "loss": 0.3888, "step": 4787 }, { "epoch": 0.13211145869619953, "grad_norm": 0.004833425395190716, "learning_rate": 0.001, "loss": 0.368, "step": 4788 }, { "epoch": 0.13213905089726388, "grad_norm": 0.005285562947392464, "learning_rate": 0.001, "loss": 0.4323, "step": 4789 }, { "epoch": 0.13216664309832826, "grad_norm": 0.002420694101601839, "learning_rate": 0.001, "loss": 0.4163, "step": 4790 }, { "epoch": 0.13219423529939262, "grad_norm": 0.0041345711797475815, "learning_rate": 0.001, "loss": 0.4253, "step": 4791 }, { "epoch": 0.132221827500457, "grad_norm": 0.00341211399063468, "learning_rate": 0.001, "loss": 0.441, "step": 4792 }, { "epoch": 0.13224941970152138, "grad_norm": 0.003937454894185066, "learning_rate": 0.001, "loss": 0.4131, "step": 4793 }, { "epoch": 0.13227701190258573, "grad_norm": 0.0027174679562449455, "learning_rate": 0.001, "loss": 0.4081, "step": 4794 }, { "epoch": 0.1323046041036501, "grad_norm": 0.004002917557954788, "learning_rate": 0.001, "loss": 0.3918, "step": 4795 }, { "epoch": 0.13233219630471446, "grad_norm": 0.0035276333801448345, "learning_rate": 0.001, "loss": 0.4092, "step": 4796 }, { "epoch": 0.13235978850577884, "grad_norm": 0.0030461258720606565, "learning_rate": 0.001, "loss": 0.4176, "step": 4797 }, { "epoch": 0.13238738070684322, "grad_norm": 0.003558572381734848, "learning_rate": 0.001, "loss": 0.4045, "step": 4798 }, { "epoch": 0.13241497290790757, "grad_norm": 0.00515527231618762, "learning_rate": 0.001, "loss": 0.4131, "step": 4799 }, { "epoch": 0.13244256510897195, "grad_norm": 0.0030274007003754377, "learning_rate": 0.001, "loss": 0.3776, "step": 4800 }, { "epoch": 0.1324701573100363, "grad_norm": 0.0033126547932624817, "learning_rate": 0.001, "loss": 0.4172, "step": 4801 }, { "epoch": 0.1324977495111007, "grad_norm": 0.0031248959712684155, "learning_rate": 0.001, "loss": 0.4282, "step": 4802 }, { "epoch": 0.13252534171216507, "grad_norm": 0.0032562301494181156, "learning_rate": 0.001, "loss": 0.4023, "step": 4803 }, { "epoch": 0.13255293391322942, "grad_norm": 0.005781130399554968, "learning_rate": 0.001, "loss": 0.3802, "step": 4804 }, { "epoch": 0.1325805261142938, "grad_norm": 0.0039030571933835745, "learning_rate": 0.001, "loss": 0.4075, "step": 4805 }, { "epoch": 0.13260811831535815, "grad_norm": 0.002995892893522978, "learning_rate": 0.001, "loss": 0.4002, "step": 4806 }, { "epoch": 0.13263571051642253, "grad_norm": 0.002603591652587056, "learning_rate": 0.001, "loss": 0.3841, "step": 4807 }, { "epoch": 0.13266330271748691, "grad_norm": 0.00366300530731678, "learning_rate": 0.001, "loss": 0.404, "step": 4808 }, { "epoch": 0.13269089491855127, "grad_norm": 0.0033524134196341038, "learning_rate": 0.001, "loss": 0.3889, "step": 4809 }, { "epoch": 0.13271848711961565, "grad_norm": 0.003584874328225851, "learning_rate": 0.001, "loss": 0.3963, "step": 4810 }, { "epoch": 0.13274607932068, "grad_norm": 0.004354959353804588, "learning_rate": 0.001, "loss": 0.3705, "step": 4811 }, { "epoch": 0.13277367152174438, "grad_norm": 0.0041418191976845264, "learning_rate": 0.001, "loss": 0.4072, "step": 4812 }, { "epoch": 0.13280126372280876, "grad_norm": 0.0028464416973292828, "learning_rate": 0.001, "loss": 0.3683, "step": 4813 }, { "epoch": 0.1328288559238731, "grad_norm": 0.0049109673127532005, "learning_rate": 0.001, "loss": 0.3907, "step": 4814 }, { "epoch": 0.1328564481249375, "grad_norm": 0.0039614904671907425, "learning_rate": 0.001, "loss": 0.3736, "step": 4815 }, { "epoch": 0.13288404032600185, "grad_norm": 0.008978486992418766, "learning_rate": 0.001, "loss": 0.3636, "step": 4816 }, { "epoch": 0.13291163252706623, "grad_norm": 0.006667922250926495, "learning_rate": 0.001, "loss": 0.417, "step": 4817 }, { "epoch": 0.1329392247281306, "grad_norm": 0.004778092727065086, "learning_rate": 0.001, "loss": 0.4362, "step": 4818 }, { "epoch": 0.13296681692919496, "grad_norm": 0.0056798397563397884, "learning_rate": 0.001, "loss": 0.4426, "step": 4819 }, { "epoch": 0.13299440913025934, "grad_norm": 0.0033784289844334126, "learning_rate": 0.001, "loss": 0.4164, "step": 4820 }, { "epoch": 0.1330220013313237, "grad_norm": 0.002719793003052473, "learning_rate": 0.001, "loss": 0.4182, "step": 4821 }, { "epoch": 0.13304959353238807, "grad_norm": 0.003707107389345765, "learning_rate": 0.001, "loss": 0.4093, "step": 4822 }, { "epoch": 0.13307718573345245, "grad_norm": 0.0044320011511445045, "learning_rate": 0.001, "loss": 0.3985, "step": 4823 }, { "epoch": 0.1331047779345168, "grad_norm": 0.003244374878704548, "learning_rate": 0.001, "loss": 0.3985, "step": 4824 }, { "epoch": 0.13313237013558119, "grad_norm": 0.006342526059597731, "learning_rate": 0.001, "loss": 0.4233, "step": 4825 }, { "epoch": 0.13315996233664554, "grad_norm": 0.002801056718453765, "learning_rate": 0.001, "loss": 0.3753, "step": 4826 }, { "epoch": 0.13318755453770992, "grad_norm": 0.004005104303359985, "learning_rate": 0.001, "loss": 0.3835, "step": 4827 }, { "epoch": 0.1332151467387743, "grad_norm": 0.0026726596988737583, "learning_rate": 0.001, "loss": 0.3881, "step": 4828 }, { "epoch": 0.13324273893983865, "grad_norm": 0.0022171332966536283, "learning_rate": 0.001, "loss": 0.395, "step": 4829 }, { "epoch": 0.13327033114090303, "grad_norm": 0.00455888919532299, "learning_rate": 0.001, "loss": 0.3438, "step": 4830 }, { "epoch": 0.13329792334196738, "grad_norm": 0.0036296145990490913, "learning_rate": 0.001, "loss": 0.4194, "step": 4831 }, { "epoch": 0.13332551554303176, "grad_norm": 0.0030942729208618402, "learning_rate": 0.001, "loss": 0.3936, "step": 4832 }, { "epoch": 0.13335310774409614, "grad_norm": 0.0031570447608828545, "learning_rate": 0.001, "loss": 0.4177, "step": 4833 }, { "epoch": 0.1333806999451605, "grad_norm": 0.0029306053183972836, "learning_rate": 0.001, "loss": 0.4032, "step": 4834 }, { "epoch": 0.13340829214622488, "grad_norm": 0.002967880107462406, "learning_rate": 0.001, "loss": 0.3975, "step": 4835 }, { "epoch": 0.13343588434728923, "grad_norm": 0.0028411787934601307, "learning_rate": 0.001, "loss": 0.4018, "step": 4836 }, { "epoch": 0.1334634765483536, "grad_norm": 0.003896566806361079, "learning_rate": 0.001, "loss": 0.3884, "step": 4837 }, { "epoch": 0.133491068749418, "grad_norm": 0.0024064083117991686, "learning_rate": 0.001, "loss": 0.3944, "step": 4838 }, { "epoch": 0.13351866095048234, "grad_norm": 0.0045156353153288364, "learning_rate": 0.001, "loss": 0.4272, "step": 4839 }, { "epoch": 0.13354625315154672, "grad_norm": 0.002685493789613247, "learning_rate": 0.001, "loss": 0.4084, "step": 4840 }, { "epoch": 0.13357384535261108, "grad_norm": 0.00366023276001215, "learning_rate": 0.001, "loss": 0.3942, "step": 4841 }, { "epoch": 0.13360143755367546, "grad_norm": 0.0024847208987921476, "learning_rate": 0.001, "loss": 0.4506, "step": 4842 }, { "epoch": 0.13362902975473984, "grad_norm": 0.004642703104764223, "learning_rate": 0.001, "loss": 0.3911, "step": 4843 }, { "epoch": 0.1336566219558042, "grad_norm": 0.004185870289802551, "learning_rate": 0.001, "loss": 0.417, "step": 4844 }, { "epoch": 0.13368421415686857, "grad_norm": 0.0043914420530200005, "learning_rate": 0.001, "loss": 0.4254, "step": 4845 }, { "epoch": 0.13371180635793292, "grad_norm": 0.0026118417736142874, "learning_rate": 0.001, "loss": 0.4118, "step": 4846 }, { "epoch": 0.1337393985589973, "grad_norm": 0.0031845802441239357, "learning_rate": 0.001, "loss": 0.3999, "step": 4847 }, { "epoch": 0.13376699076006165, "grad_norm": 0.0029715183191001415, "learning_rate": 0.001, "loss": 0.4254, "step": 4848 }, { "epoch": 0.13379458296112604, "grad_norm": 0.0023027430288493633, "learning_rate": 0.001, "loss": 0.4288, "step": 4849 }, { "epoch": 0.13382217516219042, "grad_norm": 0.002911708317697048, "learning_rate": 0.001, "loss": 0.425, "step": 4850 }, { "epoch": 0.13384976736325477, "grad_norm": 0.002629220252856612, "learning_rate": 0.001, "loss": 0.4045, "step": 4851 }, { "epoch": 0.13387735956431915, "grad_norm": 0.005986600182950497, "learning_rate": 0.001, "loss": 0.3997, "step": 4852 }, { "epoch": 0.1339049517653835, "grad_norm": 0.006612063851207495, "learning_rate": 0.001, "loss": 0.3881, "step": 4853 }, { "epoch": 0.13393254396644788, "grad_norm": 0.003582441946491599, "learning_rate": 0.001, "loss": 0.3711, "step": 4854 }, { "epoch": 0.13396013616751226, "grad_norm": 0.0032464175019413233, "learning_rate": 0.001, "loss": 0.3766, "step": 4855 }, { "epoch": 0.13398772836857661, "grad_norm": 0.0038717519491910934, "learning_rate": 0.001, "loss": 0.3888, "step": 4856 }, { "epoch": 0.134015320569641, "grad_norm": 0.00332628283649683, "learning_rate": 0.001, "loss": 0.375, "step": 4857 }, { "epoch": 0.13404291277070535, "grad_norm": 0.004962892737239599, "learning_rate": 0.001, "loss": 0.412, "step": 4858 }, { "epoch": 0.13407050497176973, "grad_norm": 0.0022890097461640835, "learning_rate": 0.001, "loss": 0.4177, "step": 4859 }, { "epoch": 0.1340980971728341, "grad_norm": 0.0030799328815191984, "learning_rate": 0.001, "loss": 0.4119, "step": 4860 }, { "epoch": 0.13412568937389846, "grad_norm": 0.003105921670794487, "learning_rate": 0.001, "loss": 0.3852, "step": 4861 }, { "epoch": 0.13415328157496284, "grad_norm": 0.0060799745842814445, "learning_rate": 0.001, "loss": 0.4202, "step": 4862 }, { "epoch": 0.1341808737760272, "grad_norm": 0.005220312625169754, "learning_rate": 0.001, "loss": 0.403, "step": 4863 }, { "epoch": 0.13420846597709157, "grad_norm": 0.005123880226165056, "learning_rate": 0.001, "loss": 0.4101, "step": 4864 }, { "epoch": 0.13423605817815595, "grad_norm": 0.002265162765979767, "learning_rate": 0.001, "loss": 0.4223, "step": 4865 }, { "epoch": 0.1342636503792203, "grad_norm": 0.004077676683664322, "learning_rate": 0.001, "loss": 0.3996, "step": 4866 }, { "epoch": 0.1342912425802847, "grad_norm": 0.0035009863786399364, "learning_rate": 0.001, "loss": 0.3876, "step": 4867 }, { "epoch": 0.13431883478134904, "grad_norm": 0.003494169097393751, "learning_rate": 0.001, "loss": 0.3784, "step": 4868 }, { "epoch": 0.13434642698241342, "grad_norm": 0.0059214490465819836, "learning_rate": 0.001, "loss": 0.4206, "step": 4869 }, { "epoch": 0.1343740191834778, "grad_norm": 0.0038102336693555117, "learning_rate": 0.001, "loss": 0.3951, "step": 4870 }, { "epoch": 0.13440161138454215, "grad_norm": 0.01034005731344223, "learning_rate": 0.001, "loss": 0.4085, "step": 4871 }, { "epoch": 0.13442920358560653, "grad_norm": 0.003015523310750723, "learning_rate": 0.001, "loss": 0.4183, "step": 4872 }, { "epoch": 0.13445679578667089, "grad_norm": 0.003311215667054057, "learning_rate": 0.001, "loss": 0.3871, "step": 4873 }, { "epoch": 0.13448438798773527, "grad_norm": 0.002569688018411398, "learning_rate": 0.001, "loss": 0.4212, "step": 4874 }, { "epoch": 0.13451198018879965, "grad_norm": 0.002594457007944584, "learning_rate": 0.001, "loss": 0.4051, "step": 4875 }, { "epoch": 0.134539572389864, "grad_norm": 0.004568415228277445, "learning_rate": 0.001, "loss": 0.4062, "step": 4876 }, { "epoch": 0.13456716459092838, "grad_norm": 0.0033749649301171303, "learning_rate": 0.001, "loss": 0.3851, "step": 4877 }, { "epoch": 0.13459475679199273, "grad_norm": 0.003532090689986944, "learning_rate": 0.001, "loss": 0.4134, "step": 4878 }, { "epoch": 0.1346223489930571, "grad_norm": 0.0033763758838176727, "learning_rate": 0.001, "loss": 0.4118, "step": 4879 }, { "epoch": 0.1346499411941215, "grad_norm": 0.002815960207954049, "learning_rate": 0.001, "loss": 0.3919, "step": 4880 }, { "epoch": 0.13467753339518584, "grad_norm": 0.0021568064112216234, "learning_rate": 0.001, "loss": 0.4046, "step": 4881 }, { "epoch": 0.13470512559625022, "grad_norm": 0.002766069257631898, "learning_rate": 0.001, "loss": 0.3838, "step": 4882 }, { "epoch": 0.13473271779731458, "grad_norm": 0.004232902079820633, "learning_rate": 0.001, "loss": 0.4244, "step": 4883 }, { "epoch": 0.13476030999837896, "grad_norm": 0.007129458710551262, "learning_rate": 0.001, "loss": 0.4131, "step": 4884 }, { "epoch": 0.13478790219944334, "grad_norm": 0.0027489245403558016, "learning_rate": 0.001, "loss": 0.4027, "step": 4885 }, { "epoch": 0.1348154944005077, "grad_norm": 0.002653737785294652, "learning_rate": 0.001, "loss": 0.4018, "step": 4886 }, { "epoch": 0.13484308660157207, "grad_norm": 0.0031049742829054594, "learning_rate": 0.001, "loss": 0.4161, "step": 4887 }, { "epoch": 0.13487067880263642, "grad_norm": 0.002779177390038967, "learning_rate": 0.001, "loss": 0.4165, "step": 4888 }, { "epoch": 0.1348982710037008, "grad_norm": 0.003438913496211171, "learning_rate": 0.001, "loss": 0.378, "step": 4889 }, { "epoch": 0.13492586320476518, "grad_norm": 0.003269481472671032, "learning_rate": 0.001, "loss": 0.3955, "step": 4890 }, { "epoch": 0.13495345540582954, "grad_norm": 0.00541806360706687, "learning_rate": 0.001, "loss": 0.4346, "step": 4891 }, { "epoch": 0.13498104760689392, "grad_norm": 0.007846074178814888, "learning_rate": 0.001, "loss": 0.3962, "step": 4892 }, { "epoch": 0.13500863980795827, "grad_norm": 0.006200781557708979, "learning_rate": 0.001, "loss": 0.406, "step": 4893 }, { "epoch": 0.13503623200902265, "grad_norm": 0.003382598515599966, "learning_rate": 0.001, "loss": 0.3839, "step": 4894 }, { "epoch": 0.13506382421008703, "grad_norm": 0.002724557416513562, "learning_rate": 0.001, "loss": 0.422, "step": 4895 }, { "epoch": 0.13509141641115138, "grad_norm": 0.0024383303243666887, "learning_rate": 0.001, "loss": 0.3955, "step": 4896 }, { "epoch": 0.13511900861221576, "grad_norm": 0.0024829639587551355, "learning_rate": 0.001, "loss": 0.3831, "step": 4897 }, { "epoch": 0.13514660081328012, "grad_norm": 0.002763790776953101, "learning_rate": 0.001, "loss": 0.4006, "step": 4898 }, { "epoch": 0.1351741930143445, "grad_norm": 0.0031322850845754147, "learning_rate": 0.001, "loss": 0.4005, "step": 4899 }, { "epoch": 0.13520178521540888, "grad_norm": 0.002493941690772772, "learning_rate": 0.001, "loss": 0.3807, "step": 4900 }, { "epoch": 0.13522937741647323, "grad_norm": 0.002849231008440256, "learning_rate": 0.001, "loss": 0.3888, "step": 4901 }, { "epoch": 0.1352569696175376, "grad_norm": 0.0022846634965389967, "learning_rate": 0.001, "loss": 0.4765, "step": 4902 }, { "epoch": 0.13528456181860196, "grad_norm": 0.003928142134100199, "learning_rate": 0.001, "loss": 0.395, "step": 4903 }, { "epoch": 0.13531215401966634, "grad_norm": 0.0029574292711913586, "learning_rate": 0.001, "loss": 0.4335, "step": 4904 }, { "epoch": 0.13533974622073072, "grad_norm": 0.0024456526152789593, "learning_rate": 0.001, "loss": 0.4359, "step": 4905 }, { "epoch": 0.13536733842179507, "grad_norm": 0.005585736595094204, "learning_rate": 0.001, "loss": 0.4165, "step": 4906 }, { "epoch": 0.13539493062285946, "grad_norm": 0.003591830376535654, "learning_rate": 0.001, "loss": 0.4199, "step": 4907 }, { "epoch": 0.1354225228239238, "grad_norm": 0.0025483304634690285, "learning_rate": 0.001, "loss": 0.374, "step": 4908 }, { "epoch": 0.1354501150249882, "grad_norm": 0.0023265292402356863, "learning_rate": 0.001, "loss": 0.3977, "step": 4909 }, { "epoch": 0.13547770722605257, "grad_norm": 0.0066468482837080956, "learning_rate": 0.001, "loss": 0.387, "step": 4910 }, { "epoch": 0.13550529942711692, "grad_norm": 0.003016105853021145, "learning_rate": 0.001, "loss": 0.406, "step": 4911 }, { "epoch": 0.1355328916281813, "grad_norm": 0.002601313404738903, "learning_rate": 0.001, "loss": 0.3942, "step": 4912 }, { "epoch": 0.13556048382924565, "grad_norm": 0.0035740050952881575, "learning_rate": 0.001, "loss": 0.4112, "step": 4913 }, { "epoch": 0.13558807603031003, "grad_norm": 0.0030930940993130207, "learning_rate": 0.001, "loss": 0.4203, "step": 4914 }, { "epoch": 0.13561566823137441, "grad_norm": 0.0029879440553486347, "learning_rate": 0.001, "loss": 0.4089, "step": 4915 }, { "epoch": 0.13564326043243877, "grad_norm": 0.003520643338561058, "learning_rate": 0.001, "loss": 0.4005, "step": 4916 }, { "epoch": 0.13567085263350315, "grad_norm": 0.0026404429227113724, "learning_rate": 0.001, "loss": 0.4151, "step": 4917 }, { "epoch": 0.1356984448345675, "grad_norm": 0.0031609549187123775, "learning_rate": 0.001, "loss": 0.3546, "step": 4918 }, { "epoch": 0.13572603703563188, "grad_norm": 0.002632451243698597, "learning_rate": 0.001, "loss": 0.3774, "step": 4919 }, { "epoch": 0.13575362923669626, "grad_norm": 0.004959171637892723, "learning_rate": 0.001, "loss": 0.3821, "step": 4920 }, { "epoch": 0.1357812214377606, "grad_norm": 0.0027791087049990892, "learning_rate": 0.001, "loss": 0.3692, "step": 4921 }, { "epoch": 0.135808813638825, "grad_norm": 0.002658676588907838, "learning_rate": 0.001, "loss": 0.4051, "step": 4922 }, { "epoch": 0.13583640583988935, "grad_norm": 0.0022139514330774546, "learning_rate": 0.001, "loss": 0.42, "step": 4923 }, { "epoch": 0.13586399804095373, "grad_norm": 0.0026424683164805174, "learning_rate": 0.001, "loss": 0.3854, "step": 4924 }, { "epoch": 0.1358915902420181, "grad_norm": 0.0026688999496400356, "learning_rate": 0.001, "loss": 0.3799, "step": 4925 }, { "epoch": 0.13591918244308246, "grad_norm": 0.003557176562026143, "learning_rate": 0.001, "loss": 0.404, "step": 4926 }, { "epoch": 0.13594677464414684, "grad_norm": 0.002718021161854267, "learning_rate": 0.001, "loss": 0.4014, "step": 4927 }, { "epoch": 0.1359743668452112, "grad_norm": 0.0034369598142802715, "learning_rate": 0.001, "loss": 0.4215, "step": 4928 }, { "epoch": 0.13600195904627557, "grad_norm": 0.003354028332978487, "learning_rate": 0.001, "loss": 0.3488, "step": 4929 }, { "epoch": 0.13602955124733995, "grad_norm": 0.0030789237935096025, "learning_rate": 0.001, "loss": 0.367, "step": 4930 }, { "epoch": 0.1360571434484043, "grad_norm": 0.003499686485156417, "learning_rate": 0.001, "loss": 0.4172, "step": 4931 }, { "epoch": 0.13608473564946869, "grad_norm": 0.003687124466523528, "learning_rate": 0.001, "loss": 0.4303, "step": 4932 }, { "epoch": 0.13611232785053304, "grad_norm": 0.003492203541100025, "learning_rate": 0.001, "loss": 0.454, "step": 4933 }, { "epoch": 0.13613992005159742, "grad_norm": 0.002475053770467639, "learning_rate": 0.001, "loss": 0.413, "step": 4934 }, { "epoch": 0.1361675122526618, "grad_norm": 0.002234766026958823, "learning_rate": 0.001, "loss": 0.3814, "step": 4935 }, { "epoch": 0.13619510445372615, "grad_norm": 0.002103250939399004, "learning_rate": 0.001, "loss": 0.4257, "step": 4936 }, { "epoch": 0.13622269665479053, "grad_norm": 0.0036407755687832832, "learning_rate": 0.001, "loss": 0.3888, "step": 4937 }, { "epoch": 0.13625028885585488, "grad_norm": 0.003084122436121106, "learning_rate": 0.001, "loss": 0.3908, "step": 4938 }, { "epoch": 0.13627788105691926, "grad_norm": 0.0025477514136582613, "learning_rate": 0.001, "loss": 0.4523, "step": 4939 }, { "epoch": 0.13630547325798364, "grad_norm": 0.002407640218734741, "learning_rate": 0.001, "loss": 0.4229, "step": 4940 }, { "epoch": 0.136333065459048, "grad_norm": 0.004071627277880907, "learning_rate": 0.001, "loss": 0.393, "step": 4941 }, { "epoch": 0.13636065766011238, "grad_norm": 0.004198818933218718, "learning_rate": 0.001, "loss": 0.3831, "step": 4942 }, { "epoch": 0.13638824986117673, "grad_norm": 0.0038220062851905823, "learning_rate": 0.001, "loss": 0.3884, "step": 4943 }, { "epoch": 0.1364158420622411, "grad_norm": 0.003277060342952609, "learning_rate": 0.001, "loss": 0.427, "step": 4944 }, { "epoch": 0.13644343426330546, "grad_norm": 0.0034869378432631493, "learning_rate": 0.001, "loss": 0.4056, "step": 4945 }, { "epoch": 0.13647102646436984, "grad_norm": 0.00437937444075942, "learning_rate": 0.001, "loss": 0.3728, "step": 4946 }, { "epoch": 0.13649861866543422, "grad_norm": 0.0038593048229813576, "learning_rate": 0.001, "loss": 0.3715, "step": 4947 }, { "epoch": 0.13652621086649858, "grad_norm": 0.0027042904403060675, "learning_rate": 0.001, "loss": 0.4218, "step": 4948 }, { "epoch": 0.13655380306756296, "grad_norm": 0.003592225257307291, "learning_rate": 0.001, "loss": 0.3708, "step": 4949 }, { "epoch": 0.1365813952686273, "grad_norm": 0.005848668981343508, "learning_rate": 0.001, "loss": 0.3964, "step": 4950 }, { "epoch": 0.1366089874696917, "grad_norm": 0.01795089617371559, "learning_rate": 0.001, "loss": 0.3951, "step": 4951 }, { "epoch": 0.13663657967075607, "grad_norm": 0.004261403810232878, "learning_rate": 0.001, "loss": 0.4031, "step": 4952 }, { "epoch": 0.13666417187182042, "grad_norm": 0.0033142368774861097, "learning_rate": 0.001, "loss": 0.4232, "step": 4953 }, { "epoch": 0.1366917640728848, "grad_norm": 0.003166605019941926, "learning_rate": 0.001, "loss": 0.4016, "step": 4954 }, { "epoch": 0.13671935627394916, "grad_norm": 0.003505310043692589, "learning_rate": 0.001, "loss": 0.3785, "step": 4955 }, { "epoch": 0.13674694847501354, "grad_norm": 0.005368894897401333, "learning_rate": 0.001, "loss": 0.3984, "step": 4956 }, { "epoch": 0.13677454067607792, "grad_norm": 0.00306013785302639, "learning_rate": 0.001, "loss": 0.3971, "step": 4957 }, { "epoch": 0.13680213287714227, "grad_norm": 0.005562109872698784, "learning_rate": 0.001, "loss": 0.3933, "step": 4958 }, { "epoch": 0.13682972507820665, "grad_norm": 0.0037877087015658617, "learning_rate": 0.001, "loss": 0.4104, "step": 4959 }, { "epoch": 0.136857317279271, "grad_norm": 0.0031170272268354893, "learning_rate": 0.001, "loss": 0.4073, "step": 4960 }, { "epoch": 0.13688490948033538, "grad_norm": 0.003254547482356429, "learning_rate": 0.001, "loss": 0.4079, "step": 4961 }, { "epoch": 0.13691250168139976, "grad_norm": 0.003371870843693614, "learning_rate": 0.001, "loss": 0.3904, "step": 4962 }, { "epoch": 0.13694009388246411, "grad_norm": 0.003327438374981284, "learning_rate": 0.001, "loss": 0.3929, "step": 4963 }, { "epoch": 0.1369676860835285, "grad_norm": 0.003542753402143717, "learning_rate": 0.001, "loss": 0.3767, "step": 4964 }, { "epoch": 0.13699527828459285, "grad_norm": 0.0030288288835436106, "learning_rate": 0.001, "loss": 0.3652, "step": 4965 }, { "epoch": 0.13702287048565723, "grad_norm": 0.0034882482141256332, "learning_rate": 0.001, "loss": 0.4056, "step": 4966 }, { "epoch": 0.1370504626867216, "grad_norm": 0.005606031510978937, "learning_rate": 0.001, "loss": 0.394, "step": 4967 }, { "epoch": 0.13707805488778596, "grad_norm": 0.004056036937981844, "learning_rate": 0.001, "loss": 0.4103, "step": 4968 }, { "epoch": 0.13710564708885034, "grad_norm": 0.0032308900263160467, "learning_rate": 0.001, "loss": 0.4076, "step": 4969 }, { "epoch": 0.1371332392899147, "grad_norm": 0.006253343541175127, "learning_rate": 0.001, "loss": 0.3885, "step": 4970 }, { "epoch": 0.13716083149097907, "grad_norm": 0.003451489843428135, "learning_rate": 0.001, "loss": 0.3788, "step": 4971 }, { "epoch": 0.13718842369204345, "grad_norm": 0.0033425847068428993, "learning_rate": 0.001, "loss": 0.3824, "step": 4972 }, { "epoch": 0.1372160158931078, "grad_norm": 0.0033426876179873943, "learning_rate": 0.001, "loss": 0.437, "step": 4973 }, { "epoch": 0.1372436080941722, "grad_norm": 0.0031063987407833338, "learning_rate": 0.001, "loss": 0.3927, "step": 4974 }, { "epoch": 0.13727120029523654, "grad_norm": 0.003505886532366276, "learning_rate": 0.001, "loss": 0.4198, "step": 4975 }, { "epoch": 0.13729879249630092, "grad_norm": 0.0025633492041379213, "learning_rate": 0.001, "loss": 0.4173, "step": 4976 }, { "epoch": 0.1373263846973653, "grad_norm": 0.003809051588177681, "learning_rate": 0.001, "loss": 0.4064, "step": 4977 }, { "epoch": 0.13735397689842965, "grad_norm": 0.004108694847673178, "learning_rate": 0.001, "loss": 0.3849, "step": 4978 }, { "epoch": 0.13738156909949403, "grad_norm": 0.0032902159728109837, "learning_rate": 0.001, "loss": 0.4003, "step": 4979 }, { "epoch": 0.13740916130055839, "grad_norm": 0.004460613243281841, "learning_rate": 0.001, "loss": 0.3988, "step": 4980 }, { "epoch": 0.13743675350162277, "grad_norm": 0.0026315529830753803, "learning_rate": 0.001, "loss": 0.409, "step": 4981 }, { "epoch": 0.13746434570268715, "grad_norm": 0.0023952291812747717, "learning_rate": 0.001, "loss": 0.4163, "step": 4982 }, { "epoch": 0.1374919379037515, "grad_norm": 0.003244011662900448, "learning_rate": 0.001, "loss": 0.3681, "step": 4983 }, { "epoch": 0.13751953010481588, "grad_norm": 0.003023752709850669, "learning_rate": 0.001, "loss": 0.4021, "step": 4984 }, { "epoch": 0.13754712230588023, "grad_norm": 0.002322046086192131, "learning_rate": 0.001, "loss": 0.3911, "step": 4985 }, { "epoch": 0.1375747145069446, "grad_norm": 0.0032940306700766087, "learning_rate": 0.001, "loss": 0.3987, "step": 4986 }, { "epoch": 0.137602306708009, "grad_norm": 0.002528809243813157, "learning_rate": 0.001, "loss": 0.4007, "step": 4987 }, { "epoch": 0.13762989890907334, "grad_norm": 0.003885491518303752, "learning_rate": 0.001, "loss": 0.3912, "step": 4988 }, { "epoch": 0.13765749111013773, "grad_norm": 0.0026430352590978146, "learning_rate": 0.001, "loss": 0.3899, "step": 4989 }, { "epoch": 0.13768508331120208, "grad_norm": 0.0037311362102627754, "learning_rate": 0.001, "loss": 0.3837, "step": 4990 }, { "epoch": 0.13771267551226646, "grad_norm": 0.002789480844512582, "learning_rate": 0.001, "loss": 0.3944, "step": 4991 }, { "epoch": 0.13774026771333084, "grad_norm": 0.0031957447063177824, "learning_rate": 0.001, "loss": 0.4011, "step": 4992 }, { "epoch": 0.1377678599143952, "grad_norm": 0.0032790224067866802, "learning_rate": 0.001, "loss": 0.4207, "step": 4993 }, { "epoch": 0.13779545211545957, "grad_norm": 0.0031076695304363966, "learning_rate": 0.001, "loss": 0.3813, "step": 4994 }, { "epoch": 0.13782304431652392, "grad_norm": 0.0024428521282970905, "learning_rate": 0.001, "loss": 0.3828, "step": 4995 }, { "epoch": 0.1378506365175883, "grad_norm": 0.003022199496626854, "learning_rate": 0.001, "loss": 0.4236, "step": 4996 }, { "epoch": 0.13787822871865268, "grad_norm": 0.0028683668933808804, "learning_rate": 0.001, "loss": 0.3924, "step": 4997 }, { "epoch": 0.13790582091971704, "grad_norm": 0.003475520294159651, "learning_rate": 0.001, "loss": 0.395, "step": 4998 }, { "epoch": 0.13793341312078142, "grad_norm": 0.003010012209415436, "learning_rate": 0.001, "loss": 0.4281, "step": 4999 }, { "epoch": 0.13796100532184577, "grad_norm": 0.002013305900618434, "learning_rate": 0.001, "loss": 0.3957, "step": 5000 }, { "epoch": 0.13796100532184577, "eval_runtime": 23.8099, "eval_samples_per_second": 1.344, "eval_steps_per_second": 0.168, "step": 5000 }, { "epoch": 0.13798859752291015, "grad_norm": 0.00346556818112731, "learning_rate": 0.001, "loss": 0.4065, "step": 5001 }, { "epoch": 0.13801618972397453, "grad_norm": 0.003510264679789543, "learning_rate": 0.001, "loss": 0.4371, "step": 5002 }, { "epoch": 0.13804378192503888, "grad_norm": 0.0026663157623261213, "learning_rate": 0.001, "loss": 0.4163, "step": 5003 }, { "epoch": 0.13807137412610326, "grad_norm": 0.0029142978601157665, "learning_rate": 0.001, "loss": 0.3741, "step": 5004 }, { "epoch": 0.13809896632716762, "grad_norm": 0.0032160128466784954, "learning_rate": 0.001, "loss": 0.3955, "step": 5005 }, { "epoch": 0.138126558528232, "grad_norm": 0.0034054117277264595, "learning_rate": 0.001, "loss": 0.388, "step": 5006 }, { "epoch": 0.13815415072929638, "grad_norm": 0.0025454023852944374, "learning_rate": 0.001, "loss": 0.4313, "step": 5007 }, { "epoch": 0.13818174293036073, "grad_norm": 0.0025894741993397474, "learning_rate": 0.001, "loss": 0.3961, "step": 5008 }, { "epoch": 0.1382093351314251, "grad_norm": 0.0027122844476252794, "learning_rate": 0.001, "loss": 0.3894, "step": 5009 }, { "epoch": 0.13823692733248946, "grad_norm": 0.0039015160873532295, "learning_rate": 0.001, "loss": 0.4005, "step": 5010 }, { "epoch": 0.13826451953355384, "grad_norm": 0.003185526467859745, "learning_rate": 0.001, "loss": 0.3548, "step": 5011 }, { "epoch": 0.13829211173461822, "grad_norm": 0.004475030116736889, "learning_rate": 0.001, "loss": 0.3721, "step": 5012 }, { "epoch": 0.13831970393568258, "grad_norm": 0.003455979051068425, "learning_rate": 0.001, "loss": 0.4008, "step": 5013 }, { "epoch": 0.13834729613674696, "grad_norm": 0.002884593093767762, "learning_rate": 0.001, "loss": 0.3896, "step": 5014 }, { "epoch": 0.1383748883378113, "grad_norm": 0.002263484289869666, "learning_rate": 0.001, "loss": 0.4356, "step": 5015 }, { "epoch": 0.1384024805388757, "grad_norm": 0.003735289676114917, "learning_rate": 0.001, "loss": 0.3739, "step": 5016 }, { "epoch": 0.13843007273994007, "grad_norm": 0.003224038053303957, "learning_rate": 0.001, "loss": 0.3994, "step": 5017 }, { "epoch": 0.13845766494100442, "grad_norm": 0.004887696355581284, "learning_rate": 0.001, "loss": 0.4223, "step": 5018 }, { "epoch": 0.1384852571420688, "grad_norm": 0.004760736133903265, "learning_rate": 0.001, "loss": 0.3837, "step": 5019 }, { "epoch": 0.13851284934313315, "grad_norm": 0.0037501987535506487, "learning_rate": 0.001, "loss": 0.4277, "step": 5020 }, { "epoch": 0.13854044154419753, "grad_norm": 0.0027801606338471174, "learning_rate": 0.001, "loss": 0.4105, "step": 5021 }, { "epoch": 0.13856803374526191, "grad_norm": 0.004847459960728884, "learning_rate": 0.001, "loss": 0.4016, "step": 5022 }, { "epoch": 0.13859562594632627, "grad_norm": 0.0057189855724573135, "learning_rate": 0.001, "loss": 0.3729, "step": 5023 }, { "epoch": 0.13862321814739065, "grad_norm": 0.002882015658542514, "learning_rate": 0.001, "loss": 0.3725, "step": 5024 }, { "epoch": 0.138650810348455, "grad_norm": 0.002629178110510111, "learning_rate": 0.001, "loss": 0.3793, "step": 5025 }, { "epoch": 0.13867840254951938, "grad_norm": 0.0042918650433421135, "learning_rate": 0.001, "loss": 0.3935, "step": 5026 }, { "epoch": 0.13870599475058376, "grad_norm": 0.002527192234992981, "learning_rate": 0.001, "loss": 0.3924, "step": 5027 }, { "epoch": 0.1387335869516481, "grad_norm": 0.0032653820235282183, "learning_rate": 0.001, "loss": 0.4435, "step": 5028 }, { "epoch": 0.1387611791527125, "grad_norm": 0.007327108643949032, "learning_rate": 0.001, "loss": 0.3957, "step": 5029 }, { "epoch": 0.13878877135377685, "grad_norm": 0.0036960134748369455, "learning_rate": 0.001, "loss": 0.4006, "step": 5030 }, { "epoch": 0.13881636355484123, "grad_norm": 0.0025663794949650764, "learning_rate": 0.001, "loss": 0.3843, "step": 5031 }, { "epoch": 0.1388439557559056, "grad_norm": 0.0026950971223413944, "learning_rate": 0.001, "loss": 0.3932, "step": 5032 }, { "epoch": 0.13887154795696996, "grad_norm": 0.002336070640012622, "learning_rate": 0.001, "loss": 0.4152, "step": 5033 }, { "epoch": 0.13889914015803434, "grad_norm": 0.0024258256889879704, "learning_rate": 0.001, "loss": 0.4286, "step": 5034 }, { "epoch": 0.1389267323590987, "grad_norm": 0.002580720465630293, "learning_rate": 0.001, "loss": 0.3786, "step": 5035 }, { "epoch": 0.13895432456016307, "grad_norm": 0.00329298572614789, "learning_rate": 0.001, "loss": 0.3764, "step": 5036 }, { "epoch": 0.13898191676122743, "grad_norm": 0.0029764564242213964, "learning_rate": 0.001, "loss": 0.3951, "step": 5037 }, { "epoch": 0.1390095089622918, "grad_norm": 0.002715889597311616, "learning_rate": 0.001, "loss": 0.382, "step": 5038 }, { "epoch": 0.13903710116335619, "grad_norm": 0.002393190050497651, "learning_rate": 0.001, "loss": 0.3743, "step": 5039 }, { "epoch": 0.13906469336442054, "grad_norm": 0.0024876859970390797, "learning_rate": 0.001, "loss": 0.397, "step": 5040 }, { "epoch": 0.13909228556548492, "grad_norm": 0.0032655515242367983, "learning_rate": 0.001, "loss": 0.3793, "step": 5041 }, { "epoch": 0.13911987776654927, "grad_norm": 0.002442015800625086, "learning_rate": 0.001, "loss": 0.4281, "step": 5042 }, { "epoch": 0.13914746996761365, "grad_norm": 0.002807747106999159, "learning_rate": 0.001, "loss": 0.4181, "step": 5043 }, { "epoch": 0.13917506216867803, "grad_norm": 0.005945953540503979, "learning_rate": 0.001, "loss": 0.3867, "step": 5044 }, { "epoch": 0.13920265436974238, "grad_norm": 0.0031765319872647524, "learning_rate": 0.001, "loss": 0.416, "step": 5045 }, { "epoch": 0.13923024657080676, "grad_norm": 0.0037979150656610727, "learning_rate": 0.001, "loss": 0.4096, "step": 5046 }, { "epoch": 0.13925783877187112, "grad_norm": 0.0030904149170964956, "learning_rate": 0.001, "loss": 0.4243, "step": 5047 }, { "epoch": 0.1392854309729355, "grad_norm": 0.0037039672024548054, "learning_rate": 0.001, "loss": 0.3912, "step": 5048 }, { "epoch": 0.13931302317399988, "grad_norm": 0.0033529032953083515, "learning_rate": 0.001, "loss": 0.3702, "step": 5049 }, { "epoch": 0.13934061537506423, "grad_norm": 0.0024797532241791487, "learning_rate": 0.001, "loss": 0.4347, "step": 5050 }, { "epoch": 0.1393682075761286, "grad_norm": 0.003208388341590762, "learning_rate": 0.001, "loss": 0.3977, "step": 5051 }, { "epoch": 0.13939579977719296, "grad_norm": 0.0024538834113627672, "learning_rate": 0.001, "loss": 0.4231, "step": 5052 }, { "epoch": 0.13942339197825734, "grad_norm": 0.0028640846721827984, "learning_rate": 0.001, "loss": 0.3768, "step": 5053 }, { "epoch": 0.13945098417932172, "grad_norm": 0.002653739182278514, "learning_rate": 0.001, "loss": 0.4074, "step": 5054 }, { "epoch": 0.13947857638038608, "grad_norm": 0.003615357680246234, "learning_rate": 0.001, "loss": 0.419, "step": 5055 }, { "epoch": 0.13950616858145046, "grad_norm": 0.002402637619525194, "learning_rate": 0.001, "loss": 0.409, "step": 5056 }, { "epoch": 0.1395337607825148, "grad_norm": 0.0025258746463805437, "learning_rate": 0.001, "loss": 0.4042, "step": 5057 }, { "epoch": 0.1395613529835792, "grad_norm": 0.0028938499744981527, "learning_rate": 0.001, "loss": 0.3877, "step": 5058 }, { "epoch": 0.13958894518464357, "grad_norm": 0.008427090011537075, "learning_rate": 0.001, "loss": 0.4044, "step": 5059 }, { "epoch": 0.13961653738570792, "grad_norm": 0.00399380037561059, "learning_rate": 0.001, "loss": 0.3757, "step": 5060 }, { "epoch": 0.1396441295867723, "grad_norm": 0.002580770291388035, "learning_rate": 0.001, "loss": 0.3837, "step": 5061 }, { "epoch": 0.13967172178783666, "grad_norm": 0.0026310610119253397, "learning_rate": 0.001, "loss": 0.4212, "step": 5062 }, { "epoch": 0.13969931398890104, "grad_norm": 0.003709648735821247, "learning_rate": 0.001, "loss": 0.3849, "step": 5063 }, { "epoch": 0.13972690618996542, "grad_norm": 0.0026174660306423903, "learning_rate": 0.001, "loss": 0.4129, "step": 5064 }, { "epoch": 0.13975449839102977, "grad_norm": 0.002821024041622877, "learning_rate": 0.001, "loss": 0.4026, "step": 5065 }, { "epoch": 0.13978209059209415, "grad_norm": 0.0045610954985022545, "learning_rate": 0.001, "loss": 0.3604, "step": 5066 }, { "epoch": 0.1398096827931585, "grad_norm": 0.003461558138951659, "learning_rate": 0.001, "loss": 0.3955, "step": 5067 }, { "epoch": 0.13983727499422288, "grad_norm": 0.00443047983571887, "learning_rate": 0.001, "loss": 0.4274, "step": 5068 }, { "epoch": 0.13986486719528726, "grad_norm": 0.0035488642752170563, "learning_rate": 0.001, "loss": 0.4064, "step": 5069 }, { "epoch": 0.13989245939635161, "grad_norm": 0.005335607565939426, "learning_rate": 0.001, "loss": 0.4271, "step": 5070 }, { "epoch": 0.139920051597416, "grad_norm": 0.003680909052491188, "learning_rate": 0.001, "loss": 0.4332, "step": 5071 }, { "epoch": 0.13994764379848035, "grad_norm": 0.004192323889583349, "learning_rate": 0.001, "loss": 0.3989, "step": 5072 }, { "epoch": 0.13997523599954473, "grad_norm": 0.0035443026572465897, "learning_rate": 0.001, "loss": 0.4363, "step": 5073 }, { "epoch": 0.1400028282006091, "grad_norm": 0.0038134013302624226, "learning_rate": 0.001, "loss": 0.3794, "step": 5074 }, { "epoch": 0.14003042040167346, "grad_norm": 0.0036664907820522785, "learning_rate": 0.001, "loss": 0.428, "step": 5075 }, { "epoch": 0.14005801260273784, "grad_norm": 0.0024540217127650976, "learning_rate": 0.001, "loss": 0.3829, "step": 5076 }, { "epoch": 0.1400856048038022, "grad_norm": 0.0027097316924482584, "learning_rate": 0.001, "loss": 0.4205, "step": 5077 }, { "epoch": 0.14011319700486657, "grad_norm": 0.002922753570601344, "learning_rate": 0.001, "loss": 0.4021, "step": 5078 }, { "epoch": 0.14014078920593095, "grad_norm": 0.003050590632483363, "learning_rate": 0.001, "loss": 0.4488, "step": 5079 }, { "epoch": 0.1401683814069953, "grad_norm": 0.0027282421942800283, "learning_rate": 0.001, "loss": 0.406, "step": 5080 }, { "epoch": 0.1401959736080597, "grad_norm": 0.0027401361148804426, "learning_rate": 0.001, "loss": 0.4186, "step": 5081 }, { "epoch": 0.14022356580912404, "grad_norm": 0.006228750105947256, "learning_rate": 0.001, "loss": 0.3926, "step": 5082 }, { "epoch": 0.14025115801018842, "grad_norm": 0.002542686415836215, "learning_rate": 0.001, "loss": 0.453, "step": 5083 }, { "epoch": 0.1402787502112528, "grad_norm": 0.002365612657740712, "learning_rate": 0.001, "loss": 0.4089, "step": 5084 }, { "epoch": 0.14030634241231715, "grad_norm": 0.008088381960988045, "learning_rate": 0.001, "loss": 0.3808, "step": 5085 }, { "epoch": 0.14033393461338153, "grad_norm": 0.002785349264740944, "learning_rate": 0.001, "loss": 0.3973, "step": 5086 }, { "epoch": 0.14036152681444589, "grad_norm": 0.00489590922370553, "learning_rate": 0.001, "loss": 0.4241, "step": 5087 }, { "epoch": 0.14038911901551027, "grad_norm": 0.0027916128747165203, "learning_rate": 0.001, "loss": 0.3714, "step": 5088 }, { "epoch": 0.14041671121657465, "grad_norm": 0.002344654407352209, "learning_rate": 0.001, "loss": 0.388, "step": 5089 }, { "epoch": 0.140444303417639, "grad_norm": 0.0031531101558357477, "learning_rate": 0.001, "loss": 0.3841, "step": 5090 }, { "epoch": 0.14047189561870338, "grad_norm": 0.0027754863258451223, "learning_rate": 0.001, "loss": 0.3897, "step": 5091 }, { "epoch": 0.14049948781976773, "grad_norm": 0.003328294726088643, "learning_rate": 0.001, "loss": 0.4077, "step": 5092 }, { "epoch": 0.1405270800208321, "grad_norm": 0.0021852529607713223, "learning_rate": 0.001, "loss": 0.4113, "step": 5093 }, { "epoch": 0.1405546722218965, "grad_norm": 0.004019197542220354, "learning_rate": 0.001, "loss": 0.3825, "step": 5094 }, { "epoch": 0.14058226442296085, "grad_norm": 0.0024554249830543995, "learning_rate": 0.001, "loss": 0.4263, "step": 5095 }, { "epoch": 0.14060985662402523, "grad_norm": 0.0032448593992739916, "learning_rate": 0.001, "loss": 0.3924, "step": 5096 }, { "epoch": 0.14063744882508958, "grad_norm": 0.0027907416224479675, "learning_rate": 0.001, "loss": 0.4575, "step": 5097 }, { "epoch": 0.14066504102615396, "grad_norm": 0.006815130822360516, "learning_rate": 0.001, "loss": 0.401, "step": 5098 }, { "epoch": 0.14069263322721834, "grad_norm": 0.00370893650688231, "learning_rate": 0.001, "loss": 0.3962, "step": 5099 }, { "epoch": 0.1407202254282827, "grad_norm": 0.002180712530389428, "learning_rate": 0.001, "loss": 0.4123, "step": 5100 }, { "epoch": 0.14074781762934707, "grad_norm": 0.0035834074951708317, "learning_rate": 0.001, "loss": 0.3723, "step": 5101 }, { "epoch": 0.14077540983041142, "grad_norm": 0.003018912160769105, "learning_rate": 0.001, "loss": 0.4232, "step": 5102 }, { "epoch": 0.1408030020314758, "grad_norm": 0.002520001260563731, "learning_rate": 0.001, "loss": 0.3921, "step": 5103 }, { "epoch": 0.14083059423254018, "grad_norm": 0.00585381593555212, "learning_rate": 0.001, "loss": 0.4159, "step": 5104 }, { "epoch": 0.14085818643360454, "grad_norm": 0.0028747236356139183, "learning_rate": 0.001, "loss": 0.4045, "step": 5105 }, { "epoch": 0.14088577863466892, "grad_norm": 0.005821559578180313, "learning_rate": 0.001, "loss": 0.3755, "step": 5106 }, { "epoch": 0.14091337083573327, "grad_norm": 0.0025346369948238134, "learning_rate": 0.001, "loss": 0.399, "step": 5107 }, { "epoch": 0.14094096303679765, "grad_norm": 0.002172187902033329, "learning_rate": 0.001, "loss": 0.4051, "step": 5108 }, { "epoch": 0.14096855523786203, "grad_norm": 0.0038861907087266445, "learning_rate": 0.001, "loss": 0.4174, "step": 5109 }, { "epoch": 0.14099614743892638, "grad_norm": 0.0024652110878378153, "learning_rate": 0.001, "loss": 0.3963, "step": 5110 }, { "epoch": 0.14102373963999076, "grad_norm": 0.003532834816724062, "learning_rate": 0.001, "loss": 0.4111, "step": 5111 }, { "epoch": 0.14105133184105512, "grad_norm": 0.006292164325714111, "learning_rate": 0.001, "loss": 0.3828, "step": 5112 }, { "epoch": 0.1410789240421195, "grad_norm": 0.00268157827667892, "learning_rate": 0.001, "loss": 0.4055, "step": 5113 }, { "epoch": 0.14110651624318388, "grad_norm": 0.0035765564534813166, "learning_rate": 0.001, "loss": 0.3599, "step": 5114 }, { "epoch": 0.14113410844424823, "grad_norm": 0.0026933897752314806, "learning_rate": 0.001, "loss": 0.3904, "step": 5115 }, { "epoch": 0.1411617006453126, "grad_norm": 0.0043404726311564445, "learning_rate": 0.001, "loss": 0.4012, "step": 5116 }, { "epoch": 0.14118929284637696, "grad_norm": 0.0029684489127248526, "learning_rate": 0.001, "loss": 0.3667, "step": 5117 }, { "epoch": 0.14121688504744134, "grad_norm": 0.0028438603039830923, "learning_rate": 0.001, "loss": 0.3819, "step": 5118 }, { "epoch": 0.14124447724850572, "grad_norm": 0.002665071515366435, "learning_rate": 0.001, "loss": 0.3911, "step": 5119 }, { "epoch": 0.14127206944957008, "grad_norm": 0.0035951558966189623, "learning_rate": 0.001, "loss": 0.3738, "step": 5120 }, { "epoch": 0.14129966165063446, "grad_norm": 0.007121792994439602, "learning_rate": 0.001, "loss": 0.4219, "step": 5121 }, { "epoch": 0.1413272538516988, "grad_norm": 0.0026034133043140173, "learning_rate": 0.001, "loss": 0.4131, "step": 5122 }, { "epoch": 0.1413548460527632, "grad_norm": 0.004363723564893007, "learning_rate": 0.001, "loss": 0.4695, "step": 5123 }, { "epoch": 0.14138243825382757, "grad_norm": 0.002366288099437952, "learning_rate": 0.001, "loss": 0.4195, "step": 5124 }, { "epoch": 0.14141003045489192, "grad_norm": 0.002647766610607505, "learning_rate": 0.001, "loss": 0.4012, "step": 5125 }, { "epoch": 0.1414376226559563, "grad_norm": 0.002447773702442646, "learning_rate": 0.001, "loss": 0.4414, "step": 5126 }, { "epoch": 0.14146521485702065, "grad_norm": 0.0026955637149512768, "learning_rate": 0.001, "loss": 0.3644, "step": 5127 }, { "epoch": 0.14149280705808503, "grad_norm": 0.004317718092352152, "learning_rate": 0.001, "loss": 0.3877, "step": 5128 }, { "epoch": 0.1415203992591494, "grad_norm": 0.002862673718482256, "learning_rate": 0.001, "loss": 0.4218, "step": 5129 }, { "epoch": 0.14154799146021377, "grad_norm": 0.004444323480129242, "learning_rate": 0.001, "loss": 0.417, "step": 5130 }, { "epoch": 0.14157558366127815, "grad_norm": 0.008723779581487179, "learning_rate": 0.001, "loss": 0.4092, "step": 5131 }, { "epoch": 0.1416031758623425, "grad_norm": 0.0023829471319913864, "learning_rate": 0.001, "loss": 0.427, "step": 5132 }, { "epoch": 0.14163076806340688, "grad_norm": 0.0023019250947982073, "learning_rate": 0.001, "loss": 0.404, "step": 5133 }, { "epoch": 0.14165836026447123, "grad_norm": 0.002866733353585005, "learning_rate": 0.001, "loss": 0.4068, "step": 5134 }, { "epoch": 0.14168595246553561, "grad_norm": 0.002325895708054304, "learning_rate": 0.001, "loss": 0.3829, "step": 5135 }, { "epoch": 0.1417135446666, "grad_norm": 0.0031419056467711926, "learning_rate": 0.001, "loss": 0.394, "step": 5136 }, { "epoch": 0.14174113686766435, "grad_norm": 0.0049896384589374065, "learning_rate": 0.001, "loss": 0.4001, "step": 5137 }, { "epoch": 0.14176872906872873, "grad_norm": 0.006104922853410244, "learning_rate": 0.001, "loss": 0.3583, "step": 5138 }, { "epoch": 0.14179632126979308, "grad_norm": 0.005146206822246313, "learning_rate": 0.001, "loss": 0.3609, "step": 5139 }, { "epoch": 0.14182391347085746, "grad_norm": 0.0036859933752566576, "learning_rate": 0.001, "loss": 0.3677, "step": 5140 }, { "epoch": 0.14185150567192184, "grad_norm": 0.0021758072543889284, "learning_rate": 0.001, "loss": 0.401, "step": 5141 }, { "epoch": 0.1418790978729862, "grad_norm": 0.002568889642134309, "learning_rate": 0.001, "loss": 0.4083, "step": 5142 }, { "epoch": 0.14190669007405057, "grad_norm": 0.003079349873587489, "learning_rate": 0.001, "loss": 0.4044, "step": 5143 }, { "epoch": 0.14193428227511493, "grad_norm": 0.006028784904628992, "learning_rate": 0.001, "loss": 0.3697, "step": 5144 }, { "epoch": 0.1419618744761793, "grad_norm": 0.0028043135534971952, "learning_rate": 0.001, "loss": 0.3852, "step": 5145 }, { "epoch": 0.1419894666772437, "grad_norm": 0.0026649232022464275, "learning_rate": 0.001, "loss": 0.4122, "step": 5146 }, { "epoch": 0.14201705887830804, "grad_norm": 0.003915826324373484, "learning_rate": 0.001, "loss": 0.382, "step": 5147 }, { "epoch": 0.14204465107937242, "grad_norm": 0.0024036671966314316, "learning_rate": 0.001, "loss": 0.423, "step": 5148 }, { "epoch": 0.14207224328043677, "grad_norm": 0.0042000822722911835, "learning_rate": 0.001, "loss": 0.3781, "step": 5149 }, { "epoch": 0.14209983548150115, "grad_norm": 0.0055633410811424255, "learning_rate": 0.001, "loss": 0.4201, "step": 5150 }, { "epoch": 0.14212742768256553, "grad_norm": 0.0037763305008411407, "learning_rate": 0.001, "loss": 0.4247, "step": 5151 }, { "epoch": 0.14215501988362989, "grad_norm": 0.0034036405850201845, "learning_rate": 0.001, "loss": 0.399, "step": 5152 }, { "epoch": 0.14218261208469427, "grad_norm": 0.005322457291185856, "learning_rate": 0.001, "loss": 0.4153, "step": 5153 }, { "epoch": 0.14221020428575862, "grad_norm": 0.0037400966975837946, "learning_rate": 0.001, "loss": 0.4159, "step": 5154 }, { "epoch": 0.142237796486823, "grad_norm": 0.005500406958162785, "learning_rate": 0.001, "loss": 0.4355, "step": 5155 }, { "epoch": 0.14226538868788738, "grad_norm": 0.003691243240609765, "learning_rate": 0.001, "loss": 0.4085, "step": 5156 }, { "epoch": 0.14229298088895173, "grad_norm": 0.005320982076227665, "learning_rate": 0.001, "loss": 0.3824, "step": 5157 }, { "epoch": 0.1423205730900161, "grad_norm": 0.0033200359903275967, "learning_rate": 0.001, "loss": 0.4042, "step": 5158 }, { "epoch": 0.14234816529108046, "grad_norm": 0.0034416653215885162, "learning_rate": 0.001, "loss": 0.398, "step": 5159 }, { "epoch": 0.14237575749214484, "grad_norm": 0.0032915968913584948, "learning_rate": 0.001, "loss": 0.4318, "step": 5160 }, { "epoch": 0.14240334969320922, "grad_norm": 0.00513947568833828, "learning_rate": 0.001, "loss": 0.4042, "step": 5161 }, { "epoch": 0.14243094189427358, "grad_norm": 0.005058473441749811, "learning_rate": 0.001, "loss": 0.3942, "step": 5162 }, { "epoch": 0.14245853409533796, "grad_norm": 0.003918751142919064, "learning_rate": 0.001, "loss": 0.3813, "step": 5163 }, { "epoch": 0.1424861262964023, "grad_norm": 0.0029717017896473408, "learning_rate": 0.001, "loss": 0.4186, "step": 5164 }, { "epoch": 0.1425137184974667, "grad_norm": 0.004301862791180611, "learning_rate": 0.001, "loss": 0.3641, "step": 5165 }, { "epoch": 0.14254131069853107, "grad_norm": 0.0028698716778308153, "learning_rate": 0.001, "loss": 0.4313, "step": 5166 }, { "epoch": 0.14256890289959542, "grad_norm": 0.002715424867346883, "learning_rate": 0.001, "loss": 0.407, "step": 5167 }, { "epoch": 0.1425964951006598, "grad_norm": 0.002695337636396289, "learning_rate": 0.001, "loss": 0.403, "step": 5168 }, { "epoch": 0.14262408730172416, "grad_norm": 0.002396870171651244, "learning_rate": 0.001, "loss": 0.3978, "step": 5169 }, { "epoch": 0.14265167950278854, "grad_norm": 0.0023134411312639713, "learning_rate": 0.001, "loss": 0.405, "step": 5170 }, { "epoch": 0.14267927170385292, "grad_norm": 0.007215626537799835, "learning_rate": 0.001, "loss": 0.4436, "step": 5171 }, { "epoch": 0.14270686390491727, "grad_norm": 0.0030377882067114115, "learning_rate": 0.001, "loss": 0.3841, "step": 5172 }, { "epoch": 0.14273445610598165, "grad_norm": 0.0052809203043580055, "learning_rate": 0.001, "loss": 0.3806, "step": 5173 }, { "epoch": 0.142762048307046, "grad_norm": 0.0030719470232725143, "learning_rate": 0.001, "loss": 0.3556, "step": 5174 }, { "epoch": 0.14278964050811038, "grad_norm": 0.003323314245790243, "learning_rate": 0.001, "loss": 0.3951, "step": 5175 }, { "epoch": 0.14281723270917476, "grad_norm": 0.004299065563827753, "learning_rate": 0.001, "loss": 0.4111, "step": 5176 }, { "epoch": 0.14284482491023912, "grad_norm": 0.006395953707396984, "learning_rate": 0.001, "loss": 0.4018, "step": 5177 }, { "epoch": 0.1428724171113035, "grad_norm": 0.0036343904212117195, "learning_rate": 0.001, "loss": 0.3925, "step": 5178 }, { "epoch": 0.14290000931236785, "grad_norm": 0.003354752203449607, "learning_rate": 0.001, "loss": 0.4022, "step": 5179 }, { "epoch": 0.14292760151343223, "grad_norm": 0.0029036353807896376, "learning_rate": 0.001, "loss": 0.4085, "step": 5180 }, { "epoch": 0.1429551937144966, "grad_norm": 0.0033736219629645348, "learning_rate": 0.001, "loss": 0.3972, "step": 5181 }, { "epoch": 0.14298278591556096, "grad_norm": 0.00228399527259171, "learning_rate": 0.001, "loss": 0.4349, "step": 5182 }, { "epoch": 0.14301037811662534, "grad_norm": 0.0025460305623710155, "learning_rate": 0.001, "loss": 0.3771, "step": 5183 }, { "epoch": 0.1430379703176897, "grad_norm": 0.0031551956199109554, "learning_rate": 0.001, "loss": 0.3833, "step": 5184 }, { "epoch": 0.14306556251875407, "grad_norm": 0.003111305646598339, "learning_rate": 0.001, "loss": 0.399, "step": 5185 }, { "epoch": 0.14309315471981846, "grad_norm": 0.0028325961902737617, "learning_rate": 0.001, "loss": 0.4014, "step": 5186 }, { "epoch": 0.1431207469208828, "grad_norm": 0.0034688191954046488, "learning_rate": 0.001, "loss": 0.4025, "step": 5187 }, { "epoch": 0.1431483391219472, "grad_norm": 0.0022599126677960157, "learning_rate": 0.001, "loss": 0.4351, "step": 5188 }, { "epoch": 0.14317593132301154, "grad_norm": 0.004831294994801283, "learning_rate": 0.001, "loss": 0.4004, "step": 5189 }, { "epoch": 0.14320352352407592, "grad_norm": 0.0029807155951857567, "learning_rate": 0.001, "loss": 0.3886, "step": 5190 }, { "epoch": 0.1432311157251403, "grad_norm": 0.003226724686101079, "learning_rate": 0.001, "loss": 0.4137, "step": 5191 }, { "epoch": 0.14325870792620465, "grad_norm": 0.0025674651842564344, "learning_rate": 0.001, "loss": 0.4403, "step": 5192 }, { "epoch": 0.14328630012726903, "grad_norm": 0.002636546269059181, "learning_rate": 0.001, "loss": 0.4202, "step": 5193 }, { "epoch": 0.1433138923283334, "grad_norm": 0.003959611523896456, "learning_rate": 0.001, "loss": 0.4153, "step": 5194 }, { "epoch": 0.14334148452939777, "grad_norm": 0.00245193880982697, "learning_rate": 0.001, "loss": 0.4404, "step": 5195 }, { "epoch": 0.14336907673046215, "grad_norm": 0.0033164939377456903, "learning_rate": 0.001, "loss": 0.384, "step": 5196 }, { "epoch": 0.1433966689315265, "grad_norm": 0.0031362988520413637, "learning_rate": 0.001, "loss": 0.3822, "step": 5197 }, { "epoch": 0.14342426113259088, "grad_norm": 0.0031361919827759266, "learning_rate": 0.001, "loss": 0.387, "step": 5198 }, { "epoch": 0.14345185333365523, "grad_norm": 0.0025203994009643793, "learning_rate": 0.001, "loss": 0.4244, "step": 5199 }, { "epoch": 0.1434794455347196, "grad_norm": 0.0021688216365873814, "learning_rate": 0.001, "loss": 0.3967, "step": 5200 }, { "epoch": 0.143507037735784, "grad_norm": 0.0022967627737671137, "learning_rate": 0.001, "loss": 0.443, "step": 5201 }, { "epoch": 0.14353462993684835, "grad_norm": 0.002367371693253517, "learning_rate": 0.001, "loss": 0.4249, "step": 5202 }, { "epoch": 0.14356222213791273, "grad_norm": 0.002286283066496253, "learning_rate": 0.001, "loss": 0.381, "step": 5203 }, { "epoch": 0.14358981433897708, "grad_norm": 0.002556881867349148, "learning_rate": 0.001, "loss": 0.4153, "step": 5204 }, { "epoch": 0.14361740654004146, "grad_norm": 0.002457389608025551, "learning_rate": 0.001, "loss": 0.4515, "step": 5205 }, { "epoch": 0.14364499874110584, "grad_norm": 0.0026917587965726852, "learning_rate": 0.001, "loss": 0.4123, "step": 5206 }, { "epoch": 0.1436725909421702, "grad_norm": 0.0024027330800890923, "learning_rate": 0.001, "loss": 0.4429, "step": 5207 }, { "epoch": 0.14370018314323457, "grad_norm": 0.0036219994071871042, "learning_rate": 0.001, "loss": 0.3975, "step": 5208 }, { "epoch": 0.14372777534429892, "grad_norm": 0.0024791264440864325, "learning_rate": 0.001, "loss": 0.3944, "step": 5209 }, { "epoch": 0.1437553675453633, "grad_norm": 0.002413226757198572, "learning_rate": 0.001, "loss": 0.4413, "step": 5210 }, { "epoch": 0.14378295974642769, "grad_norm": 0.0027174693532288074, "learning_rate": 0.001, "loss": 0.3878, "step": 5211 }, { "epoch": 0.14381055194749204, "grad_norm": 0.002803023438900709, "learning_rate": 0.001, "loss": 0.3682, "step": 5212 }, { "epoch": 0.14383814414855642, "grad_norm": 0.004972567781805992, "learning_rate": 0.001, "loss": 0.4127, "step": 5213 }, { "epoch": 0.14386573634962077, "grad_norm": 0.003360881470143795, "learning_rate": 0.001, "loss": 0.414, "step": 5214 }, { "epoch": 0.14389332855068515, "grad_norm": 0.0027532910462468863, "learning_rate": 0.001, "loss": 0.3699, "step": 5215 }, { "epoch": 0.14392092075174953, "grad_norm": 0.003589104162529111, "learning_rate": 0.001, "loss": 0.3631, "step": 5216 }, { "epoch": 0.14394851295281388, "grad_norm": 0.004771512001752853, "learning_rate": 0.001, "loss": 0.3912, "step": 5217 }, { "epoch": 0.14397610515387826, "grad_norm": 0.0026405095122754574, "learning_rate": 0.001, "loss": 0.3975, "step": 5218 }, { "epoch": 0.14400369735494262, "grad_norm": 0.004283549264073372, "learning_rate": 0.001, "loss": 0.3746, "step": 5219 }, { "epoch": 0.144031289556007, "grad_norm": 0.0045742373913526535, "learning_rate": 0.001, "loss": 0.3972, "step": 5220 }, { "epoch": 0.14405888175707138, "grad_norm": 0.0042295148596167564, "learning_rate": 0.001, "loss": 0.392, "step": 5221 }, { "epoch": 0.14408647395813573, "grad_norm": 0.004257339984178543, "learning_rate": 0.001, "loss": 0.42, "step": 5222 }, { "epoch": 0.1441140661592001, "grad_norm": 0.002708716783672571, "learning_rate": 0.001, "loss": 0.4022, "step": 5223 }, { "epoch": 0.14414165836026446, "grad_norm": 0.004137181676924229, "learning_rate": 0.001, "loss": 0.4084, "step": 5224 }, { "epoch": 0.14416925056132884, "grad_norm": 0.005688484758138657, "learning_rate": 0.001, "loss": 0.4089, "step": 5225 }, { "epoch": 0.1441968427623932, "grad_norm": 0.008182493969798088, "learning_rate": 0.001, "loss": 0.4253, "step": 5226 }, { "epoch": 0.14422443496345758, "grad_norm": 0.003255674382671714, "learning_rate": 0.001, "loss": 0.4268, "step": 5227 }, { "epoch": 0.14425202716452196, "grad_norm": 0.0024103031028062105, "learning_rate": 0.001, "loss": 0.4263, "step": 5228 }, { "epoch": 0.1442796193655863, "grad_norm": 0.0030367637518793344, "learning_rate": 0.001, "loss": 0.4082, "step": 5229 }, { "epoch": 0.1443072115666507, "grad_norm": 0.004183416720479727, "learning_rate": 0.001, "loss": 0.42, "step": 5230 }, { "epoch": 0.14433480376771504, "grad_norm": 0.002727340441197157, "learning_rate": 0.001, "loss": 0.3887, "step": 5231 }, { "epoch": 0.14436239596877942, "grad_norm": 0.003591054119169712, "learning_rate": 0.001, "loss": 0.3937, "step": 5232 }, { "epoch": 0.1443899881698438, "grad_norm": 0.0028464680071920156, "learning_rate": 0.001, "loss": 0.359, "step": 5233 }, { "epoch": 0.14441758037090816, "grad_norm": 0.006335907150059938, "learning_rate": 0.001, "loss": 0.3894, "step": 5234 }, { "epoch": 0.14444517257197254, "grad_norm": 0.0027192372363060713, "learning_rate": 0.001, "loss": 0.4119, "step": 5235 }, { "epoch": 0.1444727647730369, "grad_norm": 0.003246571170166135, "learning_rate": 0.001, "loss": 0.385, "step": 5236 }, { "epoch": 0.14450035697410127, "grad_norm": 0.0024179634638130665, "learning_rate": 0.001, "loss": 0.4123, "step": 5237 }, { "epoch": 0.14452794917516565, "grad_norm": 0.006938959006220102, "learning_rate": 0.001, "loss": 0.4152, "step": 5238 }, { "epoch": 0.14455554137623, "grad_norm": 0.004923888016492128, "learning_rate": 0.001, "loss": 0.4235, "step": 5239 }, { "epoch": 0.14458313357729438, "grad_norm": 0.0031706364825367928, "learning_rate": 0.001, "loss": 0.3494, "step": 5240 }, { "epoch": 0.14461072577835873, "grad_norm": 0.002737032249569893, "learning_rate": 0.001, "loss": 0.4248, "step": 5241 }, { "epoch": 0.14463831797942311, "grad_norm": 0.0025771090295165777, "learning_rate": 0.001, "loss": 0.4101, "step": 5242 }, { "epoch": 0.1446659101804875, "grad_norm": 0.0030646645464003086, "learning_rate": 0.001, "loss": 0.3956, "step": 5243 }, { "epoch": 0.14469350238155185, "grad_norm": 0.0036759376525878906, "learning_rate": 0.001, "loss": 0.3846, "step": 5244 }, { "epoch": 0.14472109458261623, "grad_norm": 0.0030812059994786978, "learning_rate": 0.001, "loss": 0.4069, "step": 5245 }, { "epoch": 0.14474868678368058, "grad_norm": 0.0022093786392360926, "learning_rate": 0.001, "loss": 0.4016, "step": 5246 }, { "epoch": 0.14477627898474496, "grad_norm": 0.009639604948461056, "learning_rate": 0.001, "loss": 0.4283, "step": 5247 }, { "epoch": 0.14480387118580934, "grad_norm": 0.0032258466817438602, "learning_rate": 0.001, "loss": 0.4242, "step": 5248 }, { "epoch": 0.1448314633868737, "grad_norm": 0.0022797686979174614, "learning_rate": 0.001, "loss": 0.4, "step": 5249 }, { "epoch": 0.14485905558793807, "grad_norm": 0.004146902356296778, "learning_rate": 0.001, "loss": 0.3939, "step": 5250 }, { "epoch": 0.14488664778900243, "grad_norm": 0.006115823984146118, "learning_rate": 0.001, "loss": 0.4382, "step": 5251 }, { "epoch": 0.1449142399900668, "grad_norm": 0.007470074575394392, "learning_rate": 0.001, "loss": 0.4258, "step": 5252 }, { "epoch": 0.1449418321911312, "grad_norm": 0.005569383502006531, "learning_rate": 0.001, "loss": 0.4357, "step": 5253 }, { "epoch": 0.14496942439219554, "grad_norm": 0.004871412180364132, "learning_rate": 0.001, "loss": 0.4048, "step": 5254 }, { "epoch": 0.14499701659325992, "grad_norm": 0.0038668601773679256, "learning_rate": 0.001, "loss": 0.3764, "step": 5255 }, { "epoch": 0.14502460879432427, "grad_norm": 0.00738053023815155, "learning_rate": 0.001, "loss": 0.4162, "step": 5256 }, { "epoch": 0.14505220099538865, "grad_norm": 0.004165946505963802, "learning_rate": 0.001, "loss": 0.3671, "step": 5257 }, { "epoch": 0.14507979319645303, "grad_norm": 0.004606361500918865, "learning_rate": 0.001, "loss": 0.3856, "step": 5258 }, { "epoch": 0.14510738539751739, "grad_norm": 0.002536054700613022, "learning_rate": 0.001, "loss": 0.4243, "step": 5259 }, { "epoch": 0.14513497759858177, "grad_norm": 0.005060167983174324, "learning_rate": 0.001, "loss": 0.4091, "step": 5260 }, { "epoch": 0.14516256979964612, "grad_norm": 0.004944161977618933, "learning_rate": 0.001, "loss": 0.3806, "step": 5261 }, { "epoch": 0.1451901620007105, "grad_norm": 0.0029536515939980745, "learning_rate": 0.001, "loss": 0.3705, "step": 5262 }, { "epoch": 0.14521775420177488, "grad_norm": 0.0047346302308142185, "learning_rate": 0.001, "loss": 0.3741, "step": 5263 }, { "epoch": 0.14524534640283923, "grad_norm": 0.0024177066516131163, "learning_rate": 0.001, "loss": 0.4059, "step": 5264 }, { "epoch": 0.1452729386039036, "grad_norm": 0.0036898923572152853, "learning_rate": 0.001, "loss": 0.4304, "step": 5265 }, { "epoch": 0.14530053080496796, "grad_norm": 0.0027802560944110155, "learning_rate": 0.001, "loss": 0.3662, "step": 5266 }, { "epoch": 0.14532812300603234, "grad_norm": 0.0035621714778244495, "learning_rate": 0.001, "loss": 0.4256, "step": 5267 }, { "epoch": 0.14535571520709673, "grad_norm": 0.004401834215968847, "learning_rate": 0.001, "loss": 0.427, "step": 5268 }, { "epoch": 0.14538330740816108, "grad_norm": 0.0035394374281167984, "learning_rate": 0.001, "loss": 0.4051, "step": 5269 }, { "epoch": 0.14541089960922546, "grad_norm": 0.017217174172401428, "learning_rate": 0.001, "loss": 0.3675, "step": 5270 }, { "epoch": 0.1454384918102898, "grad_norm": 0.0022773677483201027, "learning_rate": 0.001, "loss": 0.384, "step": 5271 }, { "epoch": 0.1454660840113542, "grad_norm": 0.002403079532086849, "learning_rate": 0.001, "loss": 0.4004, "step": 5272 }, { "epoch": 0.14549367621241857, "grad_norm": 0.0033804429695010185, "learning_rate": 0.001, "loss": 0.3854, "step": 5273 }, { "epoch": 0.14552126841348292, "grad_norm": 0.002910598646849394, "learning_rate": 0.001, "loss": 0.3705, "step": 5274 }, { "epoch": 0.1455488606145473, "grad_norm": 0.008523901924490929, "learning_rate": 0.001, "loss": 0.4028, "step": 5275 }, { "epoch": 0.14557645281561166, "grad_norm": 0.003371842671185732, "learning_rate": 0.001, "loss": 0.3933, "step": 5276 }, { "epoch": 0.14560404501667604, "grad_norm": 0.009514998644590378, "learning_rate": 0.001, "loss": 0.38, "step": 5277 }, { "epoch": 0.14563163721774042, "grad_norm": 0.002840045839548111, "learning_rate": 0.001, "loss": 0.3884, "step": 5278 }, { "epoch": 0.14565922941880477, "grad_norm": 0.0025841807946562767, "learning_rate": 0.001, "loss": 0.4236, "step": 5279 }, { "epoch": 0.14568682161986915, "grad_norm": 0.003922698087990284, "learning_rate": 0.001, "loss": 0.3799, "step": 5280 }, { "epoch": 0.1457144138209335, "grad_norm": 0.004982591141015291, "learning_rate": 0.001, "loss": 0.3799, "step": 5281 }, { "epoch": 0.14574200602199788, "grad_norm": 0.003860799828544259, "learning_rate": 0.001, "loss": 0.3455, "step": 5282 }, { "epoch": 0.14576959822306226, "grad_norm": 0.003407267387956381, "learning_rate": 0.001, "loss": 0.3778, "step": 5283 }, { "epoch": 0.14579719042412662, "grad_norm": 0.003815464908257127, "learning_rate": 0.001, "loss": 0.4121, "step": 5284 }, { "epoch": 0.145824782625191, "grad_norm": 0.002572751371189952, "learning_rate": 0.001, "loss": 0.4426, "step": 5285 }, { "epoch": 0.14585237482625535, "grad_norm": 0.0026371986605226994, "learning_rate": 0.001, "loss": 0.4126, "step": 5286 }, { "epoch": 0.14587996702731973, "grad_norm": 0.0028192142490297556, "learning_rate": 0.001, "loss": 0.3687, "step": 5287 }, { "epoch": 0.1459075592283841, "grad_norm": 0.0033562618773430586, "learning_rate": 0.001, "loss": 0.384, "step": 5288 }, { "epoch": 0.14593515142944846, "grad_norm": 0.0029199314303696156, "learning_rate": 0.001, "loss": 0.4294, "step": 5289 }, { "epoch": 0.14596274363051284, "grad_norm": 0.0029893077444285154, "learning_rate": 0.001, "loss": 0.3899, "step": 5290 }, { "epoch": 0.1459903358315772, "grad_norm": 0.002518982160836458, "learning_rate": 0.001, "loss": 0.3975, "step": 5291 }, { "epoch": 0.14601792803264158, "grad_norm": 0.0025784457102417946, "learning_rate": 0.001, "loss": 0.3852, "step": 5292 }, { "epoch": 0.14604552023370596, "grad_norm": 0.002763275755569339, "learning_rate": 0.001, "loss": 0.3677, "step": 5293 }, { "epoch": 0.1460731124347703, "grad_norm": 0.0020732074044644833, "learning_rate": 0.001, "loss": 0.38, "step": 5294 }, { "epoch": 0.1461007046358347, "grad_norm": 0.002579174702987075, "learning_rate": 0.001, "loss": 0.3737, "step": 5295 }, { "epoch": 0.14612829683689904, "grad_norm": 0.0026421458460390568, "learning_rate": 0.001, "loss": 0.4119, "step": 5296 }, { "epoch": 0.14615588903796342, "grad_norm": 0.0025588644202798605, "learning_rate": 0.001, "loss": 0.3795, "step": 5297 }, { "epoch": 0.1461834812390278, "grad_norm": 0.002680868376046419, "learning_rate": 0.001, "loss": 0.3794, "step": 5298 }, { "epoch": 0.14621107344009215, "grad_norm": 0.002638003323227167, "learning_rate": 0.001, "loss": 0.372, "step": 5299 }, { "epoch": 0.14623866564115653, "grad_norm": 0.002944410778582096, "learning_rate": 0.001, "loss": 0.3894, "step": 5300 }, { "epoch": 0.1462662578422209, "grad_norm": 0.0032715783454477787, "learning_rate": 0.001, "loss": 0.4187, "step": 5301 }, { "epoch": 0.14629385004328527, "grad_norm": 0.0038660853169858456, "learning_rate": 0.001, "loss": 0.3874, "step": 5302 }, { "epoch": 0.14632144224434965, "grad_norm": 0.0023569557815790176, "learning_rate": 0.001, "loss": 0.399, "step": 5303 }, { "epoch": 0.146349034445414, "grad_norm": 0.01049228198826313, "learning_rate": 0.001, "loss": 0.3707, "step": 5304 }, { "epoch": 0.14637662664647838, "grad_norm": 0.0023562125861644745, "learning_rate": 0.001, "loss": 0.4398, "step": 5305 }, { "epoch": 0.14640421884754273, "grad_norm": 0.002021912718191743, "learning_rate": 0.001, "loss": 0.4416, "step": 5306 }, { "epoch": 0.1464318110486071, "grad_norm": 0.002516190754249692, "learning_rate": 0.001, "loss": 0.4164, "step": 5307 }, { "epoch": 0.1464594032496715, "grad_norm": 0.0023932051844894886, "learning_rate": 0.001, "loss": 0.3724, "step": 5308 }, { "epoch": 0.14648699545073585, "grad_norm": 0.0037087369710206985, "learning_rate": 0.001, "loss": 0.3692, "step": 5309 }, { "epoch": 0.14651458765180023, "grad_norm": 0.002698789816349745, "learning_rate": 0.001, "loss": 0.4025, "step": 5310 }, { "epoch": 0.14654217985286458, "grad_norm": 0.0025848429650068283, "learning_rate": 0.001, "loss": 0.379, "step": 5311 }, { "epoch": 0.14656977205392896, "grad_norm": 0.00317647447809577, "learning_rate": 0.001, "loss": 0.3865, "step": 5312 }, { "epoch": 0.14659736425499334, "grad_norm": 0.0027970026712864637, "learning_rate": 0.001, "loss": 0.3687, "step": 5313 }, { "epoch": 0.1466249564560577, "grad_norm": 0.0031060813926160336, "learning_rate": 0.001, "loss": 0.3773, "step": 5314 }, { "epoch": 0.14665254865712207, "grad_norm": 0.004864480346441269, "learning_rate": 0.001, "loss": 0.4137, "step": 5315 }, { "epoch": 0.14668014085818643, "grad_norm": 0.0022926428355276585, "learning_rate": 0.001, "loss": 0.4042, "step": 5316 }, { "epoch": 0.1467077330592508, "grad_norm": 0.0029684528708457947, "learning_rate": 0.001, "loss": 0.3917, "step": 5317 }, { "epoch": 0.14673532526031516, "grad_norm": 0.0022980044595897198, "learning_rate": 0.001, "loss": 0.4402, "step": 5318 }, { "epoch": 0.14676291746137954, "grad_norm": 0.0021655671298503876, "learning_rate": 0.001, "loss": 0.4054, "step": 5319 }, { "epoch": 0.14679050966244392, "grad_norm": 0.002639767248183489, "learning_rate": 0.001, "loss": 0.3838, "step": 5320 }, { "epoch": 0.14681810186350827, "grad_norm": 0.005498727783560753, "learning_rate": 0.001, "loss": 0.3696, "step": 5321 }, { "epoch": 0.14684569406457265, "grad_norm": 0.0033152250107377768, "learning_rate": 0.001, "loss": 0.4278, "step": 5322 }, { "epoch": 0.146873286265637, "grad_norm": 0.004971036221832037, "learning_rate": 0.001, "loss": 0.3751, "step": 5323 }, { "epoch": 0.14690087846670138, "grad_norm": 0.0024375154171139, "learning_rate": 0.001, "loss": 0.3895, "step": 5324 }, { "epoch": 0.14692847066776576, "grad_norm": 0.002927098423242569, "learning_rate": 0.001, "loss": 0.3463, "step": 5325 }, { "epoch": 0.14695606286883012, "grad_norm": 0.0036146354395896196, "learning_rate": 0.001, "loss": 0.3996, "step": 5326 }, { "epoch": 0.1469836550698945, "grad_norm": 0.0024644196964800358, "learning_rate": 0.001, "loss": 0.4235, "step": 5327 }, { "epoch": 0.14701124727095885, "grad_norm": 0.0025911214761435986, "learning_rate": 0.001, "loss": 0.4279, "step": 5328 }, { "epoch": 0.14703883947202323, "grad_norm": 0.0025596728082746267, "learning_rate": 0.001, "loss": 0.3997, "step": 5329 }, { "epoch": 0.1470664316730876, "grad_norm": 0.002164337085559964, "learning_rate": 0.001, "loss": 0.416, "step": 5330 }, { "epoch": 0.14709402387415196, "grad_norm": 0.002624908462166786, "learning_rate": 0.001, "loss": 0.4399, "step": 5331 }, { "epoch": 0.14712161607521634, "grad_norm": 0.003358762711286545, "learning_rate": 0.001, "loss": 0.3718, "step": 5332 }, { "epoch": 0.1471492082762807, "grad_norm": 0.008317602798342705, "learning_rate": 0.001, "loss": 0.3849, "step": 5333 }, { "epoch": 0.14717680047734508, "grad_norm": 0.002502370160073042, "learning_rate": 0.001, "loss": 0.4124, "step": 5334 }, { "epoch": 0.14720439267840946, "grad_norm": 0.002517363289371133, "learning_rate": 0.001, "loss": 0.4155, "step": 5335 }, { "epoch": 0.1472319848794738, "grad_norm": 0.003344867378473282, "learning_rate": 0.001, "loss": 0.384, "step": 5336 }, { "epoch": 0.1472595770805382, "grad_norm": 0.003851872170343995, "learning_rate": 0.001, "loss": 0.3729, "step": 5337 }, { "epoch": 0.14728716928160254, "grad_norm": 0.002437508897855878, "learning_rate": 0.001, "loss": 0.4154, "step": 5338 }, { "epoch": 0.14731476148266692, "grad_norm": 0.0034037036821246147, "learning_rate": 0.001, "loss": 0.3994, "step": 5339 }, { "epoch": 0.1473423536837313, "grad_norm": 0.0027613432612270117, "learning_rate": 0.001, "loss": 0.4394, "step": 5340 }, { "epoch": 0.14736994588479566, "grad_norm": 0.002495642751455307, "learning_rate": 0.001, "loss": 0.4221, "step": 5341 }, { "epoch": 0.14739753808586004, "grad_norm": 0.0029324067290872335, "learning_rate": 0.001, "loss": 0.3461, "step": 5342 }, { "epoch": 0.1474251302869244, "grad_norm": 0.003891981905326247, "learning_rate": 0.001, "loss": 0.3634, "step": 5343 }, { "epoch": 0.14745272248798877, "grad_norm": 0.005491616670042276, "learning_rate": 0.001, "loss": 0.3875, "step": 5344 }, { "epoch": 0.14748031468905315, "grad_norm": 0.0028310040943324566, "learning_rate": 0.001, "loss": 0.3742, "step": 5345 }, { "epoch": 0.1475079068901175, "grad_norm": 0.0035124190617352724, "learning_rate": 0.001, "loss": 0.4099, "step": 5346 }, { "epoch": 0.14753549909118188, "grad_norm": 0.003958097193390131, "learning_rate": 0.001, "loss": 0.4097, "step": 5347 }, { "epoch": 0.14756309129224623, "grad_norm": 0.0031724879518151283, "learning_rate": 0.001, "loss": 0.3901, "step": 5348 }, { "epoch": 0.14759068349331061, "grad_norm": 0.006823610980063677, "learning_rate": 0.001, "loss": 0.3982, "step": 5349 }, { "epoch": 0.147618275694375, "grad_norm": 0.0023501457180827856, "learning_rate": 0.001, "loss": 0.3836, "step": 5350 }, { "epoch": 0.14764586789543935, "grad_norm": 0.0020646711345762014, "learning_rate": 0.001, "loss": 0.4265, "step": 5351 }, { "epoch": 0.14767346009650373, "grad_norm": 0.002760658971965313, "learning_rate": 0.001, "loss": 0.4048, "step": 5352 }, { "epoch": 0.14770105229756808, "grad_norm": 0.0029554900247603655, "learning_rate": 0.001, "loss": 0.4108, "step": 5353 }, { "epoch": 0.14772864449863246, "grad_norm": 0.0038962808903306723, "learning_rate": 0.001, "loss": 0.4192, "step": 5354 }, { "epoch": 0.14775623669969684, "grad_norm": 0.002745499601587653, "learning_rate": 0.001, "loss": 0.3895, "step": 5355 }, { "epoch": 0.1477838289007612, "grad_norm": 0.004177890717983246, "learning_rate": 0.001, "loss": 0.3632, "step": 5356 }, { "epoch": 0.14781142110182557, "grad_norm": 0.002319454913958907, "learning_rate": 0.001, "loss": 0.4225, "step": 5357 }, { "epoch": 0.14783901330288993, "grad_norm": 0.0026902640238404274, "learning_rate": 0.001, "loss": 0.4026, "step": 5358 }, { "epoch": 0.1478666055039543, "grad_norm": 0.003064702032133937, "learning_rate": 0.001, "loss": 0.4165, "step": 5359 }, { "epoch": 0.1478941977050187, "grad_norm": 0.003410772420465946, "learning_rate": 0.001, "loss": 0.3601, "step": 5360 }, { "epoch": 0.14792178990608304, "grad_norm": 0.0030657979659736156, "learning_rate": 0.001, "loss": 0.4095, "step": 5361 }, { "epoch": 0.14794938210714742, "grad_norm": 0.008095750585198402, "learning_rate": 0.001, "loss": 0.4101, "step": 5362 }, { "epoch": 0.14797697430821177, "grad_norm": 0.004853304475545883, "learning_rate": 0.001, "loss": 0.404, "step": 5363 }, { "epoch": 0.14800456650927615, "grad_norm": 0.0032035887707024813, "learning_rate": 0.001, "loss": 0.4001, "step": 5364 }, { "epoch": 0.14803215871034053, "grad_norm": 0.004527990240603685, "learning_rate": 0.001, "loss": 0.3729, "step": 5365 }, { "epoch": 0.14805975091140489, "grad_norm": 0.006700141355395317, "learning_rate": 0.001, "loss": 0.375, "step": 5366 }, { "epoch": 0.14808734311246927, "grad_norm": 0.00633378978818655, "learning_rate": 0.001, "loss": 0.4253, "step": 5367 }, { "epoch": 0.14811493531353362, "grad_norm": 0.0023305609356611967, "learning_rate": 0.001, "loss": 0.4106, "step": 5368 }, { "epoch": 0.148142527514598, "grad_norm": 0.0034147268161177635, "learning_rate": 0.001, "loss": 0.3556, "step": 5369 }, { "epoch": 0.14817011971566238, "grad_norm": 0.003086140612140298, "learning_rate": 0.001, "loss": 0.3958, "step": 5370 }, { "epoch": 0.14819771191672673, "grad_norm": 0.0061439163982868195, "learning_rate": 0.001, "loss": 0.3608, "step": 5371 }, { "epoch": 0.1482253041177911, "grad_norm": 0.0025781714357435703, "learning_rate": 0.001, "loss": 0.3895, "step": 5372 }, { "epoch": 0.14825289631885546, "grad_norm": 0.0026104720309376717, "learning_rate": 0.001, "loss": 0.427, "step": 5373 }, { "epoch": 0.14828048851991985, "grad_norm": 0.0037834334652870893, "learning_rate": 0.001, "loss": 0.391, "step": 5374 }, { "epoch": 0.14830808072098423, "grad_norm": 0.005206381902098656, "learning_rate": 0.001, "loss": 0.3974, "step": 5375 }, { "epoch": 0.14833567292204858, "grad_norm": 0.002475143875926733, "learning_rate": 0.001, "loss": 0.4218, "step": 5376 }, { "epoch": 0.14836326512311296, "grad_norm": 0.005634800065308809, "learning_rate": 0.001, "loss": 0.388, "step": 5377 }, { "epoch": 0.1483908573241773, "grad_norm": 0.004982566460967064, "learning_rate": 0.001, "loss": 0.4025, "step": 5378 }, { "epoch": 0.1484184495252417, "grad_norm": 0.0032306865323334932, "learning_rate": 0.001, "loss": 0.4247, "step": 5379 }, { "epoch": 0.14844604172630607, "grad_norm": 0.0074884905479848385, "learning_rate": 0.001, "loss": 0.405, "step": 5380 }, { "epoch": 0.14847363392737042, "grad_norm": 0.003231652779504657, "learning_rate": 0.001, "loss": 0.4048, "step": 5381 }, { "epoch": 0.1485012261284348, "grad_norm": 0.004374918062239885, "learning_rate": 0.001, "loss": 0.3829, "step": 5382 }, { "epoch": 0.14852881832949916, "grad_norm": 0.003996079787611961, "learning_rate": 0.001, "loss": 0.3606, "step": 5383 }, { "epoch": 0.14855641053056354, "grad_norm": 0.0023612806107848883, "learning_rate": 0.001, "loss": 0.4327, "step": 5384 }, { "epoch": 0.14858400273162792, "grad_norm": 0.002323776250705123, "learning_rate": 0.001, "loss": 0.3969, "step": 5385 }, { "epoch": 0.14861159493269227, "grad_norm": 0.003044640878215432, "learning_rate": 0.001, "loss": 0.4175, "step": 5386 }, { "epoch": 0.14863918713375665, "grad_norm": 0.0029183158185333014, "learning_rate": 0.001, "loss": 0.3853, "step": 5387 }, { "epoch": 0.148666779334821, "grad_norm": 0.0031076192390173674, "learning_rate": 0.001, "loss": 0.4054, "step": 5388 }, { "epoch": 0.14869437153588538, "grad_norm": 0.0032536948565393686, "learning_rate": 0.001, "loss": 0.4417, "step": 5389 }, { "epoch": 0.14872196373694976, "grad_norm": 0.0028777210973203182, "learning_rate": 0.001, "loss": 0.4035, "step": 5390 }, { "epoch": 0.14874955593801412, "grad_norm": 0.003044008044525981, "learning_rate": 0.001, "loss": 0.4017, "step": 5391 }, { "epoch": 0.1487771481390785, "grad_norm": 0.004723208025097847, "learning_rate": 0.001, "loss": 0.3847, "step": 5392 }, { "epoch": 0.14880474034014285, "grad_norm": 0.003199602710083127, "learning_rate": 0.001, "loss": 0.4212, "step": 5393 }, { "epoch": 0.14883233254120723, "grad_norm": 0.0029654945246875286, "learning_rate": 0.001, "loss": 0.4177, "step": 5394 }, { "epoch": 0.1488599247422716, "grad_norm": 0.003065300639718771, "learning_rate": 0.001, "loss": 0.4206, "step": 5395 }, { "epoch": 0.14888751694333596, "grad_norm": 0.003566544270142913, "learning_rate": 0.001, "loss": 0.3763, "step": 5396 }, { "epoch": 0.14891510914440034, "grad_norm": 0.0034122609067708254, "learning_rate": 0.001, "loss": 0.4046, "step": 5397 }, { "epoch": 0.1489427013454647, "grad_norm": 0.0025029098615050316, "learning_rate": 0.001, "loss": 0.3565, "step": 5398 }, { "epoch": 0.14897029354652908, "grad_norm": 0.0030023674480617046, "learning_rate": 0.001, "loss": 0.3885, "step": 5399 }, { "epoch": 0.14899788574759346, "grad_norm": 0.00424955366179347, "learning_rate": 0.001, "loss": 0.3888, "step": 5400 }, { "epoch": 0.1490254779486578, "grad_norm": 0.0030338119249790907, "learning_rate": 0.001, "loss": 0.3927, "step": 5401 }, { "epoch": 0.1490530701497222, "grad_norm": 0.0028448712546378374, "learning_rate": 0.001, "loss": 0.432, "step": 5402 }, { "epoch": 0.14908066235078654, "grad_norm": 0.0028374232351779938, "learning_rate": 0.001, "loss": 0.4157, "step": 5403 }, { "epoch": 0.14910825455185092, "grad_norm": 0.002151310909539461, "learning_rate": 0.001, "loss": 0.4508, "step": 5404 }, { "epoch": 0.1491358467529153, "grad_norm": 0.002506339456886053, "learning_rate": 0.001, "loss": 0.4381, "step": 5405 }, { "epoch": 0.14916343895397965, "grad_norm": 0.002707758452743292, "learning_rate": 0.001, "loss": 0.4113, "step": 5406 }, { "epoch": 0.14919103115504403, "grad_norm": 0.002374933334067464, "learning_rate": 0.001, "loss": 0.3643, "step": 5407 }, { "epoch": 0.1492186233561084, "grad_norm": 0.0027373386546969414, "learning_rate": 0.001, "loss": 0.3823, "step": 5408 }, { "epoch": 0.14924621555717277, "grad_norm": 0.004208944737911224, "learning_rate": 0.001, "loss": 0.4265, "step": 5409 }, { "epoch": 0.14927380775823715, "grad_norm": 0.0031958618201315403, "learning_rate": 0.001, "loss": 0.3799, "step": 5410 }, { "epoch": 0.1493013999593015, "grad_norm": 0.006793424021452665, "learning_rate": 0.001, "loss": 0.403, "step": 5411 }, { "epoch": 0.14932899216036588, "grad_norm": 0.003309778869152069, "learning_rate": 0.001, "loss": 0.3925, "step": 5412 }, { "epoch": 0.14935658436143023, "grad_norm": 0.002356642158702016, "learning_rate": 0.001, "loss": 0.4026, "step": 5413 }, { "epoch": 0.1493841765624946, "grad_norm": 0.0032743606716394424, "learning_rate": 0.001, "loss": 0.3839, "step": 5414 }, { "epoch": 0.14941176876355897, "grad_norm": 0.005772759672254324, "learning_rate": 0.001, "loss": 0.4099, "step": 5415 }, { "epoch": 0.14943936096462335, "grad_norm": 0.0065681166015565395, "learning_rate": 0.001, "loss": 0.4149, "step": 5416 }, { "epoch": 0.14946695316568773, "grad_norm": 0.0028679503593593836, "learning_rate": 0.001, "loss": 0.3978, "step": 5417 }, { "epoch": 0.14949454536675208, "grad_norm": 0.003005877137184143, "learning_rate": 0.001, "loss": 0.3848, "step": 5418 }, { "epoch": 0.14952213756781646, "grad_norm": 0.0022220280952751637, "learning_rate": 0.001, "loss": 0.4393, "step": 5419 }, { "epoch": 0.1495497297688808, "grad_norm": 0.0033852518536150455, "learning_rate": 0.001, "loss": 0.3692, "step": 5420 }, { "epoch": 0.1495773219699452, "grad_norm": 0.006787192076444626, "learning_rate": 0.001, "loss": 0.3895, "step": 5421 }, { "epoch": 0.14960491417100957, "grad_norm": 0.0036864059511572123, "learning_rate": 0.001, "loss": 0.4334, "step": 5422 }, { "epoch": 0.14963250637207393, "grad_norm": 0.002667534863576293, "learning_rate": 0.001, "loss": 0.4084, "step": 5423 }, { "epoch": 0.1496600985731383, "grad_norm": 0.003278045216575265, "learning_rate": 0.001, "loss": 0.416, "step": 5424 }, { "epoch": 0.14968769077420266, "grad_norm": 0.003076856257393956, "learning_rate": 0.001, "loss": 0.4169, "step": 5425 }, { "epoch": 0.14971528297526704, "grad_norm": 0.0023882794193923473, "learning_rate": 0.001, "loss": 0.3999, "step": 5426 }, { "epoch": 0.14974287517633142, "grad_norm": 0.003012517001479864, "learning_rate": 0.001, "loss": 0.4049, "step": 5427 }, { "epoch": 0.14977046737739577, "grad_norm": 0.0023737861774861813, "learning_rate": 0.001, "loss": 0.4265, "step": 5428 }, { "epoch": 0.14979805957846015, "grad_norm": 0.0022319392301142216, "learning_rate": 0.001, "loss": 0.4013, "step": 5429 }, { "epoch": 0.1498256517795245, "grad_norm": 0.0032811984419822693, "learning_rate": 0.001, "loss": 0.3898, "step": 5430 }, { "epoch": 0.14985324398058888, "grad_norm": 0.005578971467912197, "learning_rate": 0.001, "loss": 0.39, "step": 5431 }, { "epoch": 0.14988083618165327, "grad_norm": 0.003999901469796896, "learning_rate": 0.001, "loss": 0.4003, "step": 5432 }, { "epoch": 0.14990842838271762, "grad_norm": 0.002937354613095522, "learning_rate": 0.001, "loss": 0.4189, "step": 5433 }, { "epoch": 0.149936020583782, "grad_norm": 0.004472947679460049, "learning_rate": 0.001, "loss": 0.3836, "step": 5434 }, { "epoch": 0.14996361278484635, "grad_norm": 0.0043602860532701015, "learning_rate": 0.001, "loss": 0.3565, "step": 5435 }, { "epoch": 0.14999120498591073, "grad_norm": 0.0026499300729483366, "learning_rate": 0.001, "loss": 0.4039, "step": 5436 }, { "epoch": 0.1500187971869751, "grad_norm": 0.003208121517673135, "learning_rate": 0.001, "loss": 0.4198, "step": 5437 }, { "epoch": 0.15004638938803946, "grad_norm": 0.0024985617492347956, "learning_rate": 0.001, "loss": 0.4716, "step": 5438 }, { "epoch": 0.15007398158910384, "grad_norm": 0.0026381593197584152, "learning_rate": 0.001, "loss": 0.3799, "step": 5439 }, { "epoch": 0.1501015737901682, "grad_norm": 0.0035929230507463217, "learning_rate": 0.001, "loss": 0.3716, "step": 5440 }, { "epoch": 0.15012916599123258, "grad_norm": 0.005476321559399366, "learning_rate": 0.001, "loss": 0.4043, "step": 5441 }, { "epoch": 0.15015675819229696, "grad_norm": 0.00393927376717329, "learning_rate": 0.001, "loss": 0.3844, "step": 5442 }, { "epoch": 0.1501843503933613, "grad_norm": 0.003273927140980959, "learning_rate": 0.001, "loss": 0.403, "step": 5443 }, { "epoch": 0.1502119425944257, "grad_norm": 0.015024540945887566, "learning_rate": 0.001, "loss": 0.3915, "step": 5444 }, { "epoch": 0.15023953479549004, "grad_norm": 0.004785169847309589, "learning_rate": 0.001, "loss": 0.3738, "step": 5445 }, { "epoch": 0.15026712699655442, "grad_norm": 0.00336294062435627, "learning_rate": 0.001, "loss": 0.406, "step": 5446 }, { "epoch": 0.1502947191976188, "grad_norm": 0.0036235249135643244, "learning_rate": 0.001, "loss": 0.3839, "step": 5447 }, { "epoch": 0.15032231139868316, "grad_norm": 0.002741090953350067, "learning_rate": 0.001, "loss": 0.3762, "step": 5448 }, { "epoch": 0.15034990359974754, "grad_norm": 0.0024599130265414715, "learning_rate": 0.001, "loss": 0.4038, "step": 5449 }, { "epoch": 0.1503774958008119, "grad_norm": 0.002163279801607132, "learning_rate": 0.001, "loss": 0.4581, "step": 5450 }, { "epoch": 0.15040508800187627, "grad_norm": 0.005329553037881851, "learning_rate": 0.001, "loss": 0.375, "step": 5451 }, { "epoch": 0.15043268020294065, "grad_norm": 0.004300289321690798, "learning_rate": 0.001, "loss": 0.3871, "step": 5452 }, { "epoch": 0.150460272404005, "grad_norm": 0.003734968136996031, "learning_rate": 0.001, "loss": 0.3975, "step": 5453 }, { "epoch": 0.15048786460506938, "grad_norm": 0.0053179883398115635, "learning_rate": 0.001, "loss": 0.3729, "step": 5454 }, { "epoch": 0.15051545680613374, "grad_norm": 0.003352331230416894, "learning_rate": 0.001, "loss": 0.41, "step": 5455 }, { "epoch": 0.15054304900719812, "grad_norm": 0.0027811650652438402, "learning_rate": 0.001, "loss": 0.4354, "step": 5456 }, { "epoch": 0.1505706412082625, "grad_norm": 0.0069539607502520084, "learning_rate": 0.001, "loss": 0.4068, "step": 5457 }, { "epoch": 0.15059823340932685, "grad_norm": 0.002543016104027629, "learning_rate": 0.001, "loss": 0.475, "step": 5458 }, { "epoch": 0.15062582561039123, "grad_norm": 0.004235788248479366, "learning_rate": 0.001, "loss": 0.3636, "step": 5459 }, { "epoch": 0.15065341781145558, "grad_norm": 0.0021371094044297934, "learning_rate": 0.001, "loss": 0.3899, "step": 5460 }, { "epoch": 0.15068101001251996, "grad_norm": 0.0031575944740325212, "learning_rate": 0.001, "loss": 0.3835, "step": 5461 }, { "epoch": 0.15070860221358434, "grad_norm": 0.002763645024970174, "learning_rate": 0.001, "loss": 0.3843, "step": 5462 }, { "epoch": 0.1507361944146487, "grad_norm": 0.003907541744410992, "learning_rate": 0.001, "loss": 0.3748, "step": 5463 }, { "epoch": 0.15076378661571307, "grad_norm": 0.004303582012653351, "learning_rate": 0.001, "loss": 0.3733, "step": 5464 }, { "epoch": 0.15079137881677743, "grad_norm": 0.005774863064289093, "learning_rate": 0.001, "loss": 0.3817, "step": 5465 }, { "epoch": 0.1508189710178418, "grad_norm": 0.00487649068236351, "learning_rate": 0.001, "loss": 0.3776, "step": 5466 }, { "epoch": 0.1508465632189062, "grad_norm": 0.0033277124166488647, "learning_rate": 0.001, "loss": 0.418, "step": 5467 }, { "epoch": 0.15087415541997054, "grad_norm": 0.003046071156859398, "learning_rate": 0.001, "loss": 0.3428, "step": 5468 }, { "epoch": 0.15090174762103492, "grad_norm": 0.0024130302481353283, "learning_rate": 0.001, "loss": 0.3924, "step": 5469 }, { "epoch": 0.15092933982209927, "grad_norm": 0.002598168794065714, "learning_rate": 0.001, "loss": 0.3969, "step": 5470 }, { "epoch": 0.15095693202316365, "grad_norm": 0.00266568036749959, "learning_rate": 0.001, "loss": 0.4135, "step": 5471 }, { "epoch": 0.15098452422422803, "grad_norm": 0.0028029074892401695, "learning_rate": 0.001, "loss": 0.3714, "step": 5472 }, { "epoch": 0.1510121164252924, "grad_norm": 0.008100908249616623, "learning_rate": 0.001, "loss": 0.3997, "step": 5473 }, { "epoch": 0.15103970862635677, "grad_norm": 0.00268415710888803, "learning_rate": 0.001, "loss": 0.4178, "step": 5474 }, { "epoch": 0.15106730082742112, "grad_norm": 0.00282164104282856, "learning_rate": 0.001, "loss": 0.4258, "step": 5475 }, { "epoch": 0.1510948930284855, "grad_norm": 0.0029998181853443384, "learning_rate": 0.001, "loss": 0.422, "step": 5476 }, { "epoch": 0.15112248522954988, "grad_norm": 0.005322444252669811, "learning_rate": 0.001, "loss": 0.3662, "step": 5477 }, { "epoch": 0.15115007743061423, "grad_norm": 0.0020565236918628216, "learning_rate": 0.001, "loss": 0.4042, "step": 5478 }, { "epoch": 0.1511776696316786, "grad_norm": 0.0021356120705604553, "learning_rate": 0.001, "loss": 0.4134, "step": 5479 }, { "epoch": 0.15120526183274297, "grad_norm": 0.003150090342387557, "learning_rate": 0.001, "loss": 0.4225, "step": 5480 }, { "epoch": 0.15123285403380735, "grad_norm": 0.002523067407310009, "learning_rate": 0.001, "loss": 0.4469, "step": 5481 }, { "epoch": 0.15126044623487173, "grad_norm": 0.003910950850695372, "learning_rate": 0.001, "loss": 0.4317, "step": 5482 }, { "epoch": 0.15128803843593608, "grad_norm": 0.0035898578353226185, "learning_rate": 0.001, "loss": 0.429, "step": 5483 }, { "epoch": 0.15131563063700046, "grad_norm": 0.0025848867371678352, "learning_rate": 0.001, "loss": 0.3965, "step": 5484 }, { "epoch": 0.1513432228380648, "grad_norm": 0.0024130861274898052, "learning_rate": 0.001, "loss": 0.4291, "step": 5485 }, { "epoch": 0.1513708150391292, "grad_norm": 0.005302855744957924, "learning_rate": 0.001, "loss": 0.3912, "step": 5486 }, { "epoch": 0.15139840724019357, "grad_norm": 0.0032018995843827724, "learning_rate": 0.001, "loss": 0.3642, "step": 5487 }, { "epoch": 0.15142599944125792, "grad_norm": 0.0025910425465554, "learning_rate": 0.001, "loss": 0.3785, "step": 5488 }, { "epoch": 0.1514535916423223, "grad_norm": 0.003524304600432515, "learning_rate": 0.001, "loss": 0.4421, "step": 5489 }, { "epoch": 0.15148118384338666, "grad_norm": 0.0029097420629113913, "learning_rate": 0.001, "loss": 0.3874, "step": 5490 }, { "epoch": 0.15150877604445104, "grad_norm": 0.003354964079335332, "learning_rate": 0.001, "loss": 0.4103, "step": 5491 }, { "epoch": 0.15153636824551542, "grad_norm": 0.0032378071919083595, "learning_rate": 0.001, "loss": 0.4041, "step": 5492 }, { "epoch": 0.15156396044657977, "grad_norm": 0.0037639355286955833, "learning_rate": 0.001, "loss": 0.3751, "step": 5493 }, { "epoch": 0.15159155264764415, "grad_norm": 0.0043389336206018925, "learning_rate": 0.001, "loss": 0.3813, "step": 5494 }, { "epoch": 0.1516191448487085, "grad_norm": 0.0026339939795434475, "learning_rate": 0.001, "loss": 0.3972, "step": 5495 }, { "epoch": 0.15164673704977288, "grad_norm": 0.0039835344068706036, "learning_rate": 0.001, "loss": 0.3862, "step": 5496 }, { "epoch": 0.15167432925083726, "grad_norm": 0.003993440419435501, "learning_rate": 0.001, "loss": 0.4029, "step": 5497 }, { "epoch": 0.15170192145190162, "grad_norm": 0.003669349942356348, "learning_rate": 0.001, "loss": 0.3656, "step": 5498 }, { "epoch": 0.151729513652966, "grad_norm": 0.005064620170742273, "learning_rate": 0.001, "loss": 0.3677, "step": 5499 }, { "epoch": 0.15175710585403035, "grad_norm": 0.0034543632064014673, "learning_rate": 0.001, "loss": 0.4298, "step": 5500 }, { "epoch": 0.15175710585403035, "eval_runtime": 27.0484, "eval_samples_per_second": 1.183, "eval_steps_per_second": 0.148, "step": 5500 }, { "epoch": 0.15178469805509473, "grad_norm": 0.0021760701201856136, "learning_rate": 0.001, "loss": 0.4394, "step": 5501 }, { "epoch": 0.1518122902561591, "grad_norm": 0.0033101621083915234, "learning_rate": 0.001, "loss": 0.392, "step": 5502 }, { "epoch": 0.15183988245722346, "grad_norm": 0.005687963217496872, "learning_rate": 0.001, "loss": 0.3858, "step": 5503 }, { "epoch": 0.15186747465828784, "grad_norm": 0.0031170856673270464, "learning_rate": 0.001, "loss": 0.3453, "step": 5504 }, { "epoch": 0.1518950668593522, "grad_norm": 0.0025914981961250305, "learning_rate": 0.001, "loss": 0.3746, "step": 5505 }, { "epoch": 0.15192265906041658, "grad_norm": 0.0037096338346600533, "learning_rate": 0.001, "loss": 0.384, "step": 5506 }, { "epoch": 0.15195025126148093, "grad_norm": 0.0023521913681179285, "learning_rate": 0.001, "loss": 0.4116, "step": 5507 }, { "epoch": 0.1519778434625453, "grad_norm": 0.0021949107758700848, "learning_rate": 0.001, "loss": 0.4032, "step": 5508 }, { "epoch": 0.1520054356636097, "grad_norm": 0.00198304932564497, "learning_rate": 0.001, "loss": 0.4209, "step": 5509 }, { "epoch": 0.15203302786467404, "grad_norm": 0.0021362160332500935, "learning_rate": 0.001, "loss": 0.4115, "step": 5510 }, { "epoch": 0.15206062006573842, "grad_norm": 0.0028702253475785255, "learning_rate": 0.001, "loss": 0.402, "step": 5511 }, { "epoch": 0.15208821226680277, "grad_norm": 0.003037867834791541, "learning_rate": 0.001, "loss": 0.3797, "step": 5512 }, { "epoch": 0.15211580446786716, "grad_norm": 0.004557423759251833, "learning_rate": 0.001, "loss": 0.3621, "step": 5513 }, { "epoch": 0.15214339666893154, "grad_norm": 0.0027468795888125896, "learning_rate": 0.001, "loss": 0.4179, "step": 5514 }, { "epoch": 0.1521709888699959, "grad_norm": 0.004307581577450037, "learning_rate": 0.001, "loss": 0.3822, "step": 5515 }, { "epoch": 0.15219858107106027, "grad_norm": 0.0030242314096540213, "learning_rate": 0.001, "loss": 0.3733, "step": 5516 }, { "epoch": 0.15222617327212462, "grad_norm": 0.00433747936040163, "learning_rate": 0.001, "loss": 0.4184, "step": 5517 }, { "epoch": 0.152253765473189, "grad_norm": 0.009036600589752197, "learning_rate": 0.001, "loss": 0.4181, "step": 5518 }, { "epoch": 0.15228135767425338, "grad_norm": 0.012543873861432076, "learning_rate": 0.001, "loss": 0.4278, "step": 5519 }, { "epoch": 0.15230894987531773, "grad_norm": 0.009851133450865746, "learning_rate": 0.001, "loss": 0.4012, "step": 5520 }, { "epoch": 0.15233654207638211, "grad_norm": 0.013184795156121254, "learning_rate": 0.001, "loss": 0.4147, "step": 5521 }, { "epoch": 0.15236413427744647, "grad_norm": 0.017032135277986526, "learning_rate": 0.001, "loss": 0.3805, "step": 5522 }, { "epoch": 0.15239172647851085, "grad_norm": 0.002919899532571435, "learning_rate": 0.001, "loss": 0.4131, "step": 5523 }, { "epoch": 0.15241931867957523, "grad_norm": 0.002256933366879821, "learning_rate": 0.001, "loss": 0.4279, "step": 5524 }, { "epoch": 0.15244691088063958, "grad_norm": 0.0027415261138230562, "learning_rate": 0.001, "loss": 0.4324, "step": 5525 }, { "epoch": 0.15247450308170396, "grad_norm": 0.0031243073754012585, "learning_rate": 0.001, "loss": 0.3676, "step": 5526 }, { "epoch": 0.1525020952827683, "grad_norm": 0.0021765967831015587, "learning_rate": 0.001, "loss": 0.4111, "step": 5527 }, { "epoch": 0.1525296874838327, "grad_norm": 0.003611369989812374, "learning_rate": 0.001, "loss": 0.3896, "step": 5528 }, { "epoch": 0.15255727968489707, "grad_norm": 0.005910592619329691, "learning_rate": 0.001, "loss": 0.3917, "step": 5529 }, { "epoch": 0.15258487188596143, "grad_norm": 0.01214669831097126, "learning_rate": 0.001, "loss": 0.3718, "step": 5530 }, { "epoch": 0.1526124640870258, "grad_norm": 0.0060065449215471745, "learning_rate": 0.001, "loss": 0.402, "step": 5531 }, { "epoch": 0.15264005628809016, "grad_norm": 0.0025461267214268446, "learning_rate": 0.001, "loss": 0.3835, "step": 5532 }, { "epoch": 0.15266764848915454, "grad_norm": 0.002425495535135269, "learning_rate": 0.001, "loss": 0.381, "step": 5533 }, { "epoch": 0.15269524069021892, "grad_norm": 0.004517144989222288, "learning_rate": 0.001, "loss": 0.3968, "step": 5534 }, { "epoch": 0.15272283289128327, "grad_norm": 0.003472298150882125, "learning_rate": 0.001, "loss": 0.4016, "step": 5535 }, { "epoch": 0.15275042509234765, "grad_norm": 0.002562028355896473, "learning_rate": 0.001, "loss": 0.3576, "step": 5536 }, { "epoch": 0.152778017293412, "grad_norm": 0.0028723133727908134, "learning_rate": 0.001, "loss": 0.4317, "step": 5537 }, { "epoch": 0.15280560949447639, "grad_norm": 0.0022548751439899206, "learning_rate": 0.001, "loss": 0.4182, "step": 5538 }, { "epoch": 0.15283320169554077, "grad_norm": 0.0031076574232429266, "learning_rate": 0.001, "loss": 0.4241, "step": 5539 }, { "epoch": 0.15286079389660512, "grad_norm": 0.00288601266220212, "learning_rate": 0.001, "loss": 0.4068, "step": 5540 }, { "epoch": 0.1528883860976695, "grad_norm": 0.004477125592529774, "learning_rate": 0.001, "loss": 0.3715, "step": 5541 }, { "epoch": 0.15291597829873385, "grad_norm": 0.0031475056894123554, "learning_rate": 0.001, "loss": 0.4294, "step": 5542 }, { "epoch": 0.15294357049979823, "grad_norm": 0.0023465659469366074, "learning_rate": 0.001, "loss": 0.4104, "step": 5543 }, { "epoch": 0.1529711627008626, "grad_norm": 0.0032081464305520058, "learning_rate": 0.001, "loss": 0.3931, "step": 5544 }, { "epoch": 0.15299875490192696, "grad_norm": 0.002472371095791459, "learning_rate": 0.001, "loss": 0.4276, "step": 5545 }, { "epoch": 0.15302634710299134, "grad_norm": 0.006937100552022457, "learning_rate": 0.001, "loss": 0.4047, "step": 5546 }, { "epoch": 0.1530539393040557, "grad_norm": 0.0034694471396505833, "learning_rate": 0.001, "loss": 0.4476, "step": 5547 }, { "epoch": 0.15308153150512008, "grad_norm": 0.0028970185667276382, "learning_rate": 0.001, "loss": 0.3899, "step": 5548 }, { "epoch": 0.15310912370618446, "grad_norm": 0.0024902718141674995, "learning_rate": 0.001, "loss": 0.449, "step": 5549 }, { "epoch": 0.1531367159072488, "grad_norm": 0.0023354976437985897, "learning_rate": 0.001, "loss": 0.3648, "step": 5550 }, { "epoch": 0.1531643081083132, "grad_norm": 0.004294807091355324, "learning_rate": 0.001, "loss": 0.4073, "step": 5551 }, { "epoch": 0.15319190030937754, "grad_norm": 0.0020946157164871693, "learning_rate": 0.001, "loss": 0.3978, "step": 5552 }, { "epoch": 0.15321949251044192, "grad_norm": 0.003225631546229124, "learning_rate": 0.001, "loss": 0.3803, "step": 5553 }, { "epoch": 0.1532470847115063, "grad_norm": 0.0035047954879701138, "learning_rate": 0.001, "loss": 0.3462, "step": 5554 }, { "epoch": 0.15327467691257066, "grad_norm": 0.0029047110583633184, "learning_rate": 0.001, "loss": 0.393, "step": 5555 }, { "epoch": 0.15330226911363504, "grad_norm": 0.0030687206890434027, "learning_rate": 0.001, "loss": 0.3931, "step": 5556 }, { "epoch": 0.1533298613146994, "grad_norm": 0.0027540400624275208, "learning_rate": 0.001, "loss": 0.4126, "step": 5557 }, { "epoch": 0.15335745351576377, "grad_norm": 0.002840878674760461, "learning_rate": 0.001, "loss": 0.3541, "step": 5558 }, { "epoch": 0.15338504571682815, "grad_norm": 0.0027139533776789904, "learning_rate": 0.001, "loss": 0.4351, "step": 5559 }, { "epoch": 0.1534126379178925, "grad_norm": 0.0027314014732837677, "learning_rate": 0.001, "loss": 0.4102, "step": 5560 }, { "epoch": 0.15344023011895688, "grad_norm": 0.002478542272001505, "learning_rate": 0.001, "loss": 0.3936, "step": 5561 }, { "epoch": 0.15346782232002124, "grad_norm": 0.007170672062784433, "learning_rate": 0.001, "loss": 0.3703, "step": 5562 }, { "epoch": 0.15349541452108562, "grad_norm": 0.004415068309754133, "learning_rate": 0.001, "loss": 0.4195, "step": 5563 }, { "epoch": 0.15352300672215, "grad_norm": 0.004127574618905783, "learning_rate": 0.001, "loss": 0.3705, "step": 5564 }, { "epoch": 0.15355059892321435, "grad_norm": 0.0033065411262214184, "learning_rate": 0.001, "loss": 0.3814, "step": 5565 }, { "epoch": 0.15357819112427873, "grad_norm": 0.0036155430134385824, "learning_rate": 0.001, "loss": 0.4144, "step": 5566 }, { "epoch": 0.15360578332534308, "grad_norm": 0.0034816188272088766, "learning_rate": 0.001, "loss": 0.3936, "step": 5567 }, { "epoch": 0.15363337552640746, "grad_norm": 0.003091090824455023, "learning_rate": 0.001, "loss": 0.4052, "step": 5568 }, { "epoch": 0.15366096772747184, "grad_norm": 0.006985298823565245, "learning_rate": 0.001, "loss": 0.4364, "step": 5569 }, { "epoch": 0.1536885599285362, "grad_norm": 0.0029920649249106646, "learning_rate": 0.001, "loss": 0.4319, "step": 5570 }, { "epoch": 0.15371615212960058, "grad_norm": 0.004347649868577719, "learning_rate": 0.001, "loss": 0.361, "step": 5571 }, { "epoch": 0.15374374433066493, "grad_norm": 0.002314548706635833, "learning_rate": 0.001, "loss": 0.4181, "step": 5572 }, { "epoch": 0.1537713365317293, "grad_norm": 0.0023193899542093277, "learning_rate": 0.001, "loss": 0.4267, "step": 5573 }, { "epoch": 0.1537989287327937, "grad_norm": 0.004895602352917194, "learning_rate": 0.001, "loss": 0.3866, "step": 5574 }, { "epoch": 0.15382652093385804, "grad_norm": 0.003913875203579664, "learning_rate": 0.001, "loss": 0.3747, "step": 5575 }, { "epoch": 0.15385411313492242, "grad_norm": 0.002599104307591915, "learning_rate": 0.001, "loss": 0.4489, "step": 5576 }, { "epoch": 0.15388170533598677, "grad_norm": 0.0025326511822640896, "learning_rate": 0.001, "loss": 0.3903, "step": 5577 }, { "epoch": 0.15390929753705115, "grad_norm": 0.003771717194467783, "learning_rate": 0.001, "loss": 0.3915, "step": 5578 }, { "epoch": 0.15393688973811553, "grad_norm": 0.004678527358919382, "learning_rate": 0.001, "loss": 0.3761, "step": 5579 }, { "epoch": 0.1539644819391799, "grad_norm": 0.0024059766437858343, "learning_rate": 0.001, "loss": 0.3815, "step": 5580 }, { "epoch": 0.15399207414024427, "grad_norm": 0.003771177725866437, "learning_rate": 0.001, "loss": 0.3776, "step": 5581 }, { "epoch": 0.15401966634130862, "grad_norm": 0.002773088635876775, "learning_rate": 0.001, "loss": 0.4007, "step": 5582 }, { "epoch": 0.154047258542373, "grad_norm": 0.003286458784714341, "learning_rate": 0.001, "loss": 0.4284, "step": 5583 }, { "epoch": 0.15407485074343738, "grad_norm": 0.007167492527514696, "learning_rate": 0.001, "loss": 0.3719, "step": 5584 }, { "epoch": 0.15410244294450173, "grad_norm": 0.0022903766948729753, "learning_rate": 0.001, "loss": 0.4223, "step": 5585 }, { "epoch": 0.1541300351455661, "grad_norm": 0.0034131724387407303, "learning_rate": 0.001, "loss": 0.3919, "step": 5586 }, { "epoch": 0.15415762734663047, "grad_norm": 0.005761331412941217, "learning_rate": 0.001, "loss": 0.3989, "step": 5587 }, { "epoch": 0.15418521954769485, "grad_norm": 0.0029236627742648125, "learning_rate": 0.001, "loss": 0.3858, "step": 5588 }, { "epoch": 0.15421281174875923, "grad_norm": 0.0033100962173193693, "learning_rate": 0.001, "loss": 0.405, "step": 5589 }, { "epoch": 0.15424040394982358, "grad_norm": 0.003999118227511644, "learning_rate": 0.001, "loss": 0.4109, "step": 5590 }, { "epoch": 0.15426799615088796, "grad_norm": 0.006767808459699154, "learning_rate": 0.001, "loss": 0.4015, "step": 5591 }, { "epoch": 0.1542955883519523, "grad_norm": 0.0028603686951100826, "learning_rate": 0.001, "loss": 0.4035, "step": 5592 }, { "epoch": 0.1543231805530167, "grad_norm": 0.002447220031172037, "learning_rate": 0.001, "loss": 0.3855, "step": 5593 }, { "epoch": 0.15435077275408107, "grad_norm": 0.00598937040194869, "learning_rate": 0.001, "loss": 0.3911, "step": 5594 }, { "epoch": 0.15437836495514543, "grad_norm": 0.0037801298312842846, "learning_rate": 0.001, "loss": 0.4432, "step": 5595 }, { "epoch": 0.1544059571562098, "grad_norm": 0.00243676477111876, "learning_rate": 0.001, "loss": 0.4197, "step": 5596 }, { "epoch": 0.15443354935727416, "grad_norm": 0.0022302521392703056, "learning_rate": 0.001, "loss": 0.4172, "step": 5597 }, { "epoch": 0.15446114155833854, "grad_norm": 0.002573461504653096, "learning_rate": 0.001, "loss": 0.4177, "step": 5598 }, { "epoch": 0.15448873375940292, "grad_norm": 0.004255924839526415, "learning_rate": 0.001, "loss": 0.3875, "step": 5599 }, { "epoch": 0.15451632596046727, "grad_norm": 0.0026880300138145685, "learning_rate": 0.001, "loss": 0.3755, "step": 5600 }, { "epoch": 0.15454391816153165, "grad_norm": 0.00242446456104517, "learning_rate": 0.001, "loss": 0.4213, "step": 5601 }, { "epoch": 0.154571510362596, "grad_norm": 0.00247559929266572, "learning_rate": 0.001, "loss": 0.4105, "step": 5602 }, { "epoch": 0.15459910256366038, "grad_norm": 0.003476825775578618, "learning_rate": 0.001, "loss": 0.3411, "step": 5603 }, { "epoch": 0.15462669476472474, "grad_norm": 0.003844754071906209, "learning_rate": 0.001, "loss": 0.3922, "step": 5604 }, { "epoch": 0.15465428696578912, "grad_norm": 0.0029344146605581045, "learning_rate": 0.001, "loss": 0.4211, "step": 5605 }, { "epoch": 0.1546818791668535, "grad_norm": 0.0040620798245072365, "learning_rate": 0.001, "loss": 0.3821, "step": 5606 }, { "epoch": 0.15470947136791785, "grad_norm": 0.0020004198886454105, "learning_rate": 0.001, "loss": 0.4069, "step": 5607 }, { "epoch": 0.15473706356898223, "grad_norm": 0.002827103016898036, "learning_rate": 0.001, "loss": 0.376, "step": 5608 }, { "epoch": 0.15476465577004658, "grad_norm": 0.0023741433396935463, "learning_rate": 0.001, "loss": 0.4229, "step": 5609 }, { "epoch": 0.15479224797111096, "grad_norm": 0.004414036870002747, "learning_rate": 0.001, "loss": 0.4071, "step": 5610 }, { "epoch": 0.15481984017217534, "grad_norm": 0.0030676499009132385, "learning_rate": 0.001, "loss": 0.3843, "step": 5611 }, { "epoch": 0.1548474323732397, "grad_norm": 0.0032093478366732597, "learning_rate": 0.001, "loss": 0.401, "step": 5612 }, { "epoch": 0.15487502457430408, "grad_norm": 0.0024001640267670155, "learning_rate": 0.001, "loss": 0.4511, "step": 5613 }, { "epoch": 0.15490261677536843, "grad_norm": 0.004838323220610619, "learning_rate": 0.001, "loss": 0.4094, "step": 5614 }, { "epoch": 0.1549302089764328, "grad_norm": 0.003309109481051564, "learning_rate": 0.001, "loss": 0.3565, "step": 5615 }, { "epoch": 0.1549578011774972, "grad_norm": 0.0025519407354295254, "learning_rate": 0.001, "loss": 0.4047, "step": 5616 }, { "epoch": 0.15498539337856154, "grad_norm": 0.002377223689109087, "learning_rate": 0.001, "loss": 0.4146, "step": 5617 }, { "epoch": 0.15501298557962592, "grad_norm": 0.002599177649244666, "learning_rate": 0.001, "loss": 0.4414, "step": 5618 }, { "epoch": 0.15504057778069028, "grad_norm": 0.0029773900751024485, "learning_rate": 0.001, "loss": 0.4301, "step": 5619 }, { "epoch": 0.15506816998175466, "grad_norm": 0.0042325593531131744, "learning_rate": 0.001, "loss": 0.4303, "step": 5620 }, { "epoch": 0.15509576218281904, "grad_norm": 0.0032665403559803963, "learning_rate": 0.001, "loss": 0.4201, "step": 5621 }, { "epoch": 0.1551233543838834, "grad_norm": 0.00355579168535769, "learning_rate": 0.001, "loss": 0.3707, "step": 5622 }, { "epoch": 0.15515094658494777, "grad_norm": 0.0023576144594699144, "learning_rate": 0.001, "loss": 0.4181, "step": 5623 }, { "epoch": 0.15517853878601212, "grad_norm": 0.0028574687894433737, "learning_rate": 0.001, "loss": 0.399, "step": 5624 }, { "epoch": 0.1552061309870765, "grad_norm": 0.0038885304238647223, "learning_rate": 0.001, "loss": 0.4392, "step": 5625 }, { "epoch": 0.15523372318814088, "grad_norm": 0.0036933154333382845, "learning_rate": 0.001, "loss": 0.375, "step": 5626 }, { "epoch": 0.15526131538920523, "grad_norm": 0.0026938861701637506, "learning_rate": 0.001, "loss": 0.3936, "step": 5627 }, { "epoch": 0.15528890759026961, "grad_norm": 0.00399070093408227, "learning_rate": 0.001, "loss": 0.3792, "step": 5628 }, { "epoch": 0.15531649979133397, "grad_norm": 0.004512227140367031, "learning_rate": 0.001, "loss": 0.4562, "step": 5629 }, { "epoch": 0.15534409199239835, "grad_norm": 0.006776070687919855, "learning_rate": 0.001, "loss": 0.4011, "step": 5630 }, { "epoch": 0.15537168419346273, "grad_norm": 0.006678242702037096, "learning_rate": 0.001, "loss": 0.4211, "step": 5631 }, { "epoch": 0.15539927639452708, "grad_norm": 0.0036010188050568104, "learning_rate": 0.001, "loss": 0.4175, "step": 5632 }, { "epoch": 0.15542686859559146, "grad_norm": 0.0073088183999061584, "learning_rate": 0.001, "loss": 0.3522, "step": 5633 }, { "epoch": 0.1554544607966558, "grad_norm": 0.005029013846069574, "learning_rate": 0.001, "loss": 0.4045, "step": 5634 }, { "epoch": 0.1554820529977202, "grad_norm": 0.002414390444755554, "learning_rate": 0.001, "loss": 0.4185, "step": 5635 }, { "epoch": 0.15550964519878457, "grad_norm": 0.0036553817335516214, "learning_rate": 0.001, "loss": 0.3909, "step": 5636 }, { "epoch": 0.15553723739984893, "grad_norm": 0.005032503046095371, "learning_rate": 0.001, "loss": 0.3812, "step": 5637 }, { "epoch": 0.1555648296009133, "grad_norm": 0.0028513423167169094, "learning_rate": 0.001, "loss": 0.4276, "step": 5638 }, { "epoch": 0.15559242180197766, "grad_norm": 0.004651295021176338, "learning_rate": 0.001, "loss": 0.4122, "step": 5639 }, { "epoch": 0.15562001400304204, "grad_norm": 0.0026212476659566164, "learning_rate": 0.001, "loss": 0.3865, "step": 5640 }, { "epoch": 0.15564760620410642, "grad_norm": 0.0022126201074570417, "learning_rate": 0.001, "loss": 0.4105, "step": 5641 }, { "epoch": 0.15567519840517077, "grad_norm": 0.002519601956009865, "learning_rate": 0.001, "loss": 0.3929, "step": 5642 }, { "epoch": 0.15570279060623515, "grad_norm": 0.002760083880275488, "learning_rate": 0.001, "loss": 0.3819, "step": 5643 }, { "epoch": 0.1557303828072995, "grad_norm": 0.003575262613594532, "learning_rate": 0.001, "loss": 0.401, "step": 5644 }, { "epoch": 0.15575797500836389, "grad_norm": 0.0028452950064092875, "learning_rate": 0.001, "loss": 0.3747, "step": 5645 }, { "epoch": 0.15578556720942827, "grad_norm": 0.0025837207213044167, "learning_rate": 0.001, "loss": 0.3975, "step": 5646 }, { "epoch": 0.15581315941049262, "grad_norm": 0.0023428683634847403, "learning_rate": 0.001, "loss": 0.4274, "step": 5647 }, { "epoch": 0.155840751611557, "grad_norm": 0.0025679313112050295, "learning_rate": 0.001, "loss": 0.3871, "step": 5648 }, { "epoch": 0.15586834381262135, "grad_norm": 0.002796493237838149, "learning_rate": 0.001, "loss": 0.4153, "step": 5649 }, { "epoch": 0.15589593601368573, "grad_norm": 0.002868711482733488, "learning_rate": 0.001, "loss": 0.3826, "step": 5650 }, { "epoch": 0.1559235282147501, "grad_norm": 0.0034229152370244265, "learning_rate": 0.001, "loss": 0.4015, "step": 5651 }, { "epoch": 0.15595112041581446, "grad_norm": 0.0044869715347886086, "learning_rate": 0.001, "loss": 0.3901, "step": 5652 }, { "epoch": 0.15597871261687885, "grad_norm": 0.0027872773353010416, "learning_rate": 0.001, "loss": 0.4223, "step": 5653 }, { "epoch": 0.1560063048179432, "grad_norm": 0.007318846881389618, "learning_rate": 0.001, "loss": 0.4144, "step": 5654 }, { "epoch": 0.15603389701900758, "grad_norm": 0.003985683433711529, "learning_rate": 0.001, "loss": 0.3814, "step": 5655 }, { "epoch": 0.15606148922007196, "grad_norm": 0.002402941230684519, "learning_rate": 0.001, "loss": 0.4094, "step": 5656 }, { "epoch": 0.1560890814211363, "grad_norm": 0.00238110963255167, "learning_rate": 0.001, "loss": 0.4056, "step": 5657 }, { "epoch": 0.1561166736222007, "grad_norm": 0.00369979883544147, "learning_rate": 0.001, "loss": 0.3758, "step": 5658 }, { "epoch": 0.15614426582326504, "grad_norm": 0.0022104033268988132, "learning_rate": 0.001, "loss": 0.4058, "step": 5659 }, { "epoch": 0.15617185802432942, "grad_norm": 0.0024638620670884848, "learning_rate": 0.001, "loss": 0.4071, "step": 5660 }, { "epoch": 0.1561994502253938, "grad_norm": 0.0023760111071169376, "learning_rate": 0.001, "loss": 0.4247, "step": 5661 }, { "epoch": 0.15622704242645816, "grad_norm": 0.002687407424673438, "learning_rate": 0.001, "loss": 0.3877, "step": 5662 }, { "epoch": 0.15625463462752254, "grad_norm": 0.002467637648805976, "learning_rate": 0.001, "loss": 0.4084, "step": 5663 }, { "epoch": 0.1562822268285869, "grad_norm": 0.0020871292799711227, "learning_rate": 0.001, "loss": 0.3866, "step": 5664 }, { "epoch": 0.15630981902965127, "grad_norm": 0.0048971157521009445, "learning_rate": 0.001, "loss": 0.4112, "step": 5665 }, { "epoch": 0.15633741123071565, "grad_norm": 0.002223189687356353, "learning_rate": 0.001, "loss": 0.387, "step": 5666 }, { "epoch": 0.15636500343178, "grad_norm": 0.0026094792410731316, "learning_rate": 0.001, "loss": 0.4407, "step": 5667 }, { "epoch": 0.15639259563284438, "grad_norm": 0.004317654296755791, "learning_rate": 0.001, "loss": 0.4128, "step": 5668 }, { "epoch": 0.15642018783390874, "grad_norm": 0.0028076712042093277, "learning_rate": 0.001, "loss": 0.3887, "step": 5669 }, { "epoch": 0.15644778003497312, "grad_norm": 0.0033742813393473625, "learning_rate": 0.001, "loss": 0.4305, "step": 5670 }, { "epoch": 0.1564753722360375, "grad_norm": 0.004012165125459433, "learning_rate": 0.001, "loss": 0.3531, "step": 5671 }, { "epoch": 0.15650296443710185, "grad_norm": 0.0029937534127384424, "learning_rate": 0.001, "loss": 0.3737, "step": 5672 }, { "epoch": 0.15653055663816623, "grad_norm": 0.004357090685516596, "learning_rate": 0.001, "loss": 0.3949, "step": 5673 }, { "epoch": 0.15655814883923058, "grad_norm": 0.006484336219727993, "learning_rate": 0.001, "loss": 0.3871, "step": 5674 }, { "epoch": 0.15658574104029496, "grad_norm": 0.0038871821016073227, "learning_rate": 0.001, "loss": 0.4232, "step": 5675 }, { "epoch": 0.15661333324135934, "grad_norm": 0.008595773950219154, "learning_rate": 0.001, "loss": 0.3821, "step": 5676 }, { "epoch": 0.1566409254424237, "grad_norm": 0.009347690269351006, "learning_rate": 0.001, "loss": 0.3448, "step": 5677 }, { "epoch": 0.15666851764348808, "grad_norm": 0.002826757961884141, "learning_rate": 0.001, "loss": 0.3628, "step": 5678 }, { "epoch": 0.15669610984455243, "grad_norm": 0.003515382297337055, "learning_rate": 0.001, "loss": 0.3935, "step": 5679 }, { "epoch": 0.1567237020456168, "grad_norm": 0.0032721932511776686, "learning_rate": 0.001, "loss": 0.4253, "step": 5680 }, { "epoch": 0.1567512942466812, "grad_norm": 0.003463307162746787, "learning_rate": 0.001, "loss": 0.384, "step": 5681 }, { "epoch": 0.15677888644774554, "grad_norm": 0.003177905920892954, "learning_rate": 0.001, "loss": 0.3769, "step": 5682 }, { "epoch": 0.15680647864880992, "grad_norm": 0.003270684042945504, "learning_rate": 0.001, "loss": 0.4242, "step": 5683 }, { "epoch": 0.15683407084987427, "grad_norm": 0.0029053555335849524, "learning_rate": 0.001, "loss": 0.4024, "step": 5684 }, { "epoch": 0.15686166305093865, "grad_norm": 0.004513274412602186, "learning_rate": 0.001, "loss": 0.3607, "step": 5685 }, { "epoch": 0.15688925525200303, "grad_norm": 0.012942062690854073, "learning_rate": 0.001, "loss": 0.408, "step": 5686 }, { "epoch": 0.1569168474530674, "grad_norm": 0.004073042422533035, "learning_rate": 0.001, "loss": 0.3747, "step": 5687 }, { "epoch": 0.15694443965413177, "grad_norm": 0.0024588184896856546, "learning_rate": 0.001, "loss": 0.4046, "step": 5688 }, { "epoch": 0.15697203185519612, "grad_norm": 0.002686630468815565, "learning_rate": 0.001, "loss": 0.4188, "step": 5689 }, { "epoch": 0.1569996240562605, "grad_norm": 0.0031974229495972395, "learning_rate": 0.001, "loss": 0.3942, "step": 5690 }, { "epoch": 0.15702721625732488, "grad_norm": 0.0040585664100945, "learning_rate": 0.001, "loss": 0.3606, "step": 5691 }, { "epoch": 0.15705480845838923, "grad_norm": 0.005035550333559513, "learning_rate": 0.001, "loss": 0.4055, "step": 5692 }, { "epoch": 0.1570824006594536, "grad_norm": 0.005043357145041227, "learning_rate": 0.001, "loss": 0.4051, "step": 5693 }, { "epoch": 0.15710999286051797, "grad_norm": 0.002406257903203368, "learning_rate": 0.001, "loss": 0.3943, "step": 5694 }, { "epoch": 0.15713758506158235, "grad_norm": 0.002501264214515686, "learning_rate": 0.001, "loss": 0.395, "step": 5695 }, { "epoch": 0.1571651772626467, "grad_norm": 0.003250819630920887, "learning_rate": 0.001, "loss": 0.3747, "step": 5696 }, { "epoch": 0.15719276946371108, "grad_norm": 0.0020628594793379307, "learning_rate": 0.001, "loss": 0.3828, "step": 5697 }, { "epoch": 0.15722036166477546, "grad_norm": 0.002165653510019183, "learning_rate": 0.001, "loss": 0.3879, "step": 5698 }, { "epoch": 0.1572479538658398, "grad_norm": 0.003081389470025897, "learning_rate": 0.001, "loss": 0.4097, "step": 5699 }, { "epoch": 0.1572755460669042, "grad_norm": 0.0035273549146950245, "learning_rate": 0.001, "loss": 0.4187, "step": 5700 }, { "epoch": 0.15730313826796855, "grad_norm": 0.0023170190397650003, "learning_rate": 0.001, "loss": 0.4429, "step": 5701 }, { "epoch": 0.15733073046903293, "grad_norm": 0.0033377818763256073, "learning_rate": 0.001, "loss": 0.4085, "step": 5702 }, { "epoch": 0.1573583226700973, "grad_norm": 0.0030271231662482023, "learning_rate": 0.001, "loss": 0.3799, "step": 5703 }, { "epoch": 0.15738591487116166, "grad_norm": 0.002670741407200694, "learning_rate": 0.001, "loss": 0.3989, "step": 5704 }, { "epoch": 0.15741350707222604, "grad_norm": 0.0025286702439188957, "learning_rate": 0.001, "loss": 0.4247, "step": 5705 }, { "epoch": 0.1574410992732904, "grad_norm": 0.002894239965826273, "learning_rate": 0.001, "loss": 0.4041, "step": 5706 }, { "epoch": 0.15746869147435477, "grad_norm": 0.0030300067737698555, "learning_rate": 0.001, "loss": 0.4297, "step": 5707 }, { "epoch": 0.15749628367541915, "grad_norm": 0.0026801263447850943, "learning_rate": 0.001, "loss": 0.3788, "step": 5708 }, { "epoch": 0.1575238758764835, "grad_norm": 0.0029794182628393173, "learning_rate": 0.001, "loss": 0.3889, "step": 5709 }, { "epoch": 0.15755146807754788, "grad_norm": 0.0026555589865893126, "learning_rate": 0.001, "loss": 0.4181, "step": 5710 }, { "epoch": 0.15757906027861224, "grad_norm": 0.0025151160079985857, "learning_rate": 0.001, "loss": 0.3574, "step": 5711 }, { "epoch": 0.15760665247967662, "grad_norm": 0.0026525617577135563, "learning_rate": 0.001, "loss": 0.3906, "step": 5712 }, { "epoch": 0.157634244680741, "grad_norm": 0.002047165296971798, "learning_rate": 0.001, "loss": 0.4115, "step": 5713 }, { "epoch": 0.15766183688180535, "grad_norm": 0.00422664824873209, "learning_rate": 0.001, "loss": 0.3657, "step": 5714 }, { "epoch": 0.15768942908286973, "grad_norm": 0.005134572274982929, "learning_rate": 0.001, "loss": 0.3954, "step": 5715 }, { "epoch": 0.15771702128393408, "grad_norm": 0.004460239317268133, "learning_rate": 0.001, "loss": 0.4211, "step": 5716 }, { "epoch": 0.15774461348499846, "grad_norm": 0.0029927766881883144, "learning_rate": 0.001, "loss": 0.3724, "step": 5717 }, { "epoch": 0.15777220568606284, "grad_norm": 0.005971200298517942, "learning_rate": 0.001, "loss": 0.3673, "step": 5718 }, { "epoch": 0.1577997978871272, "grad_norm": 0.0025469562970101833, "learning_rate": 0.001, "loss": 0.381, "step": 5719 }, { "epoch": 0.15782739008819158, "grad_norm": 0.0036222515627741814, "learning_rate": 0.001, "loss": 0.3634, "step": 5720 }, { "epoch": 0.15785498228925593, "grad_norm": 0.002309228293597698, "learning_rate": 0.001, "loss": 0.399, "step": 5721 }, { "epoch": 0.1578825744903203, "grad_norm": 0.006437615491449833, "learning_rate": 0.001, "loss": 0.3823, "step": 5722 }, { "epoch": 0.1579101666913847, "grad_norm": 0.0027779482770711184, "learning_rate": 0.001, "loss": 0.4044, "step": 5723 }, { "epoch": 0.15793775889244904, "grad_norm": 0.006843385752290487, "learning_rate": 0.001, "loss": 0.4139, "step": 5724 }, { "epoch": 0.15796535109351342, "grad_norm": 0.005818149074912071, "learning_rate": 0.001, "loss": 0.3991, "step": 5725 }, { "epoch": 0.15799294329457778, "grad_norm": 0.004674671217799187, "learning_rate": 0.001, "loss": 0.4334, "step": 5726 }, { "epoch": 0.15802053549564216, "grad_norm": 0.0025879840832203627, "learning_rate": 0.001, "loss": 0.4182, "step": 5727 }, { "epoch": 0.15804812769670654, "grad_norm": 0.0030769093427807093, "learning_rate": 0.001, "loss": 0.4101, "step": 5728 }, { "epoch": 0.1580757198977709, "grad_norm": 0.0031094071455299854, "learning_rate": 0.001, "loss": 0.4282, "step": 5729 }, { "epoch": 0.15810331209883527, "grad_norm": 0.0025110999122262, "learning_rate": 0.001, "loss": 0.4245, "step": 5730 }, { "epoch": 0.15813090429989962, "grad_norm": 0.003311107400804758, "learning_rate": 0.001, "loss": 0.4133, "step": 5731 }, { "epoch": 0.158158496500964, "grad_norm": 0.003340107621625066, "learning_rate": 0.001, "loss": 0.3871, "step": 5732 }, { "epoch": 0.15818608870202838, "grad_norm": 0.004575404338538647, "learning_rate": 0.001, "loss": 0.3994, "step": 5733 }, { "epoch": 0.15821368090309273, "grad_norm": 0.0022902588825672865, "learning_rate": 0.001, "loss": 0.3931, "step": 5734 }, { "epoch": 0.15824127310415712, "grad_norm": 0.0030541005544364452, "learning_rate": 0.001, "loss": 0.4236, "step": 5735 }, { "epoch": 0.15826886530522147, "grad_norm": 0.002666509710252285, "learning_rate": 0.001, "loss": 0.399, "step": 5736 }, { "epoch": 0.15829645750628585, "grad_norm": 0.0029707769863307476, "learning_rate": 0.001, "loss": 0.4407, "step": 5737 }, { "epoch": 0.15832404970735023, "grad_norm": 0.0034186067059636116, "learning_rate": 0.001, "loss": 0.3971, "step": 5738 }, { "epoch": 0.15835164190841458, "grad_norm": 0.002819865709170699, "learning_rate": 0.001, "loss": 0.3952, "step": 5739 }, { "epoch": 0.15837923410947896, "grad_norm": 0.004903051070868969, "learning_rate": 0.001, "loss": 0.3917, "step": 5740 }, { "epoch": 0.1584068263105433, "grad_norm": 0.0036358917132019997, "learning_rate": 0.001, "loss": 0.4009, "step": 5741 }, { "epoch": 0.1584344185116077, "grad_norm": 0.004073919262737036, "learning_rate": 0.001, "loss": 0.4096, "step": 5742 }, { "epoch": 0.15846201071267207, "grad_norm": 0.0033164892811328173, "learning_rate": 0.001, "loss": 0.431, "step": 5743 }, { "epoch": 0.15848960291373643, "grad_norm": 0.008852209895849228, "learning_rate": 0.001, "loss": 0.4249, "step": 5744 }, { "epoch": 0.1585171951148008, "grad_norm": 0.004943115636706352, "learning_rate": 0.001, "loss": 0.3866, "step": 5745 }, { "epoch": 0.15854478731586516, "grad_norm": 0.0025044376961886883, "learning_rate": 0.001, "loss": 0.4301, "step": 5746 }, { "epoch": 0.15857237951692954, "grad_norm": 0.0032009256538003683, "learning_rate": 0.001, "loss": 0.3997, "step": 5747 }, { "epoch": 0.15859997171799392, "grad_norm": 0.002845719689503312, "learning_rate": 0.001, "loss": 0.4215, "step": 5748 }, { "epoch": 0.15862756391905827, "grad_norm": 0.00325995241291821, "learning_rate": 0.001, "loss": 0.408, "step": 5749 }, { "epoch": 0.15865515612012265, "grad_norm": 0.0026871482841670513, "learning_rate": 0.001, "loss": 0.4205, "step": 5750 }, { "epoch": 0.158682748321187, "grad_norm": 0.0027641262859106064, "learning_rate": 0.001, "loss": 0.3618, "step": 5751 }, { "epoch": 0.1587103405222514, "grad_norm": 0.003129362827166915, "learning_rate": 0.001, "loss": 0.4006, "step": 5752 }, { "epoch": 0.15873793272331577, "grad_norm": 0.0031827734783291817, "learning_rate": 0.001, "loss": 0.3633, "step": 5753 }, { "epoch": 0.15876552492438012, "grad_norm": 0.0046300506219267845, "learning_rate": 0.001, "loss": 0.3883, "step": 5754 }, { "epoch": 0.1587931171254445, "grad_norm": 0.00367173389531672, "learning_rate": 0.001, "loss": 0.4506, "step": 5755 }, { "epoch": 0.15882070932650885, "grad_norm": 0.0031661444809287786, "learning_rate": 0.001, "loss": 0.4077, "step": 5756 }, { "epoch": 0.15884830152757323, "grad_norm": 0.002517246874049306, "learning_rate": 0.001, "loss": 0.366, "step": 5757 }, { "epoch": 0.1588758937286376, "grad_norm": 0.0026858425699174404, "learning_rate": 0.001, "loss": 0.4244, "step": 5758 }, { "epoch": 0.15890348592970197, "grad_norm": 0.0026828250847756863, "learning_rate": 0.001, "loss": 0.4196, "step": 5759 }, { "epoch": 0.15893107813076635, "grad_norm": 0.002697891555726528, "learning_rate": 0.001, "loss": 0.389, "step": 5760 }, { "epoch": 0.1589586703318307, "grad_norm": 0.004389958921819925, "learning_rate": 0.001, "loss": 0.4066, "step": 5761 }, { "epoch": 0.15898626253289508, "grad_norm": 0.003987609874457121, "learning_rate": 0.001, "loss": 0.396, "step": 5762 }, { "epoch": 0.15901385473395946, "grad_norm": 0.003649795660749078, "learning_rate": 0.001, "loss": 0.3842, "step": 5763 }, { "epoch": 0.1590414469350238, "grad_norm": 0.0034654231276363134, "learning_rate": 0.001, "loss": 0.409, "step": 5764 }, { "epoch": 0.1590690391360882, "grad_norm": 0.005718625150620937, "learning_rate": 0.001, "loss": 0.405, "step": 5765 }, { "epoch": 0.15909663133715254, "grad_norm": 0.0025557433255016804, "learning_rate": 0.001, "loss": 0.4179, "step": 5766 }, { "epoch": 0.15912422353821692, "grad_norm": 0.002460428746417165, "learning_rate": 0.001, "loss": 0.3853, "step": 5767 }, { "epoch": 0.1591518157392813, "grad_norm": 0.007104699965566397, "learning_rate": 0.001, "loss": 0.3963, "step": 5768 }, { "epoch": 0.15917940794034566, "grad_norm": 0.006836418528109789, "learning_rate": 0.001, "loss": 0.3664, "step": 5769 }, { "epoch": 0.15920700014141004, "grad_norm": 0.0034382655285298824, "learning_rate": 0.001, "loss": 0.4309, "step": 5770 }, { "epoch": 0.1592345923424744, "grad_norm": 0.0040556564927101135, "learning_rate": 0.001, "loss": 0.3581, "step": 5771 }, { "epoch": 0.15926218454353877, "grad_norm": 0.0038001069333404303, "learning_rate": 0.001, "loss": 0.3918, "step": 5772 }, { "epoch": 0.15928977674460315, "grad_norm": 0.004240202251821756, "learning_rate": 0.001, "loss": 0.4036, "step": 5773 }, { "epoch": 0.1593173689456675, "grad_norm": 0.004947451408952475, "learning_rate": 0.001, "loss": 0.3871, "step": 5774 }, { "epoch": 0.15934496114673188, "grad_norm": 0.002301145112141967, "learning_rate": 0.001, "loss": 0.4065, "step": 5775 }, { "epoch": 0.15937255334779624, "grad_norm": 0.003091468010097742, "learning_rate": 0.001, "loss": 0.4057, "step": 5776 }, { "epoch": 0.15940014554886062, "grad_norm": 0.004508181009441614, "learning_rate": 0.001, "loss": 0.3781, "step": 5777 }, { "epoch": 0.159427737749925, "grad_norm": 0.002182744676247239, "learning_rate": 0.001, "loss": 0.4129, "step": 5778 }, { "epoch": 0.15945532995098935, "grad_norm": 0.00251766131259501, "learning_rate": 0.001, "loss": 0.4302, "step": 5779 }, { "epoch": 0.15948292215205373, "grad_norm": 0.002740236232057214, "learning_rate": 0.001, "loss": 0.4014, "step": 5780 }, { "epoch": 0.15951051435311808, "grad_norm": 0.005189212504774332, "learning_rate": 0.001, "loss": 0.4264, "step": 5781 }, { "epoch": 0.15953810655418246, "grad_norm": 0.002994902664795518, "learning_rate": 0.001, "loss": 0.3908, "step": 5782 }, { "epoch": 0.15956569875524684, "grad_norm": 0.003750283271074295, "learning_rate": 0.001, "loss": 0.4208, "step": 5783 }, { "epoch": 0.1595932909563112, "grad_norm": 0.003004671772941947, "learning_rate": 0.001, "loss": 0.4253, "step": 5784 }, { "epoch": 0.15962088315737558, "grad_norm": 0.004584962036460638, "learning_rate": 0.001, "loss": 0.4165, "step": 5785 }, { "epoch": 0.15964847535843993, "grad_norm": 0.005403813440352678, "learning_rate": 0.001, "loss": 0.3689, "step": 5786 }, { "epoch": 0.1596760675595043, "grad_norm": 0.0032318695448338985, "learning_rate": 0.001, "loss": 0.4165, "step": 5787 }, { "epoch": 0.15970365976056866, "grad_norm": 0.002624890301376581, "learning_rate": 0.001, "loss": 0.369, "step": 5788 }, { "epoch": 0.15973125196163304, "grad_norm": 0.004142098128795624, "learning_rate": 0.001, "loss": 0.3614, "step": 5789 }, { "epoch": 0.15975884416269742, "grad_norm": 0.002787158126011491, "learning_rate": 0.001, "loss": 0.3912, "step": 5790 }, { "epoch": 0.15978643636376177, "grad_norm": 0.004009711556136608, "learning_rate": 0.001, "loss": 0.4172, "step": 5791 }, { "epoch": 0.15981402856482615, "grad_norm": 0.002381292637437582, "learning_rate": 0.001, "loss": 0.3975, "step": 5792 }, { "epoch": 0.1598416207658905, "grad_norm": 0.0021126163192093372, "learning_rate": 0.001, "loss": 0.4088, "step": 5793 }, { "epoch": 0.1598692129669549, "grad_norm": 0.0049142311327159405, "learning_rate": 0.001, "loss": 0.3782, "step": 5794 }, { "epoch": 0.15989680516801927, "grad_norm": 0.0029681632295250893, "learning_rate": 0.001, "loss": 0.3565, "step": 5795 }, { "epoch": 0.15992439736908362, "grad_norm": 0.002932732691988349, "learning_rate": 0.001, "loss": 0.4096, "step": 5796 }, { "epoch": 0.159951989570148, "grad_norm": 0.002819716464728117, "learning_rate": 0.001, "loss": 0.3889, "step": 5797 }, { "epoch": 0.15997958177121235, "grad_norm": 0.0028946103993803263, "learning_rate": 0.001, "loss": 0.395, "step": 5798 }, { "epoch": 0.16000717397227673, "grad_norm": 0.0024298951029777527, "learning_rate": 0.001, "loss": 0.3859, "step": 5799 }, { "epoch": 0.16003476617334111, "grad_norm": 0.002949990564957261, "learning_rate": 0.001, "loss": 0.4012, "step": 5800 }, { "epoch": 0.16006235837440547, "grad_norm": 0.003484488232061267, "learning_rate": 0.001, "loss": 0.3983, "step": 5801 }, { "epoch": 0.16008995057546985, "grad_norm": 0.004663324449211359, "learning_rate": 0.001, "loss": 0.4009, "step": 5802 }, { "epoch": 0.1601175427765342, "grad_norm": 0.003363480092957616, "learning_rate": 0.001, "loss": 0.4289, "step": 5803 }, { "epoch": 0.16014513497759858, "grad_norm": 0.0023565501905977726, "learning_rate": 0.001, "loss": 0.3825, "step": 5804 }, { "epoch": 0.16017272717866296, "grad_norm": 0.002411758527159691, "learning_rate": 0.001, "loss": 0.4388, "step": 5805 }, { "epoch": 0.1602003193797273, "grad_norm": 0.0026753805577754974, "learning_rate": 0.001, "loss": 0.4267, "step": 5806 }, { "epoch": 0.1602279115807917, "grad_norm": 0.003161199390888214, "learning_rate": 0.001, "loss": 0.3725, "step": 5807 }, { "epoch": 0.16025550378185605, "grad_norm": 0.0042272647842764854, "learning_rate": 0.001, "loss": 0.3912, "step": 5808 }, { "epoch": 0.16028309598292043, "grad_norm": 0.0023989747278392315, "learning_rate": 0.001, "loss": 0.3888, "step": 5809 }, { "epoch": 0.1603106881839848, "grad_norm": 0.003144536865875125, "learning_rate": 0.001, "loss": 0.4189, "step": 5810 }, { "epoch": 0.16033828038504916, "grad_norm": 0.002298231702297926, "learning_rate": 0.001, "loss": 0.3817, "step": 5811 }, { "epoch": 0.16036587258611354, "grad_norm": 0.005583870690315962, "learning_rate": 0.001, "loss": 0.4163, "step": 5812 }, { "epoch": 0.1603934647871779, "grad_norm": 0.002705441555008292, "learning_rate": 0.001, "loss": 0.4142, "step": 5813 }, { "epoch": 0.16042105698824227, "grad_norm": 0.0025329829659312963, "learning_rate": 0.001, "loss": 0.4042, "step": 5814 }, { "epoch": 0.16044864918930665, "grad_norm": 0.00348359951749444, "learning_rate": 0.001, "loss": 0.4062, "step": 5815 }, { "epoch": 0.160476241390371, "grad_norm": 0.005613440182060003, "learning_rate": 0.001, "loss": 0.3973, "step": 5816 }, { "epoch": 0.16050383359143539, "grad_norm": 0.005014996509999037, "learning_rate": 0.001, "loss": 0.3909, "step": 5817 }, { "epoch": 0.16053142579249974, "grad_norm": 0.003465597052127123, "learning_rate": 0.001, "loss": 0.4292, "step": 5818 }, { "epoch": 0.16055901799356412, "grad_norm": 0.003062979318201542, "learning_rate": 0.001, "loss": 0.3688, "step": 5819 }, { "epoch": 0.1605866101946285, "grad_norm": 0.003151744371280074, "learning_rate": 0.001, "loss": 0.3744, "step": 5820 }, { "epoch": 0.16061420239569285, "grad_norm": 0.0023994911462068558, "learning_rate": 0.001, "loss": 0.4317, "step": 5821 }, { "epoch": 0.16064179459675723, "grad_norm": 0.002751079387962818, "learning_rate": 0.001, "loss": 0.4137, "step": 5822 }, { "epoch": 0.16066938679782158, "grad_norm": 0.0021729813888669014, "learning_rate": 0.001, "loss": 0.3928, "step": 5823 }, { "epoch": 0.16069697899888596, "grad_norm": 0.0027657474856823683, "learning_rate": 0.001, "loss": 0.4277, "step": 5824 }, { "epoch": 0.16072457119995034, "grad_norm": 0.0026696647983044386, "learning_rate": 0.001, "loss": 0.3872, "step": 5825 }, { "epoch": 0.1607521634010147, "grad_norm": 0.0028585640247911215, "learning_rate": 0.001, "loss": 0.3727, "step": 5826 }, { "epoch": 0.16077975560207908, "grad_norm": 0.003272457979619503, "learning_rate": 0.001, "loss": 0.395, "step": 5827 }, { "epoch": 0.16080734780314343, "grad_norm": 0.004849962890148163, "learning_rate": 0.001, "loss": 0.3662, "step": 5828 }, { "epoch": 0.1608349400042078, "grad_norm": 0.0022566032130271196, "learning_rate": 0.001, "loss": 0.4437, "step": 5829 }, { "epoch": 0.1608625322052722, "grad_norm": 0.0026730254758149385, "learning_rate": 0.001, "loss": 0.3895, "step": 5830 }, { "epoch": 0.16089012440633654, "grad_norm": 0.005255574360489845, "learning_rate": 0.001, "loss": 0.4395, "step": 5831 }, { "epoch": 0.16091771660740092, "grad_norm": 0.0031837450806051493, "learning_rate": 0.001, "loss": 0.4005, "step": 5832 }, { "epoch": 0.16094530880846528, "grad_norm": 0.0028844368644058704, "learning_rate": 0.001, "loss": 0.4413, "step": 5833 }, { "epoch": 0.16097290100952966, "grad_norm": 0.0036064505111426115, "learning_rate": 0.001, "loss": 0.4039, "step": 5834 }, { "epoch": 0.16100049321059404, "grad_norm": 0.0031712886411696672, "learning_rate": 0.001, "loss": 0.3584, "step": 5835 }, { "epoch": 0.1610280854116584, "grad_norm": 0.0029163220897316933, "learning_rate": 0.001, "loss": 0.4165, "step": 5836 }, { "epoch": 0.16105567761272277, "grad_norm": 0.002263015601783991, "learning_rate": 0.001, "loss": 0.3831, "step": 5837 }, { "epoch": 0.16108326981378712, "grad_norm": 0.0027008019387722015, "learning_rate": 0.001, "loss": 0.4, "step": 5838 }, { "epoch": 0.1611108620148515, "grad_norm": 0.002291950862854719, "learning_rate": 0.001, "loss": 0.384, "step": 5839 }, { "epoch": 0.16113845421591588, "grad_norm": 0.003169293748214841, "learning_rate": 0.001, "loss": 0.3912, "step": 5840 }, { "epoch": 0.16116604641698024, "grad_norm": 0.0032045808620750904, "learning_rate": 0.001, "loss": 0.4201, "step": 5841 }, { "epoch": 0.16119363861804462, "grad_norm": 0.00491705909371376, "learning_rate": 0.001, "loss": 0.3684, "step": 5842 }, { "epoch": 0.16122123081910897, "grad_norm": 0.007085980381816626, "learning_rate": 0.001, "loss": 0.3874, "step": 5843 }, { "epoch": 0.16124882302017335, "grad_norm": 0.0026187379844486713, "learning_rate": 0.001, "loss": 0.3899, "step": 5844 }, { "epoch": 0.16127641522123773, "grad_norm": 0.0021291703451424837, "learning_rate": 0.001, "loss": 0.4321, "step": 5845 }, { "epoch": 0.16130400742230208, "grad_norm": 0.0025390980299562216, "learning_rate": 0.001, "loss": 0.3974, "step": 5846 }, { "epoch": 0.16133159962336646, "grad_norm": 0.0024054215755313635, "learning_rate": 0.001, "loss": 0.3825, "step": 5847 }, { "epoch": 0.16135919182443081, "grad_norm": 0.0030334896873682737, "learning_rate": 0.001, "loss": 0.3689, "step": 5848 }, { "epoch": 0.1613867840254952, "grad_norm": 0.004912371281534433, "learning_rate": 0.001, "loss": 0.3735, "step": 5849 }, { "epoch": 0.16141437622655957, "grad_norm": 0.00569628132507205, "learning_rate": 0.001, "loss": 0.4294, "step": 5850 }, { "epoch": 0.16144196842762393, "grad_norm": 0.01722198724746704, "learning_rate": 0.001, "loss": 0.3729, "step": 5851 }, { "epoch": 0.1614695606286883, "grad_norm": 0.00705372542142868, "learning_rate": 0.001, "loss": 0.3854, "step": 5852 }, { "epoch": 0.16149715282975266, "grad_norm": 0.0025785481557250023, "learning_rate": 0.001, "loss": 0.4384, "step": 5853 }, { "epoch": 0.16152474503081704, "grad_norm": 0.0024269360583275557, "learning_rate": 0.001, "loss": 0.3897, "step": 5854 }, { "epoch": 0.16155233723188142, "grad_norm": 0.0034063782077282667, "learning_rate": 0.001, "loss": 0.4463, "step": 5855 }, { "epoch": 0.16157992943294577, "grad_norm": 0.0034151726868003607, "learning_rate": 0.001, "loss": 0.4093, "step": 5856 }, { "epoch": 0.16160752163401015, "grad_norm": 0.0038089072331786156, "learning_rate": 0.001, "loss": 0.386, "step": 5857 }, { "epoch": 0.1616351138350745, "grad_norm": 0.0021995375864207745, "learning_rate": 0.001, "loss": 0.42, "step": 5858 }, { "epoch": 0.1616627060361389, "grad_norm": 0.004200818948447704, "learning_rate": 0.001, "loss": 0.4232, "step": 5859 }, { "epoch": 0.16169029823720327, "grad_norm": 0.002936551347374916, "learning_rate": 0.001, "loss": 0.3806, "step": 5860 }, { "epoch": 0.16171789043826762, "grad_norm": 0.0025540448259562254, "learning_rate": 0.001, "loss": 0.4272, "step": 5861 }, { "epoch": 0.161745482639332, "grad_norm": 0.0026053600013256073, "learning_rate": 0.001, "loss": 0.3827, "step": 5862 }, { "epoch": 0.16177307484039635, "grad_norm": 0.0023329800460487604, "learning_rate": 0.001, "loss": 0.3666, "step": 5863 }, { "epoch": 0.16180066704146073, "grad_norm": 0.004357020370662212, "learning_rate": 0.001, "loss": 0.3969, "step": 5864 }, { "epoch": 0.1618282592425251, "grad_norm": 0.0025468000676482916, "learning_rate": 0.001, "loss": 0.4233, "step": 5865 }, { "epoch": 0.16185585144358947, "grad_norm": 0.004209857899695635, "learning_rate": 0.001, "loss": 0.362, "step": 5866 }, { "epoch": 0.16188344364465385, "grad_norm": 0.005030781961977482, "learning_rate": 0.001, "loss": 0.4034, "step": 5867 }, { "epoch": 0.1619110358457182, "grad_norm": 0.0026541538536548615, "learning_rate": 0.001, "loss": 0.4342, "step": 5868 }, { "epoch": 0.16193862804678258, "grad_norm": 0.0038330042734742165, "learning_rate": 0.001, "loss": 0.3936, "step": 5869 }, { "epoch": 0.16196622024784696, "grad_norm": 0.004821475129574537, "learning_rate": 0.001, "loss": 0.413, "step": 5870 }, { "epoch": 0.1619938124489113, "grad_norm": 0.0028450365643948317, "learning_rate": 0.001, "loss": 0.3942, "step": 5871 }, { "epoch": 0.1620214046499757, "grad_norm": 0.0027043556328862906, "learning_rate": 0.001, "loss": 0.4009, "step": 5872 }, { "epoch": 0.16204899685104004, "grad_norm": 0.0035783706698566675, "learning_rate": 0.001, "loss": 0.3574, "step": 5873 }, { "epoch": 0.16207658905210443, "grad_norm": 0.0049888514913618565, "learning_rate": 0.001, "loss": 0.3756, "step": 5874 }, { "epoch": 0.1621041812531688, "grad_norm": 0.0031105815432965755, "learning_rate": 0.001, "loss": 0.4, "step": 5875 }, { "epoch": 0.16213177345423316, "grad_norm": 0.0034052070695906878, "learning_rate": 0.001, "loss": 0.4151, "step": 5876 }, { "epoch": 0.16215936565529754, "grad_norm": 0.009355590678751469, "learning_rate": 0.001, "loss": 0.3995, "step": 5877 }, { "epoch": 0.1621869578563619, "grad_norm": 0.0034844528418034315, "learning_rate": 0.001, "loss": 0.4489, "step": 5878 }, { "epoch": 0.16221455005742627, "grad_norm": 0.0033237759489566088, "learning_rate": 0.001, "loss": 0.4136, "step": 5879 }, { "epoch": 0.16224214225849065, "grad_norm": 0.0030154725536704063, "learning_rate": 0.001, "loss": 0.3679, "step": 5880 }, { "epoch": 0.162269734459555, "grad_norm": 0.00340478727594018, "learning_rate": 0.001, "loss": 0.3674, "step": 5881 }, { "epoch": 0.16229732666061938, "grad_norm": 0.003332821885123849, "learning_rate": 0.001, "loss": 0.3668, "step": 5882 }, { "epoch": 0.16232491886168374, "grad_norm": 0.00302005629055202, "learning_rate": 0.001, "loss": 0.4317, "step": 5883 }, { "epoch": 0.16235251106274812, "grad_norm": 0.0027085014153271914, "learning_rate": 0.001, "loss": 0.379, "step": 5884 }, { "epoch": 0.16238010326381247, "grad_norm": 0.0031871707178652287, "learning_rate": 0.001, "loss": 0.3755, "step": 5885 }, { "epoch": 0.16240769546487685, "grad_norm": 0.0026585953310132027, "learning_rate": 0.001, "loss": 0.3933, "step": 5886 }, { "epoch": 0.16243528766594123, "grad_norm": 0.0022077590692788363, "learning_rate": 0.001, "loss": 0.3791, "step": 5887 }, { "epoch": 0.16246287986700558, "grad_norm": 0.0099159125238657, "learning_rate": 0.001, "loss": 0.4363, "step": 5888 }, { "epoch": 0.16249047206806996, "grad_norm": 0.004561326466500759, "learning_rate": 0.001, "loss": 0.4033, "step": 5889 }, { "epoch": 0.16251806426913432, "grad_norm": 0.004134157672524452, "learning_rate": 0.001, "loss": 0.4241, "step": 5890 }, { "epoch": 0.1625456564701987, "grad_norm": 0.0028744975570589304, "learning_rate": 0.001, "loss": 0.4235, "step": 5891 }, { "epoch": 0.16257324867126308, "grad_norm": 0.0063214050605893135, "learning_rate": 0.001, "loss": 0.4302, "step": 5892 }, { "epoch": 0.16260084087232743, "grad_norm": 0.0033373129554092884, "learning_rate": 0.001, "loss": 0.4211, "step": 5893 }, { "epoch": 0.1626284330733918, "grad_norm": 0.007856364361941814, "learning_rate": 0.001, "loss": 0.3508, "step": 5894 }, { "epoch": 0.16265602527445616, "grad_norm": 0.005610327236354351, "learning_rate": 0.001, "loss": 0.4022, "step": 5895 }, { "epoch": 0.16268361747552054, "grad_norm": 0.0030173116829246283, "learning_rate": 0.001, "loss": 0.4308, "step": 5896 }, { "epoch": 0.16271120967658492, "grad_norm": 0.007099445443600416, "learning_rate": 0.001, "loss": 0.4093, "step": 5897 }, { "epoch": 0.16273880187764928, "grad_norm": 0.0066776215098798275, "learning_rate": 0.001, "loss": 0.425, "step": 5898 }, { "epoch": 0.16276639407871366, "grad_norm": 0.0048042964190244675, "learning_rate": 0.001, "loss": 0.4283, "step": 5899 }, { "epoch": 0.162793986279778, "grad_norm": 0.013856537640094757, "learning_rate": 0.001, "loss": 0.3666, "step": 5900 }, { "epoch": 0.1628215784808424, "grad_norm": 0.00262752384878695, "learning_rate": 0.001, "loss": 0.4292, "step": 5901 }, { "epoch": 0.16284917068190677, "grad_norm": 0.004112104419618845, "learning_rate": 0.001, "loss": 0.3826, "step": 5902 }, { "epoch": 0.16287676288297112, "grad_norm": 0.0023975521326065063, "learning_rate": 0.001, "loss": 0.4522, "step": 5903 }, { "epoch": 0.1629043550840355, "grad_norm": 0.003720303997397423, "learning_rate": 0.001, "loss": 0.3911, "step": 5904 }, { "epoch": 0.16293194728509985, "grad_norm": 0.0029375324957072735, "learning_rate": 0.001, "loss": 0.4242, "step": 5905 }, { "epoch": 0.16295953948616423, "grad_norm": 0.003167448565363884, "learning_rate": 0.001, "loss": 0.4212, "step": 5906 }, { "epoch": 0.16298713168722861, "grad_norm": 0.0032344404608011246, "learning_rate": 0.001, "loss": 0.4288, "step": 5907 }, { "epoch": 0.16301472388829297, "grad_norm": 0.0028254149947315454, "learning_rate": 0.001, "loss": 0.3897, "step": 5908 }, { "epoch": 0.16304231608935735, "grad_norm": 0.0026103025302290916, "learning_rate": 0.001, "loss": 0.3859, "step": 5909 }, { "epoch": 0.1630699082904217, "grad_norm": 0.0032244473695755005, "learning_rate": 0.001, "loss": 0.4391, "step": 5910 }, { "epoch": 0.16309750049148608, "grad_norm": 0.005498593673110008, "learning_rate": 0.001, "loss": 0.3777, "step": 5911 }, { "epoch": 0.16312509269255046, "grad_norm": 0.002438473980873823, "learning_rate": 0.001, "loss": 0.3835, "step": 5912 }, { "epoch": 0.1631526848936148, "grad_norm": 0.002533574588596821, "learning_rate": 0.001, "loss": 0.4007, "step": 5913 }, { "epoch": 0.1631802770946792, "grad_norm": 0.0030868607573211193, "learning_rate": 0.001, "loss": 0.3878, "step": 5914 }, { "epoch": 0.16320786929574355, "grad_norm": 0.002708959858864546, "learning_rate": 0.001, "loss": 0.4111, "step": 5915 }, { "epoch": 0.16323546149680793, "grad_norm": 0.0037433947436511517, "learning_rate": 0.001, "loss": 0.4038, "step": 5916 }, { "epoch": 0.1632630536978723, "grad_norm": 0.0034228821750730276, "learning_rate": 0.001, "loss": 0.397, "step": 5917 }, { "epoch": 0.16329064589893666, "grad_norm": 0.0021487257909029722, "learning_rate": 0.001, "loss": 0.4127, "step": 5918 }, { "epoch": 0.16331823810000104, "grad_norm": 0.008542514406144619, "learning_rate": 0.001, "loss": 0.3763, "step": 5919 }, { "epoch": 0.1633458303010654, "grad_norm": 0.0028414383996278048, "learning_rate": 0.001, "loss": 0.3694, "step": 5920 }, { "epoch": 0.16337342250212977, "grad_norm": 0.003626716323196888, "learning_rate": 0.001, "loss": 0.3819, "step": 5921 }, { "epoch": 0.16340101470319415, "grad_norm": 0.004335957579314709, "learning_rate": 0.001, "loss": 0.3939, "step": 5922 }, { "epoch": 0.1634286069042585, "grad_norm": 0.003027878934517503, "learning_rate": 0.001, "loss": 0.4225, "step": 5923 }, { "epoch": 0.16345619910532289, "grad_norm": 0.002936540637165308, "learning_rate": 0.001, "loss": 0.3906, "step": 5924 }, { "epoch": 0.16348379130638724, "grad_norm": 0.004033949691802263, "learning_rate": 0.001, "loss": 0.4212, "step": 5925 }, { "epoch": 0.16351138350745162, "grad_norm": 0.003175846766680479, "learning_rate": 0.001, "loss": 0.4137, "step": 5926 }, { "epoch": 0.163538975708516, "grad_norm": 0.002765644108876586, "learning_rate": 0.001, "loss": 0.4257, "step": 5927 }, { "epoch": 0.16356656790958035, "grad_norm": 0.0026081749238073826, "learning_rate": 0.001, "loss": 0.46, "step": 5928 }, { "epoch": 0.16359416011064473, "grad_norm": 0.002697083866223693, "learning_rate": 0.001, "loss": 0.4118, "step": 5929 }, { "epoch": 0.16362175231170908, "grad_norm": 0.0040725781582295895, "learning_rate": 0.001, "loss": 0.4067, "step": 5930 }, { "epoch": 0.16364934451277346, "grad_norm": 0.0018469118513166904, "learning_rate": 0.001, "loss": 0.4157, "step": 5931 }, { "epoch": 0.16367693671383785, "grad_norm": 0.0027053162921220064, "learning_rate": 0.001, "loss": 0.3853, "step": 5932 }, { "epoch": 0.1637045289149022, "grad_norm": 0.0021168517414480448, "learning_rate": 0.001, "loss": 0.4013, "step": 5933 }, { "epoch": 0.16373212111596658, "grad_norm": 0.002157708862796426, "learning_rate": 0.001, "loss": 0.3991, "step": 5934 }, { "epoch": 0.16375971331703093, "grad_norm": 0.00347045436501503, "learning_rate": 0.001, "loss": 0.3945, "step": 5935 }, { "epoch": 0.1637873055180953, "grad_norm": 0.0034281827975064516, "learning_rate": 0.001, "loss": 0.4147, "step": 5936 }, { "epoch": 0.1638148977191597, "grad_norm": 0.005607214290648699, "learning_rate": 0.001, "loss": 0.3952, "step": 5937 }, { "epoch": 0.16384248992022404, "grad_norm": 0.0018512718379497528, "learning_rate": 0.001, "loss": 0.4062, "step": 5938 }, { "epoch": 0.16387008212128842, "grad_norm": 0.005284006241708994, "learning_rate": 0.001, "loss": 0.4104, "step": 5939 }, { "epoch": 0.16389767432235278, "grad_norm": 0.006320610176771879, "learning_rate": 0.001, "loss": 0.3999, "step": 5940 }, { "epoch": 0.16392526652341716, "grad_norm": 0.0025526133831590414, "learning_rate": 0.001, "loss": 0.3816, "step": 5941 }, { "epoch": 0.16395285872448154, "grad_norm": 0.0021038020495325327, "learning_rate": 0.001, "loss": 0.4013, "step": 5942 }, { "epoch": 0.1639804509255459, "grad_norm": 0.0026413672603666782, "learning_rate": 0.001, "loss": 0.4036, "step": 5943 }, { "epoch": 0.16400804312661027, "grad_norm": 0.004636452533304691, "learning_rate": 0.001, "loss": 0.4089, "step": 5944 }, { "epoch": 0.16403563532767462, "grad_norm": 0.005048688966780901, "learning_rate": 0.001, "loss": 0.4018, "step": 5945 }, { "epoch": 0.164063227528739, "grad_norm": 0.0032856205943971872, "learning_rate": 0.001, "loss": 0.4095, "step": 5946 }, { "epoch": 0.16409081972980338, "grad_norm": 0.0027474837843328714, "learning_rate": 0.001, "loss": 0.4642, "step": 5947 }, { "epoch": 0.16411841193086774, "grad_norm": 0.004227146506309509, "learning_rate": 0.001, "loss": 0.3778, "step": 5948 }, { "epoch": 0.16414600413193212, "grad_norm": 0.00409206748008728, "learning_rate": 0.001, "loss": 0.4036, "step": 5949 }, { "epoch": 0.16417359633299647, "grad_norm": 0.004566535819321871, "learning_rate": 0.001, "loss": 0.4191, "step": 5950 }, { "epoch": 0.16420118853406085, "grad_norm": 0.0042169406078755856, "learning_rate": 0.001, "loss": 0.384, "step": 5951 }, { "epoch": 0.16422878073512523, "grad_norm": 0.0025692598428577185, "learning_rate": 0.001, "loss": 0.4062, "step": 5952 }, { "epoch": 0.16425637293618958, "grad_norm": 0.0040415204130113125, "learning_rate": 0.001, "loss": 0.4316, "step": 5953 }, { "epoch": 0.16428396513725396, "grad_norm": 0.002447271952405572, "learning_rate": 0.001, "loss": 0.3831, "step": 5954 }, { "epoch": 0.16431155733831831, "grad_norm": 0.00370286637917161, "learning_rate": 0.001, "loss": 0.3911, "step": 5955 }, { "epoch": 0.1643391495393827, "grad_norm": 0.0034686720464378595, "learning_rate": 0.001, "loss": 0.3902, "step": 5956 }, { "epoch": 0.16436674174044708, "grad_norm": 0.002705411519855261, "learning_rate": 0.001, "loss": 0.4178, "step": 5957 }, { "epoch": 0.16439433394151143, "grad_norm": 0.003815694246441126, "learning_rate": 0.001, "loss": 0.4295, "step": 5958 }, { "epoch": 0.1644219261425758, "grad_norm": 0.0033353432081639767, "learning_rate": 0.001, "loss": 0.4033, "step": 5959 }, { "epoch": 0.16444951834364016, "grad_norm": 0.003151560202240944, "learning_rate": 0.001, "loss": 0.4065, "step": 5960 }, { "epoch": 0.16447711054470454, "grad_norm": 0.0059262774884700775, "learning_rate": 0.001, "loss": 0.4036, "step": 5961 }, { "epoch": 0.16450470274576892, "grad_norm": 0.0027054711245000362, "learning_rate": 0.001, "loss": 0.3902, "step": 5962 }, { "epoch": 0.16453229494683327, "grad_norm": 0.005763984750956297, "learning_rate": 0.001, "loss": 0.3304, "step": 5963 }, { "epoch": 0.16455988714789765, "grad_norm": 0.002855742583051324, "learning_rate": 0.001, "loss": 0.3459, "step": 5964 }, { "epoch": 0.164587479348962, "grad_norm": 0.003007008694112301, "learning_rate": 0.001, "loss": 0.425, "step": 5965 }, { "epoch": 0.1646150715500264, "grad_norm": 0.0030072838999330997, "learning_rate": 0.001, "loss": 0.3738, "step": 5966 }, { "epoch": 0.16464266375109077, "grad_norm": 0.005015561822801828, "learning_rate": 0.001, "loss": 0.3966, "step": 5967 }, { "epoch": 0.16467025595215512, "grad_norm": 0.0044816480949521065, "learning_rate": 0.001, "loss": 0.4192, "step": 5968 }, { "epoch": 0.1646978481532195, "grad_norm": 0.0035232999362051487, "learning_rate": 0.001, "loss": 0.3555, "step": 5969 }, { "epoch": 0.16472544035428385, "grad_norm": 0.0043287379667162895, "learning_rate": 0.001, "loss": 0.3861, "step": 5970 }, { "epoch": 0.16475303255534823, "grad_norm": 0.00282704783603549, "learning_rate": 0.001, "loss": 0.4321, "step": 5971 }, { "epoch": 0.1647806247564126, "grad_norm": 0.004027531016618013, "learning_rate": 0.001, "loss": 0.4018, "step": 5972 }, { "epoch": 0.16480821695747697, "grad_norm": 0.011107796803116798, "learning_rate": 0.001, "loss": 0.4055, "step": 5973 }, { "epoch": 0.16483580915854135, "grad_norm": 0.004610290750861168, "learning_rate": 0.001, "loss": 0.4326, "step": 5974 }, { "epoch": 0.1648634013596057, "grad_norm": 0.002399605233222246, "learning_rate": 0.001, "loss": 0.3964, "step": 5975 }, { "epoch": 0.16489099356067008, "grad_norm": 0.004088590387254953, "learning_rate": 0.001, "loss": 0.414, "step": 5976 }, { "epoch": 0.16491858576173443, "grad_norm": 0.0037160133942961693, "learning_rate": 0.001, "loss": 0.3613, "step": 5977 }, { "epoch": 0.1649461779627988, "grad_norm": 0.004553014412522316, "learning_rate": 0.001, "loss": 0.414, "step": 5978 }, { "epoch": 0.1649737701638632, "grad_norm": 0.0035007258411496878, "learning_rate": 0.001, "loss": 0.3974, "step": 5979 }, { "epoch": 0.16500136236492755, "grad_norm": 0.0026380368508398533, "learning_rate": 0.001, "loss": 0.3852, "step": 5980 }, { "epoch": 0.16502895456599193, "grad_norm": 0.0043465979397296906, "learning_rate": 0.001, "loss": 0.3681, "step": 5981 }, { "epoch": 0.16505654676705628, "grad_norm": 0.00366704142652452, "learning_rate": 0.001, "loss": 0.3833, "step": 5982 }, { "epoch": 0.16508413896812066, "grad_norm": 0.002497212029993534, "learning_rate": 0.001, "loss": 0.4077, "step": 5983 }, { "epoch": 0.16511173116918504, "grad_norm": 0.007907644845545292, "learning_rate": 0.001, "loss": 0.407, "step": 5984 }, { "epoch": 0.1651393233702494, "grad_norm": 0.0024078672286123037, "learning_rate": 0.001, "loss": 0.4098, "step": 5985 }, { "epoch": 0.16516691557131377, "grad_norm": 0.003503212472423911, "learning_rate": 0.001, "loss": 0.4093, "step": 5986 }, { "epoch": 0.16519450777237812, "grad_norm": 0.00796839315444231, "learning_rate": 0.001, "loss": 0.4138, "step": 5987 }, { "epoch": 0.1652220999734425, "grad_norm": 0.007227026857435703, "learning_rate": 0.001, "loss": 0.367, "step": 5988 }, { "epoch": 0.16524969217450688, "grad_norm": 0.0025876981671899557, "learning_rate": 0.001, "loss": 0.4025, "step": 5989 }, { "epoch": 0.16527728437557124, "grad_norm": 0.0023945660796016455, "learning_rate": 0.001, "loss": 0.4076, "step": 5990 }, { "epoch": 0.16530487657663562, "grad_norm": 0.0032521584071218967, "learning_rate": 0.001, "loss": 0.399, "step": 5991 }, { "epoch": 0.16533246877769997, "grad_norm": 0.0025216443464159966, "learning_rate": 0.001, "loss": 0.4063, "step": 5992 }, { "epoch": 0.16536006097876435, "grad_norm": 0.0021180277690291405, "learning_rate": 0.001, "loss": 0.4105, "step": 5993 }, { "epoch": 0.16538765317982873, "grad_norm": 0.002108354354277253, "learning_rate": 0.001, "loss": 0.4072, "step": 5994 }, { "epoch": 0.16541524538089308, "grad_norm": 0.0022766771726310253, "learning_rate": 0.001, "loss": 0.4213, "step": 5995 }, { "epoch": 0.16544283758195746, "grad_norm": 0.0022483475040644407, "learning_rate": 0.001, "loss": 0.389, "step": 5996 }, { "epoch": 0.16547042978302182, "grad_norm": 0.006487622391432524, "learning_rate": 0.001, "loss": 0.4283, "step": 5997 }, { "epoch": 0.1654980219840862, "grad_norm": 0.00356247927993536, "learning_rate": 0.001, "loss": 0.3738, "step": 5998 }, { "epoch": 0.16552561418515058, "grad_norm": 0.002978307893499732, "learning_rate": 0.001, "loss": 0.3778, "step": 5999 }, { "epoch": 0.16555320638621493, "grad_norm": 0.007124268915504217, "learning_rate": 0.001, "loss": 0.3882, "step": 6000 }, { "epoch": 0.16555320638621493, "eval_runtime": 24.3281, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.164, "step": 6000 }, { "epoch": 0.1655807985872793, "grad_norm": 0.0031695945654064417, "learning_rate": 0.001, "loss": 0.4021, "step": 6001 }, { "epoch": 0.16560839078834366, "grad_norm": 0.0024594555143266916, "learning_rate": 0.001, "loss": 0.3992, "step": 6002 }, { "epoch": 0.16563598298940804, "grad_norm": 0.002371816663071513, "learning_rate": 0.001, "loss": 0.4138, "step": 6003 }, { "epoch": 0.16566357519047242, "grad_norm": 0.0034170527942478657, "learning_rate": 0.001, "loss": 0.3857, "step": 6004 }, { "epoch": 0.16569116739153678, "grad_norm": 0.0031782693695276976, "learning_rate": 0.001, "loss": 0.3954, "step": 6005 }, { "epoch": 0.16571875959260116, "grad_norm": 0.0026943848934024572, "learning_rate": 0.001, "loss": 0.3939, "step": 6006 }, { "epoch": 0.1657463517936655, "grad_norm": 0.003218788420781493, "learning_rate": 0.001, "loss": 0.397, "step": 6007 }, { "epoch": 0.1657739439947299, "grad_norm": 0.0070941089652478695, "learning_rate": 0.001, "loss": 0.3705, "step": 6008 }, { "epoch": 0.16580153619579427, "grad_norm": 0.0025819384027272463, "learning_rate": 0.001, "loss": 0.3838, "step": 6009 }, { "epoch": 0.16582912839685862, "grad_norm": 0.003531514434143901, "learning_rate": 0.001, "loss": 0.383, "step": 6010 }, { "epoch": 0.165856720597923, "grad_norm": 0.006012933328747749, "learning_rate": 0.001, "loss": 0.383, "step": 6011 }, { "epoch": 0.16588431279898735, "grad_norm": 0.0036130910739302635, "learning_rate": 0.001, "loss": 0.3882, "step": 6012 }, { "epoch": 0.16591190500005173, "grad_norm": 0.0030107556376606226, "learning_rate": 0.001, "loss": 0.3775, "step": 6013 }, { "epoch": 0.16593949720111612, "grad_norm": 0.0032329028472304344, "learning_rate": 0.001, "loss": 0.3742, "step": 6014 }, { "epoch": 0.16596708940218047, "grad_norm": 0.002732783555984497, "learning_rate": 0.001, "loss": 0.4053, "step": 6015 }, { "epoch": 0.16599468160324485, "grad_norm": 0.004081232473254204, "learning_rate": 0.001, "loss": 0.3952, "step": 6016 }, { "epoch": 0.1660222738043092, "grad_norm": 0.1196216568350792, "learning_rate": 0.001, "loss": 0.3624, "step": 6017 }, { "epoch": 0.16604986600537358, "grad_norm": 0.005355612374842167, "learning_rate": 0.001, "loss": 0.3804, "step": 6018 }, { "epoch": 0.16607745820643796, "grad_norm": 0.003674236126244068, "learning_rate": 0.001, "loss": 0.378, "step": 6019 }, { "epoch": 0.1661050504075023, "grad_norm": 0.002634497359395027, "learning_rate": 0.001, "loss": 0.3923, "step": 6020 }, { "epoch": 0.1661326426085667, "grad_norm": 0.0034477454610168934, "learning_rate": 0.001, "loss": 0.3945, "step": 6021 }, { "epoch": 0.16616023480963105, "grad_norm": 0.006275518331676722, "learning_rate": 0.001, "loss": 0.39, "step": 6022 }, { "epoch": 0.16618782701069543, "grad_norm": 0.0067554921843111515, "learning_rate": 0.001, "loss": 0.3807, "step": 6023 }, { "epoch": 0.1662154192117598, "grad_norm": 0.0043805669993162155, "learning_rate": 0.001, "loss": 0.4091, "step": 6024 }, { "epoch": 0.16624301141282416, "grad_norm": 0.0032864839304238558, "learning_rate": 0.001, "loss": 0.3718, "step": 6025 }, { "epoch": 0.16627060361388854, "grad_norm": 0.0033326733391731977, "learning_rate": 0.001, "loss": 0.3709, "step": 6026 }, { "epoch": 0.1662981958149529, "grad_norm": 0.00480731762945652, "learning_rate": 0.001, "loss": 0.4116, "step": 6027 }, { "epoch": 0.16632578801601727, "grad_norm": 0.0029019401408731937, "learning_rate": 0.001, "loss": 0.4049, "step": 6028 }, { "epoch": 0.16635338021708165, "grad_norm": 0.002836265368387103, "learning_rate": 0.001, "loss": 0.3866, "step": 6029 }, { "epoch": 0.166380972418146, "grad_norm": 0.002911055227741599, "learning_rate": 0.001, "loss": 0.4002, "step": 6030 }, { "epoch": 0.1664085646192104, "grad_norm": 0.002560385735705495, "learning_rate": 0.001, "loss": 0.3841, "step": 6031 }, { "epoch": 0.16643615682027474, "grad_norm": 0.003303447039797902, "learning_rate": 0.001, "loss": 0.3975, "step": 6032 }, { "epoch": 0.16646374902133912, "grad_norm": 0.0026523631531745195, "learning_rate": 0.001, "loss": 0.3821, "step": 6033 }, { "epoch": 0.1664913412224035, "grad_norm": 0.0037518537137657404, "learning_rate": 0.001, "loss": 0.4, "step": 6034 }, { "epoch": 0.16651893342346785, "grad_norm": 0.0027246454264968634, "learning_rate": 0.001, "loss": 0.4288, "step": 6035 }, { "epoch": 0.16654652562453223, "grad_norm": 0.012342111207544804, "learning_rate": 0.001, "loss": 0.4144, "step": 6036 }, { "epoch": 0.16657411782559658, "grad_norm": 0.005223266314715147, "learning_rate": 0.001, "loss": 0.4332, "step": 6037 }, { "epoch": 0.16660171002666097, "grad_norm": 0.003073178231716156, "learning_rate": 0.001, "loss": 0.4051, "step": 6038 }, { "epoch": 0.16662930222772535, "grad_norm": 0.004849819000810385, "learning_rate": 0.001, "loss": 0.4385, "step": 6039 }, { "epoch": 0.1666568944287897, "grad_norm": 0.002936877077445388, "learning_rate": 0.001, "loss": 0.3801, "step": 6040 }, { "epoch": 0.16668448662985408, "grad_norm": 0.002828566124662757, "learning_rate": 0.001, "loss": 0.3962, "step": 6041 }, { "epoch": 0.16671207883091843, "grad_norm": 0.0030576810240745544, "learning_rate": 0.001, "loss": 0.4031, "step": 6042 }, { "epoch": 0.1667396710319828, "grad_norm": 0.0025680421385914087, "learning_rate": 0.001, "loss": 0.4002, "step": 6043 }, { "epoch": 0.1667672632330472, "grad_norm": 0.004450993146747351, "learning_rate": 0.001, "loss": 0.3943, "step": 6044 }, { "epoch": 0.16679485543411154, "grad_norm": 0.0038969400338828564, "learning_rate": 0.001, "loss": 0.4425, "step": 6045 }, { "epoch": 0.16682244763517592, "grad_norm": 0.003842557780444622, "learning_rate": 0.001, "loss": 0.4202, "step": 6046 }, { "epoch": 0.16685003983624028, "grad_norm": 0.002472433727234602, "learning_rate": 0.001, "loss": 0.391, "step": 6047 }, { "epoch": 0.16687763203730466, "grad_norm": 0.00535456370562315, "learning_rate": 0.001, "loss": 0.4123, "step": 6048 }, { "epoch": 0.16690522423836904, "grad_norm": 0.002608702052384615, "learning_rate": 0.001, "loss": 0.4108, "step": 6049 }, { "epoch": 0.1669328164394334, "grad_norm": 0.0029743602499365807, "learning_rate": 0.001, "loss": 0.3424, "step": 6050 }, { "epoch": 0.16696040864049777, "grad_norm": 0.0025020604953169823, "learning_rate": 0.001, "loss": 0.3778, "step": 6051 }, { "epoch": 0.16698800084156212, "grad_norm": 0.0027822526171803474, "learning_rate": 0.001, "loss": 0.4124, "step": 6052 }, { "epoch": 0.1670155930426265, "grad_norm": 0.0028931559063494205, "learning_rate": 0.001, "loss": 0.4171, "step": 6053 }, { "epoch": 0.16704318524369088, "grad_norm": 0.0022724780719727278, "learning_rate": 0.001, "loss": 0.3975, "step": 6054 }, { "epoch": 0.16707077744475524, "grad_norm": 0.004654435440897942, "learning_rate": 0.001, "loss": 0.3738, "step": 6055 }, { "epoch": 0.16709836964581962, "grad_norm": 0.002734872279688716, "learning_rate": 0.001, "loss": 0.4039, "step": 6056 }, { "epoch": 0.16712596184688397, "grad_norm": 0.0025022958870977163, "learning_rate": 0.001, "loss": 0.4061, "step": 6057 }, { "epoch": 0.16715355404794835, "grad_norm": 0.004890537820756435, "learning_rate": 0.001, "loss": 0.4065, "step": 6058 }, { "epoch": 0.16718114624901273, "grad_norm": 0.0023612927179783583, "learning_rate": 0.001, "loss": 0.3985, "step": 6059 }, { "epoch": 0.16720873845007708, "grad_norm": 0.002075622323900461, "learning_rate": 0.001, "loss": 0.4118, "step": 6060 }, { "epoch": 0.16723633065114146, "grad_norm": 0.004314557649195194, "learning_rate": 0.001, "loss": 0.4039, "step": 6061 }, { "epoch": 0.16726392285220582, "grad_norm": 0.0022840569727122784, "learning_rate": 0.001, "loss": 0.3853, "step": 6062 }, { "epoch": 0.1672915150532702, "grad_norm": 0.0027527734637260437, "learning_rate": 0.001, "loss": 0.3942, "step": 6063 }, { "epoch": 0.16731910725433458, "grad_norm": 0.0024163657799363136, "learning_rate": 0.001, "loss": 0.4241, "step": 6064 }, { "epoch": 0.16734669945539893, "grad_norm": 0.0026521470863372087, "learning_rate": 0.001, "loss": 0.4254, "step": 6065 }, { "epoch": 0.1673742916564633, "grad_norm": 0.00407033134251833, "learning_rate": 0.001, "loss": 0.3867, "step": 6066 }, { "epoch": 0.16740188385752766, "grad_norm": 0.0028122677467763424, "learning_rate": 0.001, "loss": 0.3722, "step": 6067 }, { "epoch": 0.16742947605859204, "grad_norm": 0.0021444158628582954, "learning_rate": 0.001, "loss": 0.432, "step": 6068 }, { "epoch": 0.16745706825965642, "grad_norm": 0.004711539018899202, "learning_rate": 0.001, "loss": 0.3673, "step": 6069 }, { "epoch": 0.16748466046072077, "grad_norm": 0.005842708982527256, "learning_rate": 0.001, "loss": 0.3871, "step": 6070 }, { "epoch": 0.16751225266178515, "grad_norm": 0.005441974848508835, "learning_rate": 0.001, "loss": 0.4022, "step": 6071 }, { "epoch": 0.1675398448628495, "grad_norm": 0.005875125993043184, "learning_rate": 0.001, "loss": 0.408, "step": 6072 }, { "epoch": 0.1675674370639139, "grad_norm": 0.002280894201248884, "learning_rate": 0.001, "loss": 0.4282, "step": 6073 }, { "epoch": 0.16759502926497824, "grad_norm": 0.0026473281905055046, "learning_rate": 0.001, "loss": 0.371, "step": 6074 }, { "epoch": 0.16762262146604262, "grad_norm": 0.006329023279249668, "learning_rate": 0.001, "loss": 0.4011, "step": 6075 }, { "epoch": 0.167650213667107, "grad_norm": 0.0029098165687173605, "learning_rate": 0.001, "loss": 0.3979, "step": 6076 }, { "epoch": 0.16767780586817135, "grad_norm": 0.005095341708511114, "learning_rate": 0.001, "loss": 0.4137, "step": 6077 }, { "epoch": 0.16770539806923573, "grad_norm": 0.002626903122290969, "learning_rate": 0.001, "loss": 0.3767, "step": 6078 }, { "epoch": 0.1677329902703001, "grad_norm": 0.006261000409722328, "learning_rate": 0.001, "loss": 0.4167, "step": 6079 }, { "epoch": 0.16776058247136447, "grad_norm": 0.0027065426111221313, "learning_rate": 0.001, "loss": 0.3699, "step": 6080 }, { "epoch": 0.16778817467242885, "grad_norm": 0.0033599373418837786, "learning_rate": 0.001, "loss": 0.3882, "step": 6081 }, { "epoch": 0.1678157668734932, "grad_norm": 0.0021663676016032696, "learning_rate": 0.001, "loss": 0.3641, "step": 6082 }, { "epoch": 0.16784335907455758, "grad_norm": 0.003881096374243498, "learning_rate": 0.001, "loss": 0.3787, "step": 6083 }, { "epoch": 0.16787095127562193, "grad_norm": 0.0030875105876475573, "learning_rate": 0.001, "loss": 0.3872, "step": 6084 }, { "epoch": 0.1678985434766863, "grad_norm": 0.003538950812071562, "learning_rate": 0.001, "loss": 0.4172, "step": 6085 }, { "epoch": 0.1679261356777507, "grad_norm": 0.003060020739212632, "learning_rate": 0.001, "loss": 0.4165, "step": 6086 }, { "epoch": 0.16795372787881505, "grad_norm": 0.0032102216500788927, "learning_rate": 0.001, "loss": 0.4264, "step": 6087 }, { "epoch": 0.16798132007987943, "grad_norm": 0.004532152321189642, "learning_rate": 0.001, "loss": 0.3611, "step": 6088 }, { "epoch": 0.16800891228094378, "grad_norm": 0.002951999893411994, "learning_rate": 0.001, "loss": 0.3728, "step": 6089 }, { "epoch": 0.16803650448200816, "grad_norm": 0.002537887077778578, "learning_rate": 0.001, "loss": 0.3939, "step": 6090 }, { "epoch": 0.16806409668307254, "grad_norm": 0.003645245684310794, "learning_rate": 0.001, "loss": 0.4323, "step": 6091 }, { "epoch": 0.1680916888841369, "grad_norm": 0.0038180460687726736, "learning_rate": 0.001, "loss": 0.3968, "step": 6092 }, { "epoch": 0.16811928108520127, "grad_norm": 0.002475427696481347, "learning_rate": 0.001, "loss": 0.4211, "step": 6093 }, { "epoch": 0.16814687328626562, "grad_norm": 0.017842723056674004, "learning_rate": 0.001, "loss": 0.3785, "step": 6094 }, { "epoch": 0.16817446548733, "grad_norm": 0.0030736385378986597, "learning_rate": 0.001, "loss": 0.4232, "step": 6095 }, { "epoch": 0.16820205768839439, "grad_norm": 0.004124650731682777, "learning_rate": 0.001, "loss": 0.424, "step": 6096 }, { "epoch": 0.16822964988945874, "grad_norm": 0.002600730862468481, "learning_rate": 0.001, "loss": 0.4044, "step": 6097 }, { "epoch": 0.16825724209052312, "grad_norm": 0.003001442411914468, "learning_rate": 0.001, "loss": 0.4263, "step": 6098 }, { "epoch": 0.16828483429158747, "grad_norm": 0.002522877650335431, "learning_rate": 0.001, "loss": 0.4041, "step": 6099 }, { "epoch": 0.16831242649265185, "grad_norm": 0.0038972371257841587, "learning_rate": 0.001, "loss": 0.3812, "step": 6100 }, { "epoch": 0.16834001869371623, "grad_norm": 0.0026751782279461622, "learning_rate": 0.001, "loss": 0.3849, "step": 6101 }, { "epoch": 0.16836761089478058, "grad_norm": 0.003704581642523408, "learning_rate": 0.001, "loss": 0.3573, "step": 6102 }, { "epoch": 0.16839520309584496, "grad_norm": 0.00222760159522295, "learning_rate": 0.001, "loss": 0.4439, "step": 6103 }, { "epoch": 0.16842279529690932, "grad_norm": 0.0037216665223240852, "learning_rate": 0.001, "loss": 0.3748, "step": 6104 }, { "epoch": 0.1684503874979737, "grad_norm": 0.0030143833719193935, "learning_rate": 0.001, "loss": 0.3928, "step": 6105 }, { "epoch": 0.16847797969903808, "grad_norm": 0.0032768959645181894, "learning_rate": 0.001, "loss": 0.3986, "step": 6106 }, { "epoch": 0.16850557190010243, "grad_norm": 0.0028334499802440405, "learning_rate": 0.001, "loss": 0.4071, "step": 6107 }, { "epoch": 0.1685331641011668, "grad_norm": 0.004226780030876398, "learning_rate": 0.001, "loss": 0.354, "step": 6108 }, { "epoch": 0.16856075630223116, "grad_norm": 0.0034058301243931055, "learning_rate": 0.001, "loss": 0.407, "step": 6109 }, { "epoch": 0.16858834850329554, "grad_norm": 0.003791308030486107, "learning_rate": 0.001, "loss": 0.3822, "step": 6110 }, { "epoch": 0.16861594070435992, "grad_norm": 0.0050668418407440186, "learning_rate": 0.001, "loss": 0.395, "step": 6111 }, { "epoch": 0.16864353290542428, "grad_norm": 0.01103867869824171, "learning_rate": 0.001, "loss": 0.3519, "step": 6112 }, { "epoch": 0.16867112510648866, "grad_norm": 0.003474986646324396, "learning_rate": 0.001, "loss": 0.405, "step": 6113 }, { "epoch": 0.168698717307553, "grad_norm": 0.0034254579804837704, "learning_rate": 0.001, "loss": 0.4403, "step": 6114 }, { "epoch": 0.1687263095086174, "grad_norm": 0.0021024683956056833, "learning_rate": 0.001, "loss": 0.4198, "step": 6115 }, { "epoch": 0.16875390170968177, "grad_norm": 0.0043007307685911655, "learning_rate": 0.001, "loss": 0.4036, "step": 6116 }, { "epoch": 0.16878149391074612, "grad_norm": 0.004201627802103758, "learning_rate": 0.001, "loss": 0.3919, "step": 6117 }, { "epoch": 0.1688090861118105, "grad_norm": 0.006456395611166954, "learning_rate": 0.001, "loss": 0.364, "step": 6118 }, { "epoch": 0.16883667831287485, "grad_norm": 0.006011181510984898, "learning_rate": 0.001, "loss": 0.3847, "step": 6119 }, { "epoch": 0.16886427051393924, "grad_norm": 0.0036535318940877914, "learning_rate": 0.001, "loss": 0.3934, "step": 6120 }, { "epoch": 0.16889186271500362, "grad_norm": 0.0032456032931804657, "learning_rate": 0.001, "loss": 0.4035, "step": 6121 }, { "epoch": 0.16891945491606797, "grad_norm": 0.0025099399499595165, "learning_rate": 0.001, "loss": 0.4001, "step": 6122 }, { "epoch": 0.16894704711713235, "grad_norm": 0.003860413795337081, "learning_rate": 0.001, "loss": 0.3662, "step": 6123 }, { "epoch": 0.1689746393181967, "grad_norm": 0.004212185274809599, "learning_rate": 0.001, "loss": 0.3793, "step": 6124 }, { "epoch": 0.16900223151926108, "grad_norm": 0.0037831286899745464, "learning_rate": 0.001, "loss": 0.3896, "step": 6125 }, { "epoch": 0.16902982372032546, "grad_norm": 0.002503293799236417, "learning_rate": 0.001, "loss": 0.3809, "step": 6126 }, { "epoch": 0.16905741592138981, "grad_norm": 0.0032346874941140413, "learning_rate": 0.001, "loss": 0.3859, "step": 6127 }, { "epoch": 0.1690850081224542, "grad_norm": 0.0029911173041909933, "learning_rate": 0.001, "loss": 0.3975, "step": 6128 }, { "epoch": 0.16911260032351855, "grad_norm": 0.003199785714969039, "learning_rate": 0.001, "loss": 0.3726, "step": 6129 }, { "epoch": 0.16914019252458293, "grad_norm": 0.0029822427313774824, "learning_rate": 0.001, "loss": 0.4123, "step": 6130 }, { "epoch": 0.1691677847256473, "grad_norm": 0.0030586915090680122, "learning_rate": 0.001, "loss": 0.4171, "step": 6131 }, { "epoch": 0.16919537692671166, "grad_norm": 0.0046739643439650536, "learning_rate": 0.001, "loss": 0.3746, "step": 6132 }, { "epoch": 0.16922296912777604, "grad_norm": 0.0033701432403177023, "learning_rate": 0.001, "loss": 0.3778, "step": 6133 }, { "epoch": 0.1692505613288404, "grad_norm": 0.003320742631331086, "learning_rate": 0.001, "loss": 0.3892, "step": 6134 }, { "epoch": 0.16927815352990477, "grad_norm": 0.005310896318405867, "learning_rate": 0.001, "loss": 0.4297, "step": 6135 }, { "epoch": 0.16930574573096915, "grad_norm": 0.003215103643015027, "learning_rate": 0.001, "loss": 0.3855, "step": 6136 }, { "epoch": 0.1693333379320335, "grad_norm": 0.002435446949675679, "learning_rate": 0.001, "loss": 0.4226, "step": 6137 }, { "epoch": 0.1693609301330979, "grad_norm": 0.009125195443630219, "learning_rate": 0.001, "loss": 0.3945, "step": 6138 }, { "epoch": 0.16938852233416224, "grad_norm": 0.002583020832389593, "learning_rate": 0.001, "loss": 0.4324, "step": 6139 }, { "epoch": 0.16941611453522662, "grad_norm": 0.003045174991711974, "learning_rate": 0.001, "loss": 0.3868, "step": 6140 }, { "epoch": 0.169443706736291, "grad_norm": 0.0020076248329132795, "learning_rate": 0.001, "loss": 0.3943, "step": 6141 }, { "epoch": 0.16947129893735535, "grad_norm": 0.003063742769882083, "learning_rate": 0.001, "loss": 0.4329, "step": 6142 }, { "epoch": 0.16949889113841973, "grad_norm": 0.00650634104385972, "learning_rate": 0.001, "loss": 0.4143, "step": 6143 }, { "epoch": 0.16952648333948409, "grad_norm": 0.0031983335502445698, "learning_rate": 0.001, "loss": 0.4299, "step": 6144 }, { "epoch": 0.16955407554054847, "grad_norm": 0.0037359180860221386, "learning_rate": 0.001, "loss": 0.3994, "step": 6145 }, { "epoch": 0.16958166774161285, "grad_norm": 0.003823197679594159, "learning_rate": 0.001, "loss": 0.3726, "step": 6146 }, { "epoch": 0.1696092599426772, "grad_norm": 0.0030885476153343916, "learning_rate": 0.001, "loss": 0.3947, "step": 6147 }, { "epoch": 0.16963685214374158, "grad_norm": 0.005116578657180071, "learning_rate": 0.001, "loss": 0.3788, "step": 6148 }, { "epoch": 0.16966444434480593, "grad_norm": 0.003294056048616767, "learning_rate": 0.001, "loss": 0.428, "step": 6149 }, { "epoch": 0.1696920365458703, "grad_norm": 0.002693452872335911, "learning_rate": 0.001, "loss": 0.389, "step": 6150 }, { "epoch": 0.1697196287469347, "grad_norm": 0.0032077941577881575, "learning_rate": 0.001, "loss": 0.3958, "step": 6151 }, { "epoch": 0.16974722094799904, "grad_norm": 0.002029215916991234, "learning_rate": 0.001, "loss": 0.4398, "step": 6152 }, { "epoch": 0.16977481314906342, "grad_norm": 0.002842904767021537, "learning_rate": 0.001, "loss": 0.4213, "step": 6153 }, { "epoch": 0.16980240535012778, "grad_norm": 0.004416587762534618, "learning_rate": 0.001, "loss": 0.3518, "step": 6154 }, { "epoch": 0.16982999755119216, "grad_norm": 0.0031072101555764675, "learning_rate": 0.001, "loss": 0.3979, "step": 6155 }, { "epoch": 0.16985758975225654, "grad_norm": 0.006052012555301189, "learning_rate": 0.001, "loss": 0.4004, "step": 6156 }, { "epoch": 0.1698851819533209, "grad_norm": 0.012233547866344452, "learning_rate": 0.001, "loss": 0.3828, "step": 6157 }, { "epoch": 0.16991277415438527, "grad_norm": 0.004168129060417414, "learning_rate": 0.001, "loss": 0.4157, "step": 6158 }, { "epoch": 0.16994036635544962, "grad_norm": 0.0026695001870393753, "learning_rate": 0.001, "loss": 0.4204, "step": 6159 }, { "epoch": 0.169967958556514, "grad_norm": 0.0034213713370263577, "learning_rate": 0.001, "loss": 0.3999, "step": 6160 }, { "epoch": 0.16999555075757838, "grad_norm": 0.004756170324981213, "learning_rate": 0.001, "loss": 0.3741, "step": 6161 }, { "epoch": 0.17002314295864274, "grad_norm": 0.0028778575360774994, "learning_rate": 0.001, "loss": 0.4134, "step": 6162 }, { "epoch": 0.17005073515970712, "grad_norm": 0.003623120253905654, "learning_rate": 0.001, "loss": 0.4147, "step": 6163 }, { "epoch": 0.17007832736077147, "grad_norm": 0.0027707270346581936, "learning_rate": 0.001, "loss": 0.3961, "step": 6164 }, { "epoch": 0.17010591956183585, "grad_norm": 0.0021031824871897697, "learning_rate": 0.001, "loss": 0.4003, "step": 6165 }, { "epoch": 0.1701335117629002, "grad_norm": 0.0030407633166760206, "learning_rate": 0.001, "loss": 0.4014, "step": 6166 }, { "epoch": 0.17016110396396458, "grad_norm": 0.004170509055256844, "learning_rate": 0.001, "loss": 0.3621, "step": 6167 }, { "epoch": 0.17018869616502896, "grad_norm": 0.0025970737915486097, "learning_rate": 0.001, "loss": 0.4042, "step": 6168 }, { "epoch": 0.17021628836609332, "grad_norm": 0.003734288504347205, "learning_rate": 0.001, "loss": 0.4398, "step": 6169 }, { "epoch": 0.1702438805671577, "grad_norm": 0.00284449546597898, "learning_rate": 0.001, "loss": 0.4262, "step": 6170 }, { "epoch": 0.17027147276822205, "grad_norm": 0.0026834469754248857, "learning_rate": 0.001, "loss": 0.3816, "step": 6171 }, { "epoch": 0.17029906496928643, "grad_norm": 0.002561383182182908, "learning_rate": 0.001, "loss": 0.4185, "step": 6172 }, { "epoch": 0.1703266571703508, "grad_norm": 0.008724918588995934, "learning_rate": 0.001, "loss": 0.3888, "step": 6173 }, { "epoch": 0.17035424937141516, "grad_norm": 0.004458330571651459, "learning_rate": 0.001, "loss": 0.4107, "step": 6174 }, { "epoch": 0.17038184157247954, "grad_norm": 0.004813158418983221, "learning_rate": 0.001, "loss": 0.4391, "step": 6175 }, { "epoch": 0.1704094337735439, "grad_norm": 0.002779042813926935, "learning_rate": 0.001, "loss": 0.3619, "step": 6176 }, { "epoch": 0.17043702597460827, "grad_norm": 0.004016479942947626, "learning_rate": 0.001, "loss": 0.3834, "step": 6177 }, { "epoch": 0.17046461817567266, "grad_norm": 0.0036288578994572163, "learning_rate": 0.001, "loss": 0.4317, "step": 6178 }, { "epoch": 0.170492210376737, "grad_norm": 0.003530343994498253, "learning_rate": 0.001, "loss": 0.4049, "step": 6179 }, { "epoch": 0.1705198025778014, "grad_norm": 0.005636625923216343, "learning_rate": 0.001, "loss": 0.3926, "step": 6180 }, { "epoch": 0.17054739477886574, "grad_norm": 0.004850632511079311, "learning_rate": 0.001, "loss": 0.4102, "step": 6181 }, { "epoch": 0.17057498697993012, "grad_norm": 0.0022970670834183693, "learning_rate": 0.001, "loss": 0.3997, "step": 6182 }, { "epoch": 0.1706025791809945, "grad_norm": 0.0032087513245642185, "learning_rate": 0.001, "loss": 0.3718, "step": 6183 }, { "epoch": 0.17063017138205885, "grad_norm": 0.0027670126873999834, "learning_rate": 0.001, "loss": 0.4282, "step": 6184 }, { "epoch": 0.17065776358312323, "grad_norm": 0.003269513137638569, "learning_rate": 0.001, "loss": 0.4121, "step": 6185 }, { "epoch": 0.1706853557841876, "grad_norm": 0.0030671104323118925, "learning_rate": 0.001, "loss": 0.4107, "step": 6186 }, { "epoch": 0.17071294798525197, "grad_norm": 0.002466262085363269, "learning_rate": 0.001, "loss": 0.4055, "step": 6187 }, { "epoch": 0.17074054018631635, "grad_norm": 0.0033186296932399273, "learning_rate": 0.001, "loss": 0.4113, "step": 6188 }, { "epoch": 0.1707681323873807, "grad_norm": 0.004772027488797903, "learning_rate": 0.001, "loss": 0.3852, "step": 6189 }, { "epoch": 0.17079572458844508, "grad_norm": 0.003858543001115322, "learning_rate": 0.001, "loss": 0.3884, "step": 6190 }, { "epoch": 0.17082331678950943, "grad_norm": 0.003998824395239353, "learning_rate": 0.001, "loss": 0.3572, "step": 6191 }, { "epoch": 0.1708509089905738, "grad_norm": 0.0029393411241471767, "learning_rate": 0.001, "loss": 0.3608, "step": 6192 }, { "epoch": 0.1708785011916382, "grad_norm": 0.002925847191363573, "learning_rate": 0.001, "loss": 0.4047, "step": 6193 }, { "epoch": 0.17090609339270255, "grad_norm": 0.006174467504024506, "learning_rate": 0.001, "loss": 0.3995, "step": 6194 }, { "epoch": 0.17093368559376693, "grad_norm": 0.003490880597382784, "learning_rate": 0.001, "loss": 0.4366, "step": 6195 }, { "epoch": 0.17096127779483128, "grad_norm": 0.00242701661773026, "learning_rate": 0.001, "loss": 0.3756, "step": 6196 }, { "epoch": 0.17098886999589566, "grad_norm": 0.0028819721192121506, "learning_rate": 0.001, "loss": 0.4108, "step": 6197 }, { "epoch": 0.17101646219696004, "grad_norm": 0.0025861901231110096, "learning_rate": 0.001, "loss": 0.4214, "step": 6198 }, { "epoch": 0.1710440543980244, "grad_norm": 0.0020953891798853874, "learning_rate": 0.001, "loss": 0.4142, "step": 6199 }, { "epoch": 0.17107164659908877, "grad_norm": 0.0024096069391816854, "learning_rate": 0.001, "loss": 0.3922, "step": 6200 }, { "epoch": 0.17109923880015313, "grad_norm": 0.0030896812677383423, "learning_rate": 0.001, "loss": 0.4042, "step": 6201 }, { "epoch": 0.1711268310012175, "grad_norm": 0.0025605044793337584, "learning_rate": 0.001, "loss": 0.3839, "step": 6202 }, { "epoch": 0.17115442320228189, "grad_norm": 0.0032270110677927732, "learning_rate": 0.001, "loss": 0.4318, "step": 6203 }, { "epoch": 0.17118201540334624, "grad_norm": 0.002061746781691909, "learning_rate": 0.001, "loss": 0.3874, "step": 6204 }, { "epoch": 0.17120960760441062, "grad_norm": 0.002208675490692258, "learning_rate": 0.001, "loss": 0.4531, "step": 6205 }, { "epoch": 0.17123719980547497, "grad_norm": 0.0019300546264275908, "learning_rate": 0.001, "loss": 0.3975, "step": 6206 }, { "epoch": 0.17126479200653935, "grad_norm": 0.002864664187654853, "learning_rate": 0.001, "loss": 0.4051, "step": 6207 }, { "epoch": 0.17129238420760373, "grad_norm": 0.003815412288531661, "learning_rate": 0.001, "loss": 0.4082, "step": 6208 }, { "epoch": 0.17131997640866808, "grad_norm": 0.0021196226589381695, "learning_rate": 0.001, "loss": 0.401, "step": 6209 }, { "epoch": 0.17134756860973246, "grad_norm": 0.0040994128212332726, "learning_rate": 0.001, "loss": 0.4043, "step": 6210 }, { "epoch": 0.17137516081079682, "grad_norm": 0.0021242250222712755, "learning_rate": 0.001, "loss": 0.4115, "step": 6211 }, { "epoch": 0.1714027530118612, "grad_norm": 0.0023374794982373714, "learning_rate": 0.001, "loss": 0.4438, "step": 6212 }, { "epoch": 0.17143034521292558, "grad_norm": 0.004651496186852455, "learning_rate": 0.001, "loss": 0.3917, "step": 6213 }, { "epoch": 0.17145793741398993, "grad_norm": 0.0039611500687897205, "learning_rate": 0.001, "loss": 0.3808, "step": 6214 }, { "epoch": 0.1714855296150543, "grad_norm": 0.008256969973444939, "learning_rate": 0.001, "loss": 0.3935, "step": 6215 }, { "epoch": 0.17151312181611866, "grad_norm": 0.010161311365664005, "learning_rate": 0.001, "loss": 0.4106, "step": 6216 }, { "epoch": 0.17154071401718304, "grad_norm": 0.010829792357981205, "learning_rate": 0.001, "loss": 0.4025, "step": 6217 }, { "epoch": 0.17156830621824742, "grad_norm": 0.0036437201779335737, "learning_rate": 0.001, "loss": 0.3786, "step": 6218 }, { "epoch": 0.17159589841931178, "grad_norm": 0.0040598101913928986, "learning_rate": 0.001, "loss": 0.3926, "step": 6219 }, { "epoch": 0.17162349062037616, "grad_norm": 0.0032655976247042418, "learning_rate": 0.001, "loss": 0.4099, "step": 6220 }, { "epoch": 0.1716510828214405, "grad_norm": 0.0032314627896994352, "learning_rate": 0.001, "loss": 0.4076, "step": 6221 }, { "epoch": 0.1716786750225049, "grad_norm": 0.0044107455760240555, "learning_rate": 0.001, "loss": 0.3608, "step": 6222 }, { "epoch": 0.17170626722356927, "grad_norm": 0.0073016672395169735, "learning_rate": 0.001, "loss": 0.4034, "step": 6223 }, { "epoch": 0.17173385942463362, "grad_norm": 0.0031636636704206467, "learning_rate": 0.001, "loss": 0.3902, "step": 6224 }, { "epoch": 0.171761451625698, "grad_norm": 0.0030360522214323282, "learning_rate": 0.001, "loss": 0.3553, "step": 6225 }, { "epoch": 0.17178904382676236, "grad_norm": 0.0032519621308892965, "learning_rate": 0.001, "loss": 0.4174, "step": 6226 }, { "epoch": 0.17181663602782674, "grad_norm": 0.002763562835752964, "learning_rate": 0.001, "loss": 0.3958, "step": 6227 }, { "epoch": 0.17184422822889112, "grad_norm": 0.002963263774290681, "learning_rate": 0.001, "loss": 0.4079, "step": 6228 }, { "epoch": 0.17187182042995547, "grad_norm": 0.0156992357224226, "learning_rate": 0.001, "loss": 0.4353, "step": 6229 }, { "epoch": 0.17189941263101985, "grad_norm": 0.009238180704414845, "learning_rate": 0.001, "loss": 0.3851, "step": 6230 }, { "epoch": 0.1719270048320842, "grad_norm": 0.003196639008820057, "learning_rate": 0.001, "loss": 0.416, "step": 6231 }, { "epoch": 0.17195459703314858, "grad_norm": 0.004371176473796368, "learning_rate": 0.001, "loss": 0.4253, "step": 6232 }, { "epoch": 0.17198218923421296, "grad_norm": 0.004302786663174629, "learning_rate": 0.001, "loss": 0.3993, "step": 6233 }, { "epoch": 0.17200978143527731, "grad_norm": 0.0027876824606209993, "learning_rate": 0.001, "loss": 0.4164, "step": 6234 }, { "epoch": 0.1720373736363417, "grad_norm": 0.004181624855846167, "learning_rate": 0.001, "loss": 0.4076, "step": 6235 }, { "epoch": 0.17206496583740605, "grad_norm": 0.0028202803805470467, "learning_rate": 0.001, "loss": 0.3772, "step": 6236 }, { "epoch": 0.17209255803847043, "grad_norm": 0.002387853804975748, "learning_rate": 0.001, "loss": 0.4437, "step": 6237 }, { "epoch": 0.1721201502395348, "grad_norm": 0.0061591207049787045, "learning_rate": 0.001, "loss": 0.3678, "step": 6238 }, { "epoch": 0.17214774244059916, "grad_norm": 0.002573385601863265, "learning_rate": 0.001, "loss": 0.3835, "step": 6239 }, { "epoch": 0.17217533464166354, "grad_norm": 0.0026830765418708324, "learning_rate": 0.001, "loss": 0.3773, "step": 6240 }, { "epoch": 0.1722029268427279, "grad_norm": 0.005540953949093819, "learning_rate": 0.001, "loss": 0.4075, "step": 6241 }, { "epoch": 0.17223051904379227, "grad_norm": 0.0054961638525128365, "learning_rate": 0.001, "loss": 0.4079, "step": 6242 }, { "epoch": 0.17225811124485665, "grad_norm": 0.007950017228722572, "learning_rate": 0.001, "loss": 0.3939, "step": 6243 }, { "epoch": 0.172285703445921, "grad_norm": 0.005001327488571405, "learning_rate": 0.001, "loss": 0.4595, "step": 6244 }, { "epoch": 0.1723132956469854, "grad_norm": 0.011175988242030144, "learning_rate": 0.001, "loss": 0.3986, "step": 6245 }, { "epoch": 0.17234088784804974, "grad_norm": 0.004182237666100264, "learning_rate": 0.001, "loss": 0.4204, "step": 6246 }, { "epoch": 0.17236848004911412, "grad_norm": 0.004050822462886572, "learning_rate": 0.001, "loss": 0.4092, "step": 6247 }, { "epoch": 0.1723960722501785, "grad_norm": 0.0025094212032854557, "learning_rate": 0.001, "loss": 0.4113, "step": 6248 }, { "epoch": 0.17242366445124285, "grad_norm": 0.0029853296000510454, "learning_rate": 0.001, "loss": 0.4068, "step": 6249 }, { "epoch": 0.17245125665230723, "grad_norm": 0.0035753315314650536, "learning_rate": 0.001, "loss": 0.3865, "step": 6250 }, { "epoch": 0.17247884885337159, "grad_norm": 0.0028684705030173063, "learning_rate": 0.001, "loss": 0.3864, "step": 6251 }, { "epoch": 0.17250644105443597, "grad_norm": 0.0019036736339330673, "learning_rate": 0.001, "loss": 0.3883, "step": 6252 }, { "epoch": 0.17253403325550035, "grad_norm": 0.0022934586741030216, "learning_rate": 0.001, "loss": 0.4147, "step": 6253 }, { "epoch": 0.1725616254565647, "grad_norm": 0.0033066177275031805, "learning_rate": 0.001, "loss": 0.4162, "step": 6254 }, { "epoch": 0.17258921765762908, "grad_norm": 0.004688805900514126, "learning_rate": 0.001, "loss": 0.3955, "step": 6255 }, { "epoch": 0.17261680985869343, "grad_norm": 0.0033551587257534266, "learning_rate": 0.001, "loss": 0.4143, "step": 6256 }, { "epoch": 0.1726444020597578, "grad_norm": 0.002868784125894308, "learning_rate": 0.001, "loss": 0.3965, "step": 6257 }, { "epoch": 0.17267199426082216, "grad_norm": 0.0017792254220694304, "learning_rate": 0.001, "loss": 0.3981, "step": 6258 }, { "epoch": 0.17269958646188655, "grad_norm": 0.0052290805615484715, "learning_rate": 0.001, "loss": 0.374, "step": 6259 }, { "epoch": 0.17272717866295093, "grad_norm": 0.003975987434387207, "learning_rate": 0.001, "loss": 0.3784, "step": 6260 }, { "epoch": 0.17275477086401528, "grad_norm": 0.005119245033711195, "learning_rate": 0.001, "loss": 0.383, "step": 6261 }, { "epoch": 0.17278236306507966, "grad_norm": 0.009563916362822056, "learning_rate": 0.001, "loss": 0.4202, "step": 6262 }, { "epoch": 0.172809955266144, "grad_norm": 0.006325340364128351, "learning_rate": 0.001, "loss": 0.4045, "step": 6263 }, { "epoch": 0.1728375474672084, "grad_norm": 0.00598013773560524, "learning_rate": 0.001, "loss": 0.414, "step": 6264 }, { "epoch": 0.17286513966827277, "grad_norm": 0.0035727466456592083, "learning_rate": 0.001, "loss": 0.3876, "step": 6265 }, { "epoch": 0.17289273186933712, "grad_norm": 0.009252941235899925, "learning_rate": 0.001, "loss": 0.4447, "step": 6266 }, { "epoch": 0.1729203240704015, "grad_norm": 0.0041562700644135475, "learning_rate": 0.001, "loss": 0.4291, "step": 6267 }, { "epoch": 0.17294791627146586, "grad_norm": 0.0030006521847099066, "learning_rate": 0.001, "loss": 0.3489, "step": 6268 }, { "epoch": 0.17297550847253024, "grad_norm": 0.002229264471679926, "learning_rate": 0.001, "loss": 0.4225, "step": 6269 }, { "epoch": 0.17300310067359462, "grad_norm": 0.004136356525123119, "learning_rate": 0.001, "loss": 0.3893, "step": 6270 }, { "epoch": 0.17303069287465897, "grad_norm": 0.005907285492867231, "learning_rate": 0.001, "loss": 0.3709, "step": 6271 }, { "epoch": 0.17305828507572335, "grad_norm": 0.003040108596906066, "learning_rate": 0.001, "loss": 0.391, "step": 6272 }, { "epoch": 0.1730858772767877, "grad_norm": 0.0026024484541267157, "learning_rate": 0.001, "loss": 0.4067, "step": 6273 }, { "epoch": 0.17311346947785208, "grad_norm": 0.002952686743810773, "learning_rate": 0.001, "loss": 0.3804, "step": 6274 }, { "epoch": 0.17314106167891646, "grad_norm": 0.004544638562947512, "learning_rate": 0.001, "loss": 0.3958, "step": 6275 }, { "epoch": 0.17316865387998082, "grad_norm": 0.004679075442254543, "learning_rate": 0.001, "loss": 0.3623, "step": 6276 }, { "epoch": 0.1731962460810452, "grad_norm": 0.002047475427389145, "learning_rate": 0.001, "loss": 0.4014, "step": 6277 }, { "epoch": 0.17322383828210955, "grad_norm": 0.005213032476603985, "learning_rate": 0.001, "loss": 0.3915, "step": 6278 }, { "epoch": 0.17325143048317393, "grad_norm": 0.00405911635607481, "learning_rate": 0.001, "loss": 0.4133, "step": 6279 }, { "epoch": 0.1732790226842383, "grad_norm": 0.002308881375938654, "learning_rate": 0.001, "loss": 0.3932, "step": 6280 }, { "epoch": 0.17330661488530266, "grad_norm": 0.007890172302722931, "learning_rate": 0.001, "loss": 0.4045, "step": 6281 }, { "epoch": 0.17333420708636704, "grad_norm": 0.0034512749407440424, "learning_rate": 0.001, "loss": 0.42, "step": 6282 }, { "epoch": 0.1733617992874314, "grad_norm": 0.007768754381686449, "learning_rate": 0.001, "loss": 0.3901, "step": 6283 }, { "epoch": 0.17338939148849578, "grad_norm": 0.004081700462847948, "learning_rate": 0.001, "loss": 0.3718, "step": 6284 }, { "epoch": 0.17341698368956016, "grad_norm": 0.003280284348875284, "learning_rate": 0.001, "loss": 0.417, "step": 6285 }, { "epoch": 0.1734445758906245, "grad_norm": 0.0031469378154724836, "learning_rate": 0.001, "loss": 0.3955, "step": 6286 }, { "epoch": 0.1734721680916889, "grad_norm": 0.002462326781824231, "learning_rate": 0.001, "loss": 0.4204, "step": 6287 }, { "epoch": 0.17349976029275324, "grad_norm": 0.0022118764463812113, "learning_rate": 0.001, "loss": 0.3788, "step": 6288 }, { "epoch": 0.17352735249381762, "grad_norm": 0.0026676817797124386, "learning_rate": 0.001, "loss": 0.3755, "step": 6289 }, { "epoch": 0.173554944694882, "grad_norm": 0.002649841830134392, "learning_rate": 0.001, "loss": 0.4007, "step": 6290 }, { "epoch": 0.17358253689594635, "grad_norm": 0.002229656558483839, "learning_rate": 0.001, "loss": 0.4231, "step": 6291 }, { "epoch": 0.17361012909701073, "grad_norm": 0.0036838806699961424, "learning_rate": 0.001, "loss": 0.4267, "step": 6292 }, { "epoch": 0.1736377212980751, "grad_norm": 0.015469814650714397, "learning_rate": 0.001, "loss": 0.3901, "step": 6293 }, { "epoch": 0.17366531349913947, "grad_norm": 0.002747813006862998, "learning_rate": 0.001, "loss": 0.4272, "step": 6294 }, { "epoch": 0.17369290570020385, "grad_norm": 0.0029808697290718555, "learning_rate": 0.001, "loss": 0.3918, "step": 6295 }, { "epoch": 0.1737204979012682, "grad_norm": 0.0032657973933964968, "learning_rate": 0.001, "loss": 0.4121, "step": 6296 }, { "epoch": 0.17374809010233258, "grad_norm": 0.005452743265777826, "learning_rate": 0.001, "loss": 0.4293, "step": 6297 }, { "epoch": 0.17377568230339693, "grad_norm": 0.00484059751033783, "learning_rate": 0.001, "loss": 0.4184, "step": 6298 }, { "epoch": 0.1738032745044613, "grad_norm": 0.003019709372892976, "learning_rate": 0.001, "loss": 0.3924, "step": 6299 }, { "epoch": 0.1738308667055257, "grad_norm": 0.003449161071330309, "learning_rate": 0.001, "loss": 0.3817, "step": 6300 }, { "epoch": 0.17385845890659005, "grad_norm": 0.004382924642413855, "learning_rate": 0.001, "loss": 0.3602, "step": 6301 }, { "epoch": 0.17388605110765443, "grad_norm": 0.00298452191054821, "learning_rate": 0.001, "loss": 0.3835, "step": 6302 }, { "epoch": 0.17391364330871878, "grad_norm": 0.002635273849591613, "learning_rate": 0.001, "loss": 0.4109, "step": 6303 }, { "epoch": 0.17394123550978316, "grad_norm": 0.0019398960284888744, "learning_rate": 0.001, "loss": 0.4097, "step": 6304 }, { "epoch": 0.17396882771084754, "grad_norm": 0.002571891061961651, "learning_rate": 0.001, "loss": 0.3784, "step": 6305 }, { "epoch": 0.1739964199119119, "grad_norm": 0.004911772906780243, "learning_rate": 0.001, "loss": 0.4232, "step": 6306 }, { "epoch": 0.17402401211297627, "grad_norm": 0.004217732232064009, "learning_rate": 0.001, "loss": 0.4172, "step": 6307 }, { "epoch": 0.17405160431404063, "grad_norm": 0.002490875544026494, "learning_rate": 0.001, "loss": 0.4339, "step": 6308 }, { "epoch": 0.174079196515105, "grad_norm": 0.002398621989414096, "learning_rate": 0.001, "loss": 0.4136, "step": 6309 }, { "epoch": 0.17410678871616939, "grad_norm": 0.0025024251081049442, "learning_rate": 0.001, "loss": 0.4176, "step": 6310 }, { "epoch": 0.17413438091723374, "grad_norm": 0.003449508221819997, "learning_rate": 0.001, "loss": 0.3632, "step": 6311 }, { "epoch": 0.17416197311829812, "grad_norm": 0.003372189588844776, "learning_rate": 0.001, "loss": 0.4212, "step": 6312 }, { "epoch": 0.17418956531936247, "grad_norm": 0.004607321694493294, "learning_rate": 0.001, "loss": 0.3907, "step": 6313 }, { "epoch": 0.17421715752042685, "grad_norm": 0.00431500980630517, "learning_rate": 0.001, "loss": 0.419, "step": 6314 }, { "epoch": 0.17424474972149123, "grad_norm": 0.0045218681916594505, "learning_rate": 0.001, "loss": 0.339, "step": 6315 }, { "epoch": 0.17427234192255558, "grad_norm": 0.005656406749039888, "learning_rate": 0.001, "loss": 0.3859, "step": 6316 }, { "epoch": 0.17429993412361997, "grad_norm": 0.0031431603711098433, "learning_rate": 0.001, "loss": 0.3871, "step": 6317 }, { "epoch": 0.17432752632468432, "grad_norm": 0.0022704950533807278, "learning_rate": 0.001, "loss": 0.398, "step": 6318 }, { "epoch": 0.1743551185257487, "grad_norm": 0.003445026697590947, "learning_rate": 0.001, "loss": 0.3907, "step": 6319 }, { "epoch": 0.17438271072681308, "grad_norm": 0.002719600684940815, "learning_rate": 0.001, "loss": 0.3847, "step": 6320 }, { "epoch": 0.17441030292787743, "grad_norm": 0.0020257963333278894, "learning_rate": 0.001, "loss": 0.3971, "step": 6321 }, { "epoch": 0.1744378951289418, "grad_norm": 0.0022240818943828344, "learning_rate": 0.001, "loss": 0.4141, "step": 6322 }, { "epoch": 0.17446548733000616, "grad_norm": 0.002934374613687396, "learning_rate": 0.001, "loss": 0.4139, "step": 6323 }, { "epoch": 0.17449307953107054, "grad_norm": 0.006465549115091562, "learning_rate": 0.001, "loss": 0.3782, "step": 6324 }, { "epoch": 0.17452067173213492, "grad_norm": 0.005728098098188639, "learning_rate": 0.001, "loss": 0.3999, "step": 6325 }, { "epoch": 0.17454826393319928, "grad_norm": 0.002171339699998498, "learning_rate": 0.001, "loss": 0.4177, "step": 6326 }, { "epoch": 0.17457585613426366, "grad_norm": 0.0026950135361403227, "learning_rate": 0.001, "loss": 0.3974, "step": 6327 }, { "epoch": 0.174603448335328, "grad_norm": 0.006035445258021355, "learning_rate": 0.001, "loss": 0.4045, "step": 6328 }, { "epoch": 0.1746310405363924, "grad_norm": 0.0035918874200433493, "learning_rate": 0.001, "loss": 0.3913, "step": 6329 }, { "epoch": 0.17465863273745677, "grad_norm": 0.002868028124794364, "learning_rate": 0.001, "loss": 0.4279, "step": 6330 }, { "epoch": 0.17468622493852112, "grad_norm": 0.00462915375828743, "learning_rate": 0.001, "loss": 0.405, "step": 6331 }, { "epoch": 0.1747138171395855, "grad_norm": 0.0024865546729415655, "learning_rate": 0.001, "loss": 0.3877, "step": 6332 }, { "epoch": 0.17474140934064986, "grad_norm": 0.002470286563038826, "learning_rate": 0.001, "loss": 0.4129, "step": 6333 }, { "epoch": 0.17476900154171424, "grad_norm": 0.003509331261739135, "learning_rate": 0.001, "loss": 0.3881, "step": 6334 }, { "epoch": 0.17479659374277862, "grad_norm": 0.0032379806507378817, "learning_rate": 0.001, "loss": 0.4061, "step": 6335 }, { "epoch": 0.17482418594384297, "grad_norm": 0.003039590548723936, "learning_rate": 0.001, "loss": 0.3989, "step": 6336 }, { "epoch": 0.17485177814490735, "grad_norm": 0.0033747265115380287, "learning_rate": 0.001, "loss": 0.4049, "step": 6337 }, { "epoch": 0.1748793703459717, "grad_norm": 0.005611809901893139, "learning_rate": 0.001, "loss": 0.4113, "step": 6338 }, { "epoch": 0.17490696254703608, "grad_norm": 0.002599924337118864, "learning_rate": 0.001, "loss": 0.4066, "step": 6339 }, { "epoch": 0.17493455474810046, "grad_norm": 0.0026452033780515194, "learning_rate": 0.001, "loss": 0.3799, "step": 6340 }, { "epoch": 0.17496214694916482, "grad_norm": 0.003480355953797698, "learning_rate": 0.001, "loss": 0.3805, "step": 6341 }, { "epoch": 0.1749897391502292, "grad_norm": 0.0024892655201256275, "learning_rate": 0.001, "loss": 0.4077, "step": 6342 }, { "epoch": 0.17501733135129355, "grad_norm": 0.0024968746583908796, "learning_rate": 0.001, "loss": 0.4035, "step": 6343 }, { "epoch": 0.17504492355235793, "grad_norm": 0.00571107491850853, "learning_rate": 0.001, "loss": 0.398, "step": 6344 }, { "epoch": 0.1750725157534223, "grad_norm": 0.0021744274999946356, "learning_rate": 0.001, "loss": 0.4078, "step": 6345 }, { "epoch": 0.17510010795448666, "grad_norm": 0.004079717211425304, "learning_rate": 0.001, "loss": 0.4082, "step": 6346 }, { "epoch": 0.17512770015555104, "grad_norm": 0.0027901085559278727, "learning_rate": 0.001, "loss": 0.3885, "step": 6347 }, { "epoch": 0.1751552923566154, "grad_norm": 0.0030881750863045454, "learning_rate": 0.001, "loss": 0.4215, "step": 6348 }, { "epoch": 0.17518288455767977, "grad_norm": 0.002648904686793685, "learning_rate": 0.001, "loss": 0.4207, "step": 6349 }, { "epoch": 0.17521047675874415, "grad_norm": 0.004519915673881769, "learning_rate": 0.001, "loss": 0.413, "step": 6350 }, { "epoch": 0.1752380689598085, "grad_norm": 0.004600946791470051, "learning_rate": 0.001, "loss": 0.3957, "step": 6351 }, { "epoch": 0.1752656611608729, "grad_norm": 0.003022734308615327, "learning_rate": 0.001, "loss": 0.4198, "step": 6352 }, { "epoch": 0.17529325336193724, "grad_norm": 0.004520618822425604, "learning_rate": 0.001, "loss": 0.3863, "step": 6353 }, { "epoch": 0.17532084556300162, "grad_norm": 0.0052781859412789345, "learning_rate": 0.001, "loss": 0.3592, "step": 6354 }, { "epoch": 0.17534843776406597, "grad_norm": 0.0028431678656488657, "learning_rate": 0.001, "loss": 0.4286, "step": 6355 }, { "epoch": 0.17537602996513035, "grad_norm": 0.004005215130746365, "learning_rate": 0.001, "loss": 0.4177, "step": 6356 }, { "epoch": 0.17540362216619473, "grad_norm": 0.0038050967268645763, "learning_rate": 0.001, "loss": 0.3892, "step": 6357 }, { "epoch": 0.1754312143672591, "grad_norm": 0.01202372182160616, "learning_rate": 0.001, "loss": 0.4263, "step": 6358 }, { "epoch": 0.17545880656832347, "grad_norm": 0.0020817206241190434, "learning_rate": 0.001, "loss": 0.4242, "step": 6359 }, { "epoch": 0.17548639876938782, "grad_norm": 0.0022702061105519533, "learning_rate": 0.001, "loss": 0.4136, "step": 6360 }, { "epoch": 0.1755139909704522, "grad_norm": 0.002957484917715192, "learning_rate": 0.001, "loss": 0.3985, "step": 6361 }, { "epoch": 0.17554158317151658, "grad_norm": 0.002842365065589547, "learning_rate": 0.001, "loss": 0.4062, "step": 6362 }, { "epoch": 0.17556917537258093, "grad_norm": 0.002651175484061241, "learning_rate": 0.001, "loss": 0.448, "step": 6363 }, { "epoch": 0.1755967675736453, "grad_norm": 0.0024952066596597433, "learning_rate": 0.001, "loss": 0.4306, "step": 6364 }, { "epoch": 0.17562435977470967, "grad_norm": 0.0025687795132398605, "learning_rate": 0.001, "loss": 0.3934, "step": 6365 }, { "epoch": 0.17565195197577405, "grad_norm": 0.002511841943487525, "learning_rate": 0.001, "loss": 0.4229, "step": 6366 }, { "epoch": 0.17567954417683843, "grad_norm": 0.00360535248182714, "learning_rate": 0.001, "loss": 0.4178, "step": 6367 }, { "epoch": 0.17570713637790278, "grad_norm": 0.002814951818436384, "learning_rate": 0.001, "loss": 0.3839, "step": 6368 }, { "epoch": 0.17573472857896716, "grad_norm": 0.003035743487998843, "learning_rate": 0.001, "loss": 0.426, "step": 6369 }, { "epoch": 0.1757623207800315, "grad_norm": 0.0033583652693778276, "learning_rate": 0.001, "loss": 0.3715, "step": 6370 }, { "epoch": 0.1757899129810959, "grad_norm": 0.0023555420339107513, "learning_rate": 0.001, "loss": 0.3874, "step": 6371 }, { "epoch": 0.17581750518216027, "grad_norm": 0.0036680963821709156, "learning_rate": 0.001, "loss": 0.4161, "step": 6372 }, { "epoch": 0.17584509738322462, "grad_norm": 0.00232534552924335, "learning_rate": 0.001, "loss": 0.422, "step": 6373 }, { "epoch": 0.175872689584289, "grad_norm": 0.0050485520623624325, "learning_rate": 0.001, "loss": 0.3911, "step": 6374 }, { "epoch": 0.17590028178535336, "grad_norm": 0.004004286136478186, "learning_rate": 0.001, "loss": 0.3798, "step": 6375 }, { "epoch": 0.17592787398641774, "grad_norm": 0.003267053049057722, "learning_rate": 0.001, "loss": 0.4085, "step": 6376 }, { "epoch": 0.17595546618748212, "grad_norm": 0.002155203837901354, "learning_rate": 0.001, "loss": 0.4274, "step": 6377 }, { "epoch": 0.17598305838854647, "grad_norm": 0.030459165573120117, "learning_rate": 0.001, "loss": 0.379, "step": 6378 }, { "epoch": 0.17601065058961085, "grad_norm": 0.007769315037876368, "learning_rate": 0.001, "loss": 0.3776, "step": 6379 }, { "epoch": 0.1760382427906752, "grad_norm": 0.00399011978879571, "learning_rate": 0.001, "loss": 0.4016, "step": 6380 }, { "epoch": 0.17606583499173958, "grad_norm": 0.003226799890398979, "learning_rate": 0.001, "loss": 0.4185, "step": 6381 }, { "epoch": 0.17609342719280396, "grad_norm": 0.005244703497737646, "learning_rate": 0.001, "loss": 0.3881, "step": 6382 }, { "epoch": 0.17612101939386832, "grad_norm": 0.00664431881159544, "learning_rate": 0.001, "loss": 0.3848, "step": 6383 }, { "epoch": 0.1761486115949327, "grad_norm": 0.003033186076208949, "learning_rate": 0.001, "loss": 0.3989, "step": 6384 }, { "epoch": 0.17617620379599705, "grad_norm": 0.0022742394357919693, "learning_rate": 0.001, "loss": 0.4217, "step": 6385 }, { "epoch": 0.17620379599706143, "grad_norm": 0.0029898136854171753, "learning_rate": 0.001, "loss": 0.4422, "step": 6386 }, { "epoch": 0.1762313881981258, "grad_norm": 0.0022048363462090492, "learning_rate": 0.001, "loss": 0.4039, "step": 6387 }, { "epoch": 0.17625898039919016, "grad_norm": 0.0021618627943098545, "learning_rate": 0.001, "loss": 0.4049, "step": 6388 }, { "epoch": 0.17628657260025454, "grad_norm": 0.0029654805548489094, "learning_rate": 0.001, "loss": 0.3764, "step": 6389 }, { "epoch": 0.1763141648013189, "grad_norm": 0.0045374371111392975, "learning_rate": 0.001, "loss": 0.4074, "step": 6390 }, { "epoch": 0.17634175700238328, "grad_norm": 0.00429152837023139, "learning_rate": 0.001, "loss": 0.3708, "step": 6391 }, { "epoch": 0.17636934920344766, "grad_norm": 0.004207782447338104, "learning_rate": 0.001, "loss": 0.4119, "step": 6392 }, { "epoch": 0.176396941404512, "grad_norm": 0.0025363874156028032, "learning_rate": 0.001, "loss": 0.4166, "step": 6393 }, { "epoch": 0.1764245336055764, "grad_norm": 0.004385404288768768, "learning_rate": 0.001, "loss": 0.3839, "step": 6394 }, { "epoch": 0.17645212580664074, "grad_norm": 0.0033825349528342485, "learning_rate": 0.001, "loss": 0.407, "step": 6395 }, { "epoch": 0.17647971800770512, "grad_norm": 0.002470629056915641, "learning_rate": 0.001, "loss": 0.3783, "step": 6396 }, { "epoch": 0.1765073102087695, "grad_norm": 0.004038370680063963, "learning_rate": 0.001, "loss": 0.4126, "step": 6397 }, { "epoch": 0.17653490240983385, "grad_norm": 0.002262383932247758, "learning_rate": 0.001, "loss": 0.44, "step": 6398 }, { "epoch": 0.17656249461089824, "grad_norm": 0.0023851392325013876, "learning_rate": 0.001, "loss": 0.3765, "step": 6399 }, { "epoch": 0.1765900868119626, "grad_norm": 0.0039879628457129, "learning_rate": 0.001, "loss": 0.3875, "step": 6400 }, { "epoch": 0.17661767901302697, "grad_norm": 0.0036031249910593033, "learning_rate": 0.001, "loss": 0.4164, "step": 6401 }, { "epoch": 0.17664527121409135, "grad_norm": 0.0038508297875523567, "learning_rate": 0.001, "loss": 0.3642, "step": 6402 }, { "epoch": 0.1766728634151557, "grad_norm": 0.0030231124255806208, "learning_rate": 0.001, "loss": 0.439, "step": 6403 }, { "epoch": 0.17670045561622008, "grad_norm": 0.003226878121495247, "learning_rate": 0.001, "loss": 0.3718, "step": 6404 }, { "epoch": 0.17672804781728443, "grad_norm": 0.003266223007813096, "learning_rate": 0.001, "loss": 0.3595, "step": 6405 }, { "epoch": 0.17675564001834881, "grad_norm": 0.003785705426707864, "learning_rate": 0.001, "loss": 0.4091, "step": 6406 }, { "epoch": 0.1767832322194132, "grad_norm": 0.0025163183454424143, "learning_rate": 0.001, "loss": 0.4141, "step": 6407 }, { "epoch": 0.17681082442047755, "grad_norm": 0.002644954714924097, "learning_rate": 0.001, "loss": 0.4054, "step": 6408 }, { "epoch": 0.17683841662154193, "grad_norm": 0.003993362188339233, "learning_rate": 0.001, "loss": 0.3773, "step": 6409 }, { "epoch": 0.17686600882260628, "grad_norm": 0.0027130364906042814, "learning_rate": 0.001, "loss": 0.3985, "step": 6410 }, { "epoch": 0.17689360102367066, "grad_norm": 0.005328681785613298, "learning_rate": 0.001, "loss": 0.3954, "step": 6411 }, { "epoch": 0.17692119322473504, "grad_norm": 0.0027323930989950895, "learning_rate": 0.001, "loss": 0.4151, "step": 6412 }, { "epoch": 0.1769487854257994, "grad_norm": 0.002667306223884225, "learning_rate": 0.001, "loss": 0.3945, "step": 6413 }, { "epoch": 0.17697637762686377, "grad_norm": 0.0038074366748332977, "learning_rate": 0.001, "loss": 0.4259, "step": 6414 }, { "epoch": 0.17700396982792813, "grad_norm": 0.002215777989476919, "learning_rate": 0.001, "loss": 0.4414, "step": 6415 }, { "epoch": 0.1770315620289925, "grad_norm": 0.002434387104585767, "learning_rate": 0.001, "loss": 0.3959, "step": 6416 }, { "epoch": 0.1770591542300569, "grad_norm": 0.0024976793210953474, "learning_rate": 0.001, "loss": 0.4113, "step": 6417 }, { "epoch": 0.17708674643112124, "grad_norm": 0.0029137188103049994, "learning_rate": 0.001, "loss": 0.4001, "step": 6418 }, { "epoch": 0.17711433863218562, "grad_norm": 0.002975389827042818, "learning_rate": 0.001, "loss": 0.3774, "step": 6419 }, { "epoch": 0.17714193083324997, "grad_norm": 0.002931037684902549, "learning_rate": 0.001, "loss": 0.4205, "step": 6420 }, { "epoch": 0.17716952303431435, "grad_norm": 0.004573407117277384, "learning_rate": 0.001, "loss": 0.4018, "step": 6421 }, { "epoch": 0.17719711523537873, "grad_norm": 0.003066561883315444, "learning_rate": 0.001, "loss": 0.4224, "step": 6422 }, { "epoch": 0.17722470743644309, "grad_norm": 0.003729255637153983, "learning_rate": 0.001, "loss": 0.4228, "step": 6423 }, { "epoch": 0.17725229963750747, "grad_norm": 0.0029258355498313904, "learning_rate": 0.001, "loss": 0.3862, "step": 6424 }, { "epoch": 0.17727989183857182, "grad_norm": 0.0034123340155929327, "learning_rate": 0.001, "loss": 0.405, "step": 6425 }, { "epoch": 0.1773074840396362, "grad_norm": 0.0038247809279710054, "learning_rate": 0.001, "loss": 0.4374, "step": 6426 }, { "epoch": 0.17733507624070058, "grad_norm": 0.003323949873447418, "learning_rate": 0.001, "loss": 0.3942, "step": 6427 }, { "epoch": 0.17736266844176493, "grad_norm": 0.01532609574496746, "learning_rate": 0.001, "loss": 0.4212, "step": 6428 }, { "epoch": 0.1773902606428293, "grad_norm": 0.0025842315517365932, "learning_rate": 0.001, "loss": 0.4241, "step": 6429 }, { "epoch": 0.17741785284389366, "grad_norm": 0.003932729829102755, "learning_rate": 0.001, "loss": 0.4104, "step": 6430 }, { "epoch": 0.17744544504495804, "grad_norm": 0.00350461108610034, "learning_rate": 0.001, "loss": 0.3995, "step": 6431 }, { "epoch": 0.17747303724602242, "grad_norm": 0.0030087281484156847, "learning_rate": 0.001, "loss": 0.4106, "step": 6432 }, { "epoch": 0.17750062944708678, "grad_norm": 0.006132917013019323, "learning_rate": 0.001, "loss": 0.354, "step": 6433 }, { "epoch": 0.17752822164815116, "grad_norm": 0.0028762738220393658, "learning_rate": 0.001, "loss": 0.4173, "step": 6434 }, { "epoch": 0.1775558138492155, "grad_norm": 0.0023419486824423075, "learning_rate": 0.001, "loss": 0.376, "step": 6435 }, { "epoch": 0.1775834060502799, "grad_norm": 0.0026978617534041405, "learning_rate": 0.001, "loss": 0.3891, "step": 6436 }, { "epoch": 0.17761099825134427, "grad_norm": 0.003220965852960944, "learning_rate": 0.001, "loss": 0.4197, "step": 6437 }, { "epoch": 0.17763859045240862, "grad_norm": 0.013487125746905804, "learning_rate": 0.001, "loss": 0.4159, "step": 6438 }, { "epoch": 0.177666182653473, "grad_norm": 0.0024956711567938328, "learning_rate": 0.001, "loss": 0.3742, "step": 6439 }, { "epoch": 0.17769377485453736, "grad_norm": 0.002173262881115079, "learning_rate": 0.001, "loss": 0.4185, "step": 6440 }, { "epoch": 0.17772136705560174, "grad_norm": 0.0020064099226146936, "learning_rate": 0.001, "loss": 0.4062, "step": 6441 }, { "epoch": 0.17774895925666612, "grad_norm": 0.002172301523387432, "learning_rate": 0.001, "loss": 0.4002, "step": 6442 }, { "epoch": 0.17777655145773047, "grad_norm": 0.0030487473122775555, "learning_rate": 0.001, "loss": 0.4101, "step": 6443 }, { "epoch": 0.17780414365879485, "grad_norm": 0.0025476363953202963, "learning_rate": 0.001, "loss": 0.4063, "step": 6444 }, { "epoch": 0.1778317358598592, "grad_norm": 0.002622386207804084, "learning_rate": 0.001, "loss": 0.4083, "step": 6445 }, { "epoch": 0.17785932806092358, "grad_norm": 0.00240062247030437, "learning_rate": 0.001, "loss": 0.3893, "step": 6446 }, { "epoch": 0.17788692026198794, "grad_norm": 0.0036130244843661785, "learning_rate": 0.001, "loss": 0.4177, "step": 6447 }, { "epoch": 0.17791451246305232, "grad_norm": 0.002018724335357547, "learning_rate": 0.001, "loss": 0.4125, "step": 6448 }, { "epoch": 0.1779421046641167, "grad_norm": 0.0024812505580484867, "learning_rate": 0.001, "loss": 0.3876, "step": 6449 }, { "epoch": 0.17796969686518105, "grad_norm": 0.002273386111482978, "learning_rate": 0.001, "loss": 0.3907, "step": 6450 }, { "epoch": 0.17799728906624543, "grad_norm": 0.0027766439598053694, "learning_rate": 0.001, "loss": 0.3812, "step": 6451 }, { "epoch": 0.17802488126730978, "grad_norm": 0.003901482792571187, "learning_rate": 0.001, "loss": 0.3847, "step": 6452 }, { "epoch": 0.17805247346837416, "grad_norm": 0.003208085196092725, "learning_rate": 0.001, "loss": 0.4277, "step": 6453 }, { "epoch": 0.17808006566943854, "grad_norm": 0.0029840737115591764, "learning_rate": 0.001, "loss": 0.4254, "step": 6454 }, { "epoch": 0.1781076578705029, "grad_norm": 0.00228834873996675, "learning_rate": 0.001, "loss": 0.3943, "step": 6455 }, { "epoch": 0.17813525007156727, "grad_norm": 0.0035069435834884644, "learning_rate": 0.001, "loss": 0.3772, "step": 6456 }, { "epoch": 0.17816284227263163, "grad_norm": 0.0046169045381248, "learning_rate": 0.001, "loss": 0.3904, "step": 6457 }, { "epoch": 0.178190434473696, "grad_norm": 0.002904290799051523, "learning_rate": 0.001, "loss": 0.4014, "step": 6458 }, { "epoch": 0.1782180266747604, "grad_norm": 0.004634745419025421, "learning_rate": 0.001, "loss": 0.3864, "step": 6459 }, { "epoch": 0.17824561887582474, "grad_norm": 0.004808775614947081, "learning_rate": 0.001, "loss": 0.3519, "step": 6460 }, { "epoch": 0.17827321107688912, "grad_norm": 0.005992877297103405, "learning_rate": 0.001, "loss": 0.3487, "step": 6461 }, { "epoch": 0.17830080327795347, "grad_norm": 0.003151120152324438, "learning_rate": 0.001, "loss": 0.3892, "step": 6462 }, { "epoch": 0.17832839547901785, "grad_norm": 0.0020630250219255686, "learning_rate": 0.001, "loss": 0.4114, "step": 6463 }, { "epoch": 0.17835598768008223, "grad_norm": 0.0031087875831872225, "learning_rate": 0.001, "loss": 0.3838, "step": 6464 }, { "epoch": 0.1783835798811466, "grad_norm": 0.001903343596495688, "learning_rate": 0.001, "loss": 0.4283, "step": 6465 }, { "epoch": 0.17841117208221097, "grad_norm": 0.0029442012310028076, "learning_rate": 0.001, "loss": 0.4059, "step": 6466 }, { "epoch": 0.17843876428327532, "grad_norm": 0.003048170590773225, "learning_rate": 0.001, "loss": 0.3653, "step": 6467 }, { "epoch": 0.1784663564843397, "grad_norm": 0.002289197174832225, "learning_rate": 0.001, "loss": 0.4146, "step": 6468 }, { "epoch": 0.17849394868540408, "grad_norm": 0.001974297221750021, "learning_rate": 0.001, "loss": 0.4418, "step": 6469 }, { "epoch": 0.17852154088646843, "grad_norm": 0.0037421276792883873, "learning_rate": 0.001, "loss": 0.3631, "step": 6470 }, { "epoch": 0.1785491330875328, "grad_norm": 0.0027026592288166285, "learning_rate": 0.001, "loss": 0.4292, "step": 6471 }, { "epoch": 0.17857672528859717, "grad_norm": 0.0024960192386060953, "learning_rate": 0.001, "loss": 0.4296, "step": 6472 }, { "epoch": 0.17860431748966155, "grad_norm": 0.0029041236266493797, "learning_rate": 0.001, "loss": 0.3639, "step": 6473 }, { "epoch": 0.17863190969072593, "grad_norm": 0.003906071186065674, "learning_rate": 0.001, "loss": 0.4274, "step": 6474 }, { "epoch": 0.17865950189179028, "grad_norm": 0.002730944426730275, "learning_rate": 0.001, "loss": 0.3775, "step": 6475 }, { "epoch": 0.17868709409285466, "grad_norm": 0.002463391749188304, "learning_rate": 0.001, "loss": 0.4227, "step": 6476 }, { "epoch": 0.178714686293919, "grad_norm": 0.003006384475156665, "learning_rate": 0.001, "loss": 0.3643, "step": 6477 }, { "epoch": 0.1787422784949834, "grad_norm": 0.002436541486531496, "learning_rate": 0.001, "loss": 0.3971, "step": 6478 }, { "epoch": 0.17876987069604777, "grad_norm": 0.007043438032269478, "learning_rate": 0.001, "loss": 0.42, "step": 6479 }, { "epoch": 0.17879746289711212, "grad_norm": 0.0045332033187150955, "learning_rate": 0.001, "loss": 0.3988, "step": 6480 }, { "epoch": 0.1788250550981765, "grad_norm": 0.006813266780227423, "learning_rate": 0.001, "loss": 0.3835, "step": 6481 }, { "epoch": 0.17885264729924086, "grad_norm": 0.003106794785708189, "learning_rate": 0.001, "loss": 0.4195, "step": 6482 }, { "epoch": 0.17888023950030524, "grad_norm": 0.003665417432785034, "learning_rate": 0.001, "loss": 0.4124, "step": 6483 }, { "epoch": 0.17890783170136962, "grad_norm": 0.002157231792807579, "learning_rate": 0.001, "loss": 0.4128, "step": 6484 }, { "epoch": 0.17893542390243397, "grad_norm": 0.0032816650345921516, "learning_rate": 0.001, "loss": 0.3797, "step": 6485 }, { "epoch": 0.17896301610349835, "grad_norm": 0.003755836049094796, "learning_rate": 0.001, "loss": 0.3927, "step": 6486 }, { "epoch": 0.1789906083045627, "grad_norm": 0.0028953254222869873, "learning_rate": 0.001, "loss": 0.3926, "step": 6487 }, { "epoch": 0.17901820050562708, "grad_norm": 0.004839729517698288, "learning_rate": 0.001, "loss": 0.3948, "step": 6488 }, { "epoch": 0.17904579270669146, "grad_norm": 0.003079955466091633, "learning_rate": 0.001, "loss": 0.4396, "step": 6489 }, { "epoch": 0.17907338490775582, "grad_norm": 0.0024841863196343184, "learning_rate": 0.001, "loss": 0.3973, "step": 6490 }, { "epoch": 0.1791009771088202, "grad_norm": 0.0028867730870842934, "learning_rate": 0.001, "loss": 0.3779, "step": 6491 }, { "epoch": 0.17912856930988455, "grad_norm": 0.0027325463015586138, "learning_rate": 0.001, "loss": 0.4084, "step": 6492 }, { "epoch": 0.17915616151094893, "grad_norm": 0.006061290390789509, "learning_rate": 0.001, "loss": 0.4036, "step": 6493 }, { "epoch": 0.1791837537120133, "grad_norm": 0.002831445774063468, "learning_rate": 0.001, "loss": 0.3903, "step": 6494 }, { "epoch": 0.17921134591307766, "grad_norm": 0.004074187949299812, "learning_rate": 0.001, "loss": 0.3793, "step": 6495 }, { "epoch": 0.17923893811414204, "grad_norm": 0.0037040761671960354, "learning_rate": 0.001, "loss": 0.3831, "step": 6496 }, { "epoch": 0.1792665303152064, "grad_norm": 0.0032522703986614943, "learning_rate": 0.001, "loss": 0.4091, "step": 6497 }, { "epoch": 0.17929412251627078, "grad_norm": 0.003379018511623144, "learning_rate": 0.001, "loss": 0.3869, "step": 6498 }, { "epoch": 0.17932171471733516, "grad_norm": 0.004454959649592638, "learning_rate": 0.001, "loss": 0.3864, "step": 6499 }, { "epoch": 0.1793493069183995, "grad_norm": 0.006603573448956013, "learning_rate": 0.001, "loss": 0.378, "step": 6500 }, { "epoch": 0.1793493069183995, "eval_runtime": 25.1324, "eval_samples_per_second": 1.273, "eval_steps_per_second": 0.159, "step": 6500 }, { "epoch": 0.1793768991194639, "grad_norm": 0.002346447203308344, "learning_rate": 0.001, "loss": 0.4272, "step": 6501 }, { "epoch": 0.17940449132052824, "grad_norm": 0.004839559551328421, "learning_rate": 0.001, "loss": 0.3956, "step": 6502 }, { "epoch": 0.17943208352159262, "grad_norm": 0.0033710715360939503, "learning_rate": 0.001, "loss": 0.4238, "step": 6503 }, { "epoch": 0.179459675722657, "grad_norm": 0.003859972581267357, "learning_rate": 0.001, "loss": 0.4184, "step": 6504 }, { "epoch": 0.17948726792372136, "grad_norm": 0.004193128552287817, "learning_rate": 0.001, "loss": 0.443, "step": 6505 }, { "epoch": 0.17951486012478574, "grad_norm": 0.0029107420705258846, "learning_rate": 0.001, "loss": 0.4284, "step": 6506 }, { "epoch": 0.1795424523258501, "grad_norm": 0.0028658658266067505, "learning_rate": 0.001, "loss": 0.4214, "step": 6507 }, { "epoch": 0.17957004452691447, "grad_norm": 0.0021101958118379116, "learning_rate": 0.001, "loss": 0.4653, "step": 6508 }, { "epoch": 0.17959763672797885, "grad_norm": 0.0034610512666404247, "learning_rate": 0.001, "loss": 0.3924, "step": 6509 }, { "epoch": 0.1796252289290432, "grad_norm": 0.004026256036013365, "learning_rate": 0.001, "loss": 0.3748, "step": 6510 }, { "epoch": 0.17965282113010758, "grad_norm": 0.004078659228980541, "learning_rate": 0.001, "loss": 0.3741, "step": 6511 }, { "epoch": 0.17968041333117193, "grad_norm": 0.006540537811815739, "learning_rate": 0.001, "loss": 0.3947, "step": 6512 }, { "epoch": 0.17970800553223631, "grad_norm": 0.002590343588963151, "learning_rate": 0.001, "loss": 0.3711, "step": 6513 }, { "epoch": 0.1797355977333007, "grad_norm": 0.005247985944151878, "learning_rate": 0.001, "loss": 0.4139, "step": 6514 }, { "epoch": 0.17976318993436505, "grad_norm": 0.0027244454249739647, "learning_rate": 0.001, "loss": 0.3928, "step": 6515 }, { "epoch": 0.17979078213542943, "grad_norm": 0.002999567426741123, "learning_rate": 0.001, "loss": 0.3676, "step": 6516 }, { "epoch": 0.17981837433649378, "grad_norm": 0.002379565965384245, "learning_rate": 0.001, "loss": 0.3766, "step": 6517 }, { "epoch": 0.17984596653755816, "grad_norm": 0.0031703212298452854, "learning_rate": 0.001, "loss": 0.4199, "step": 6518 }, { "epoch": 0.17987355873862254, "grad_norm": 0.005782125052064657, "learning_rate": 0.001, "loss": 0.4311, "step": 6519 }, { "epoch": 0.1799011509396869, "grad_norm": 0.002240552334114909, "learning_rate": 0.001, "loss": 0.3879, "step": 6520 }, { "epoch": 0.17992874314075127, "grad_norm": 0.0027389421593397856, "learning_rate": 0.001, "loss": 0.3973, "step": 6521 }, { "epoch": 0.17995633534181563, "grad_norm": 0.0032193311490118504, "learning_rate": 0.001, "loss": 0.389, "step": 6522 }, { "epoch": 0.17998392754288, "grad_norm": 0.002220897702500224, "learning_rate": 0.001, "loss": 0.3952, "step": 6523 }, { "epoch": 0.1800115197439444, "grad_norm": 0.0031423268374055624, "learning_rate": 0.001, "loss": 0.3548, "step": 6524 }, { "epoch": 0.18003911194500874, "grad_norm": 0.0029932213947176933, "learning_rate": 0.001, "loss": 0.4019, "step": 6525 }, { "epoch": 0.18006670414607312, "grad_norm": 0.0042558046989142895, "learning_rate": 0.001, "loss": 0.4004, "step": 6526 }, { "epoch": 0.18009429634713747, "grad_norm": 0.006126770284026861, "learning_rate": 0.001, "loss": 0.392, "step": 6527 }, { "epoch": 0.18012188854820185, "grad_norm": 0.0038677749689668417, "learning_rate": 0.001, "loss": 0.3591, "step": 6528 }, { "epoch": 0.18014948074926623, "grad_norm": 0.004701892379671335, "learning_rate": 0.001, "loss": 0.4048, "step": 6529 }, { "epoch": 0.18017707295033059, "grad_norm": 0.002978444332256913, "learning_rate": 0.001, "loss": 0.3636, "step": 6530 }, { "epoch": 0.18020466515139497, "grad_norm": 0.003622008254751563, "learning_rate": 0.001, "loss": 0.3938, "step": 6531 }, { "epoch": 0.18023225735245932, "grad_norm": 0.004150434397161007, "learning_rate": 0.001, "loss": 0.3921, "step": 6532 }, { "epoch": 0.1802598495535237, "grad_norm": 0.006446919869631529, "learning_rate": 0.001, "loss": 0.3843, "step": 6533 }, { "epoch": 0.18028744175458808, "grad_norm": 0.0027283939998596907, "learning_rate": 0.001, "loss": 0.3764, "step": 6534 }, { "epoch": 0.18031503395565243, "grad_norm": 0.0035466288682073355, "learning_rate": 0.001, "loss": 0.4308, "step": 6535 }, { "epoch": 0.1803426261567168, "grad_norm": 0.0033509554341435432, "learning_rate": 0.001, "loss": 0.3809, "step": 6536 }, { "epoch": 0.18037021835778116, "grad_norm": 0.0052613504230976105, "learning_rate": 0.001, "loss": 0.3677, "step": 6537 }, { "epoch": 0.18039781055884554, "grad_norm": 0.0029608176555484533, "learning_rate": 0.001, "loss": 0.4364, "step": 6538 }, { "epoch": 0.18042540275990993, "grad_norm": 0.002105949679389596, "learning_rate": 0.001, "loss": 0.3944, "step": 6539 }, { "epoch": 0.18045299496097428, "grad_norm": 0.002690407680347562, "learning_rate": 0.001, "loss": 0.3894, "step": 6540 }, { "epoch": 0.18048058716203866, "grad_norm": 0.0024390683975070715, "learning_rate": 0.001, "loss": 0.4373, "step": 6541 }, { "epoch": 0.180508179363103, "grad_norm": 0.0034981141798198223, "learning_rate": 0.001, "loss": 0.3952, "step": 6542 }, { "epoch": 0.1805357715641674, "grad_norm": 0.003039612900465727, "learning_rate": 0.001, "loss": 0.3999, "step": 6543 }, { "epoch": 0.18056336376523174, "grad_norm": 0.00274271029047668, "learning_rate": 0.001, "loss": 0.3842, "step": 6544 }, { "epoch": 0.18059095596629612, "grad_norm": 0.002432740991935134, "learning_rate": 0.001, "loss": 0.4024, "step": 6545 }, { "epoch": 0.1806185481673605, "grad_norm": 0.003491653362289071, "learning_rate": 0.001, "loss": 0.4019, "step": 6546 }, { "epoch": 0.18064614036842486, "grad_norm": 0.0032173325307667255, "learning_rate": 0.001, "loss": 0.4048, "step": 6547 }, { "epoch": 0.18067373256948924, "grad_norm": 0.0029747970402240753, "learning_rate": 0.001, "loss": 0.3918, "step": 6548 }, { "epoch": 0.1807013247705536, "grad_norm": 0.007667601108551025, "learning_rate": 0.001, "loss": 0.3944, "step": 6549 }, { "epoch": 0.18072891697161797, "grad_norm": 0.003929935861378908, "learning_rate": 0.001, "loss": 0.3994, "step": 6550 }, { "epoch": 0.18075650917268235, "grad_norm": 0.004057936370372772, "learning_rate": 0.001, "loss": 0.4289, "step": 6551 }, { "epoch": 0.1807841013737467, "grad_norm": 0.003355666296556592, "learning_rate": 0.001, "loss": 0.3934, "step": 6552 }, { "epoch": 0.18081169357481108, "grad_norm": 0.0018296247581019998, "learning_rate": 0.001, "loss": 0.4068, "step": 6553 }, { "epoch": 0.18083928577587544, "grad_norm": 0.0028984597884118557, "learning_rate": 0.001, "loss": 0.4098, "step": 6554 }, { "epoch": 0.18086687797693982, "grad_norm": 0.004249675665050745, "learning_rate": 0.001, "loss": 0.3854, "step": 6555 }, { "epoch": 0.1808944701780042, "grad_norm": 0.002462320262566209, "learning_rate": 0.001, "loss": 0.4299, "step": 6556 }, { "epoch": 0.18092206237906855, "grad_norm": 0.003266765736043453, "learning_rate": 0.001, "loss": 0.3725, "step": 6557 }, { "epoch": 0.18094965458013293, "grad_norm": 0.002418224699795246, "learning_rate": 0.001, "loss": 0.4059, "step": 6558 }, { "epoch": 0.18097724678119728, "grad_norm": 0.002268604701384902, "learning_rate": 0.001, "loss": 0.4535, "step": 6559 }, { "epoch": 0.18100483898226166, "grad_norm": 0.0030840514227747917, "learning_rate": 0.001, "loss": 0.4107, "step": 6560 }, { "epoch": 0.18103243118332604, "grad_norm": 0.003670538542792201, "learning_rate": 0.001, "loss": 0.3636, "step": 6561 }, { "epoch": 0.1810600233843904, "grad_norm": 0.003579066600650549, "learning_rate": 0.001, "loss": 0.3943, "step": 6562 }, { "epoch": 0.18108761558545478, "grad_norm": 0.008917649276554585, "learning_rate": 0.001, "loss": 0.4046, "step": 6563 }, { "epoch": 0.18111520778651913, "grad_norm": 0.004259804729372263, "learning_rate": 0.001, "loss": 0.4289, "step": 6564 }, { "epoch": 0.1811427999875835, "grad_norm": 0.0026127335149794817, "learning_rate": 0.001, "loss": 0.4319, "step": 6565 }, { "epoch": 0.1811703921886479, "grad_norm": 0.00313868117518723, "learning_rate": 0.001, "loss": 0.3858, "step": 6566 }, { "epoch": 0.18119798438971224, "grad_norm": 0.002577871782705188, "learning_rate": 0.001, "loss": 0.4148, "step": 6567 }, { "epoch": 0.18122557659077662, "grad_norm": 0.0029946148861199617, "learning_rate": 0.001, "loss": 0.3963, "step": 6568 }, { "epoch": 0.18125316879184097, "grad_norm": 0.0025365573819726706, "learning_rate": 0.001, "loss": 0.3689, "step": 6569 }, { "epoch": 0.18128076099290535, "grad_norm": 0.002418767660856247, "learning_rate": 0.001, "loss": 0.3896, "step": 6570 }, { "epoch": 0.18130835319396973, "grad_norm": 0.003545231418684125, "learning_rate": 0.001, "loss": 0.3633, "step": 6571 }, { "epoch": 0.1813359453950341, "grad_norm": 0.0031614559702575207, "learning_rate": 0.001, "loss": 0.4208, "step": 6572 }, { "epoch": 0.18136353759609847, "grad_norm": 0.004500371403992176, "learning_rate": 0.001, "loss": 0.3797, "step": 6573 }, { "epoch": 0.18139112979716282, "grad_norm": 0.004762687720358372, "learning_rate": 0.001, "loss": 0.3754, "step": 6574 }, { "epoch": 0.1814187219982272, "grad_norm": 0.0032746128272265196, "learning_rate": 0.001, "loss": 0.3721, "step": 6575 }, { "epoch": 0.18144631419929158, "grad_norm": 0.002015149686485529, "learning_rate": 0.001, "loss": 0.4016, "step": 6576 }, { "epoch": 0.18147390640035593, "grad_norm": 0.003133730962872505, "learning_rate": 0.001, "loss": 0.3765, "step": 6577 }, { "epoch": 0.1815014986014203, "grad_norm": 0.002634577453136444, "learning_rate": 0.001, "loss": 0.4255, "step": 6578 }, { "epoch": 0.18152909080248467, "grad_norm": 0.002629198832437396, "learning_rate": 0.001, "loss": 0.3698, "step": 6579 }, { "epoch": 0.18155668300354905, "grad_norm": 0.0025706025771796703, "learning_rate": 0.001, "loss": 0.4115, "step": 6580 }, { "epoch": 0.18158427520461343, "grad_norm": 0.0024760281667113304, "learning_rate": 0.001, "loss": 0.3772, "step": 6581 }, { "epoch": 0.18161186740567778, "grad_norm": 0.002749704523012042, "learning_rate": 0.001, "loss": 0.441, "step": 6582 }, { "epoch": 0.18163945960674216, "grad_norm": 0.004137085285037756, "learning_rate": 0.001, "loss": 0.401, "step": 6583 }, { "epoch": 0.1816670518078065, "grad_norm": 0.003452888922765851, "learning_rate": 0.001, "loss": 0.3951, "step": 6584 }, { "epoch": 0.1816946440088709, "grad_norm": 0.00280118640512228, "learning_rate": 0.001, "loss": 0.4069, "step": 6585 }, { "epoch": 0.18172223620993527, "grad_norm": 0.002511856146156788, "learning_rate": 0.001, "loss": 0.3994, "step": 6586 }, { "epoch": 0.18174982841099963, "grad_norm": 0.003266239771619439, "learning_rate": 0.001, "loss": 0.3831, "step": 6587 }, { "epoch": 0.181777420612064, "grad_norm": 0.004074828699231148, "learning_rate": 0.001, "loss": 0.4011, "step": 6588 }, { "epoch": 0.18180501281312836, "grad_norm": 0.0032096183858811855, "learning_rate": 0.001, "loss": 0.3851, "step": 6589 }, { "epoch": 0.18183260501419274, "grad_norm": 0.004038103390485048, "learning_rate": 0.001, "loss": 0.3757, "step": 6590 }, { "epoch": 0.18186019721525712, "grad_norm": 0.002833410631865263, "learning_rate": 0.001, "loss": 0.412, "step": 6591 }, { "epoch": 0.18188778941632147, "grad_norm": 0.0042181131429970264, "learning_rate": 0.001, "loss": 0.4179, "step": 6592 }, { "epoch": 0.18191538161738585, "grad_norm": 0.0023462646640837193, "learning_rate": 0.001, "loss": 0.4394, "step": 6593 }, { "epoch": 0.1819429738184502, "grad_norm": 0.00263848970644176, "learning_rate": 0.001, "loss": 0.4032, "step": 6594 }, { "epoch": 0.18197056601951458, "grad_norm": 0.0036028139293193817, "learning_rate": 0.001, "loss": 0.4056, "step": 6595 }, { "epoch": 0.18199815822057896, "grad_norm": 0.0028711268678307533, "learning_rate": 0.001, "loss": 0.3963, "step": 6596 }, { "epoch": 0.18202575042164332, "grad_norm": 0.0030903166625648737, "learning_rate": 0.001, "loss": 0.4274, "step": 6597 }, { "epoch": 0.1820533426227077, "grad_norm": 0.0024692462757229805, "learning_rate": 0.001, "loss": 0.3792, "step": 6598 }, { "epoch": 0.18208093482377205, "grad_norm": 0.0031899542082101107, "learning_rate": 0.001, "loss": 0.3994, "step": 6599 }, { "epoch": 0.18210852702483643, "grad_norm": 0.003478053957223892, "learning_rate": 0.001, "loss": 0.399, "step": 6600 }, { "epoch": 0.1821361192259008, "grad_norm": 0.00227729300968349, "learning_rate": 0.001, "loss": 0.3974, "step": 6601 }, { "epoch": 0.18216371142696516, "grad_norm": 0.002498425543308258, "learning_rate": 0.001, "loss": 0.3593, "step": 6602 }, { "epoch": 0.18219130362802954, "grad_norm": 0.0035188214387744665, "learning_rate": 0.001, "loss": 0.3785, "step": 6603 }, { "epoch": 0.1822188958290939, "grad_norm": 0.0029362791683524847, "learning_rate": 0.001, "loss": 0.3971, "step": 6604 }, { "epoch": 0.18224648803015828, "grad_norm": 0.0034354175440967083, "learning_rate": 0.001, "loss": 0.3582, "step": 6605 }, { "epoch": 0.18227408023122266, "grad_norm": 0.0026759831234812737, "learning_rate": 0.001, "loss": 0.3607, "step": 6606 }, { "epoch": 0.182301672432287, "grad_norm": 0.002816120395436883, "learning_rate": 0.001, "loss": 0.3837, "step": 6607 }, { "epoch": 0.1823292646333514, "grad_norm": 0.0035022348165512085, "learning_rate": 0.001, "loss": 0.4098, "step": 6608 }, { "epoch": 0.18235685683441574, "grad_norm": 0.0034114073496311903, "learning_rate": 0.001, "loss": 0.3654, "step": 6609 }, { "epoch": 0.18238444903548012, "grad_norm": 0.0029557389207184315, "learning_rate": 0.001, "loss": 0.3873, "step": 6610 }, { "epoch": 0.1824120412365445, "grad_norm": 0.0028255372308194637, "learning_rate": 0.001, "loss": 0.4066, "step": 6611 }, { "epoch": 0.18243963343760886, "grad_norm": 0.003345831297338009, "learning_rate": 0.001, "loss": 0.3974, "step": 6612 }, { "epoch": 0.18246722563867324, "grad_norm": 0.0033113721292465925, "learning_rate": 0.001, "loss": 0.3894, "step": 6613 }, { "epoch": 0.1824948178397376, "grad_norm": 0.0022858104202896357, "learning_rate": 0.001, "loss": 0.4601, "step": 6614 }, { "epoch": 0.18252241004080197, "grad_norm": 0.0037697155494242907, "learning_rate": 0.001, "loss": 0.3842, "step": 6615 }, { "epoch": 0.18255000224186635, "grad_norm": 0.003321718657389283, "learning_rate": 0.001, "loss": 0.3957, "step": 6616 }, { "epoch": 0.1825775944429307, "grad_norm": 0.00823116209357977, "learning_rate": 0.001, "loss": 0.408, "step": 6617 }, { "epoch": 0.18260518664399508, "grad_norm": 0.009032117202877998, "learning_rate": 0.001, "loss": 0.4064, "step": 6618 }, { "epoch": 0.18263277884505943, "grad_norm": 0.012609437108039856, "learning_rate": 0.001, "loss": 0.4159, "step": 6619 }, { "epoch": 0.18266037104612382, "grad_norm": 0.0026686121709644794, "learning_rate": 0.001, "loss": 0.428, "step": 6620 }, { "epoch": 0.1826879632471882, "grad_norm": 0.004215455148369074, "learning_rate": 0.001, "loss": 0.412, "step": 6621 }, { "epoch": 0.18271555544825255, "grad_norm": 0.004420561250299215, "learning_rate": 0.001, "loss": 0.3867, "step": 6622 }, { "epoch": 0.18274314764931693, "grad_norm": 0.004728737287223339, "learning_rate": 0.001, "loss": 0.39, "step": 6623 }, { "epoch": 0.18277073985038128, "grad_norm": 0.0077790976502001286, "learning_rate": 0.001, "loss": 0.4597, "step": 6624 }, { "epoch": 0.18279833205144566, "grad_norm": 0.005704334005713463, "learning_rate": 0.001, "loss": 0.38, "step": 6625 }, { "epoch": 0.18282592425251004, "grad_norm": 0.0057289209216833115, "learning_rate": 0.001, "loss": 0.4078, "step": 6626 }, { "epoch": 0.1828535164535744, "grad_norm": 0.0031269195023924112, "learning_rate": 0.001, "loss": 0.3863, "step": 6627 }, { "epoch": 0.18288110865463877, "grad_norm": 0.005130524281412363, "learning_rate": 0.001, "loss": 0.3633, "step": 6628 }, { "epoch": 0.18290870085570313, "grad_norm": 0.003109897952526808, "learning_rate": 0.001, "loss": 0.4098, "step": 6629 }, { "epoch": 0.1829362930567675, "grad_norm": 0.003781312145292759, "learning_rate": 0.001, "loss": 0.4178, "step": 6630 }, { "epoch": 0.1829638852578319, "grad_norm": 0.002132704248651862, "learning_rate": 0.001, "loss": 0.4127, "step": 6631 }, { "epoch": 0.18299147745889624, "grad_norm": 0.0032692537643015385, "learning_rate": 0.001, "loss": 0.4029, "step": 6632 }, { "epoch": 0.18301906965996062, "grad_norm": 0.004832074046134949, "learning_rate": 0.001, "loss": 0.4062, "step": 6633 }, { "epoch": 0.18304666186102497, "grad_norm": 0.002927541034296155, "learning_rate": 0.001, "loss": 0.4106, "step": 6634 }, { "epoch": 0.18307425406208935, "grad_norm": 0.0030651381239295006, "learning_rate": 0.001, "loss": 0.4064, "step": 6635 }, { "epoch": 0.1831018462631537, "grad_norm": 0.005420647095888853, "learning_rate": 0.001, "loss": 0.3958, "step": 6636 }, { "epoch": 0.1831294384642181, "grad_norm": 0.0034076806623488665, "learning_rate": 0.001, "loss": 0.4476, "step": 6637 }, { "epoch": 0.18315703066528247, "grad_norm": 0.0023421344812959433, "learning_rate": 0.001, "loss": 0.4285, "step": 6638 }, { "epoch": 0.18318462286634682, "grad_norm": 0.0025167351122945547, "learning_rate": 0.001, "loss": 0.3947, "step": 6639 }, { "epoch": 0.1832122150674112, "grad_norm": 0.0035126416478306055, "learning_rate": 0.001, "loss": 0.3813, "step": 6640 }, { "epoch": 0.18323980726847555, "grad_norm": 0.002516387961804867, "learning_rate": 0.001, "loss": 0.4018, "step": 6641 }, { "epoch": 0.18326739946953993, "grad_norm": 0.0032357322052121162, "learning_rate": 0.001, "loss": 0.3613, "step": 6642 }, { "epoch": 0.1832949916706043, "grad_norm": 0.0042640650644898415, "learning_rate": 0.001, "loss": 0.4065, "step": 6643 }, { "epoch": 0.18332258387166867, "grad_norm": 0.003650575876235962, "learning_rate": 0.001, "loss": 0.3848, "step": 6644 }, { "epoch": 0.18335017607273305, "grad_norm": 0.00383795821107924, "learning_rate": 0.001, "loss": 0.4154, "step": 6645 }, { "epoch": 0.1833777682737974, "grad_norm": 0.003271985799074173, "learning_rate": 0.001, "loss": 0.4281, "step": 6646 }, { "epoch": 0.18340536047486178, "grad_norm": 0.004597705788910389, "learning_rate": 0.001, "loss": 0.3947, "step": 6647 }, { "epoch": 0.18343295267592616, "grad_norm": 0.0030701288487762213, "learning_rate": 0.001, "loss": 0.394, "step": 6648 }, { "epoch": 0.1834605448769905, "grad_norm": 0.002716483548283577, "learning_rate": 0.001, "loss": 0.3562, "step": 6649 }, { "epoch": 0.1834881370780549, "grad_norm": 0.00313906604424119, "learning_rate": 0.001, "loss": 0.4024, "step": 6650 }, { "epoch": 0.18351572927911924, "grad_norm": 0.002156095113605261, "learning_rate": 0.001, "loss": 0.4083, "step": 6651 }, { "epoch": 0.18354332148018362, "grad_norm": 0.005598701070994139, "learning_rate": 0.001, "loss": 0.394, "step": 6652 }, { "epoch": 0.183570913681248, "grad_norm": 0.002826511859893799, "learning_rate": 0.001, "loss": 0.4202, "step": 6653 }, { "epoch": 0.18359850588231236, "grad_norm": 0.004248370416462421, "learning_rate": 0.001, "loss": 0.4006, "step": 6654 }, { "epoch": 0.18362609808337674, "grad_norm": 0.002483610762283206, "learning_rate": 0.001, "loss": 0.3963, "step": 6655 }, { "epoch": 0.1836536902844411, "grad_norm": 0.003579988144338131, "learning_rate": 0.001, "loss": 0.4175, "step": 6656 }, { "epoch": 0.18368128248550547, "grad_norm": 0.002827717922627926, "learning_rate": 0.001, "loss": 0.393, "step": 6657 }, { "epoch": 0.18370887468656985, "grad_norm": 0.001899820170365274, "learning_rate": 0.001, "loss": 0.4096, "step": 6658 }, { "epoch": 0.1837364668876342, "grad_norm": 0.0031157906632870436, "learning_rate": 0.001, "loss": 0.3807, "step": 6659 }, { "epoch": 0.18376405908869858, "grad_norm": 0.0026786159723997116, "learning_rate": 0.001, "loss": 0.3914, "step": 6660 }, { "epoch": 0.18379165128976294, "grad_norm": 0.003711652010679245, "learning_rate": 0.001, "loss": 0.3857, "step": 6661 }, { "epoch": 0.18381924349082732, "grad_norm": 0.009293664246797562, "learning_rate": 0.001, "loss": 0.3962, "step": 6662 }, { "epoch": 0.1838468356918917, "grad_norm": 0.005516475066542625, "learning_rate": 0.001, "loss": 0.3988, "step": 6663 }, { "epoch": 0.18387442789295605, "grad_norm": 0.003258306998759508, "learning_rate": 0.001, "loss": 0.3742, "step": 6664 }, { "epoch": 0.18390202009402043, "grad_norm": 0.0026423600502312183, "learning_rate": 0.001, "loss": 0.4059, "step": 6665 }, { "epoch": 0.18392961229508478, "grad_norm": 0.0038184835575520992, "learning_rate": 0.001, "loss": 0.3844, "step": 6666 }, { "epoch": 0.18395720449614916, "grad_norm": 0.007692532613873482, "learning_rate": 0.001, "loss": 0.3629, "step": 6667 }, { "epoch": 0.18398479669721354, "grad_norm": 0.003459090832620859, "learning_rate": 0.001, "loss": 0.372, "step": 6668 }, { "epoch": 0.1840123888982779, "grad_norm": 0.002234878484159708, "learning_rate": 0.001, "loss": 0.3923, "step": 6669 }, { "epoch": 0.18403998109934228, "grad_norm": 0.004345033783465624, "learning_rate": 0.001, "loss": 0.4232, "step": 6670 }, { "epoch": 0.18406757330040663, "grad_norm": 0.0024329267907887697, "learning_rate": 0.001, "loss": 0.4063, "step": 6671 }, { "epoch": 0.184095165501471, "grad_norm": 0.004201768897473812, "learning_rate": 0.001, "loss": 0.3964, "step": 6672 }, { "epoch": 0.1841227577025354, "grad_norm": 0.0029748522210866213, "learning_rate": 0.001, "loss": 0.3698, "step": 6673 }, { "epoch": 0.18415034990359974, "grad_norm": 0.0033948393538594246, "learning_rate": 0.001, "loss": 0.4151, "step": 6674 }, { "epoch": 0.18417794210466412, "grad_norm": 0.004755482543259859, "learning_rate": 0.001, "loss": 0.3606, "step": 6675 }, { "epoch": 0.18420553430572847, "grad_norm": 0.004203738644719124, "learning_rate": 0.001, "loss": 0.3899, "step": 6676 }, { "epoch": 0.18423312650679285, "grad_norm": 0.0028434142004698515, "learning_rate": 0.001, "loss": 0.4026, "step": 6677 }, { "epoch": 0.18426071870785724, "grad_norm": 0.0036475297529250383, "learning_rate": 0.001, "loss": 0.3939, "step": 6678 }, { "epoch": 0.1842883109089216, "grad_norm": 0.0032815190497785807, "learning_rate": 0.001, "loss": 0.3731, "step": 6679 }, { "epoch": 0.18431590310998597, "grad_norm": 0.0025924514047801495, "learning_rate": 0.001, "loss": 0.4364, "step": 6680 }, { "epoch": 0.18434349531105032, "grad_norm": 0.003826566506177187, "learning_rate": 0.001, "loss": 0.3723, "step": 6681 }, { "epoch": 0.1843710875121147, "grad_norm": 0.002758833346888423, "learning_rate": 0.001, "loss": 0.4023, "step": 6682 }, { "epoch": 0.18439867971317908, "grad_norm": 0.003747826674953103, "learning_rate": 0.001, "loss": 0.3938, "step": 6683 }, { "epoch": 0.18442627191424343, "grad_norm": 0.005826046224683523, "learning_rate": 0.001, "loss": 0.3837, "step": 6684 }, { "epoch": 0.18445386411530781, "grad_norm": 0.002600571606308222, "learning_rate": 0.001, "loss": 0.4378, "step": 6685 }, { "epoch": 0.18448145631637217, "grad_norm": 0.0024368218146264553, "learning_rate": 0.001, "loss": 0.3911, "step": 6686 }, { "epoch": 0.18450904851743655, "grad_norm": 0.0028827337082475424, "learning_rate": 0.001, "loss": 0.4253, "step": 6687 }, { "epoch": 0.18453664071850093, "grad_norm": 0.0031948471441864967, "learning_rate": 0.001, "loss": 0.3473, "step": 6688 }, { "epoch": 0.18456423291956528, "grad_norm": 0.0033101870212703943, "learning_rate": 0.001, "loss": 0.3909, "step": 6689 }, { "epoch": 0.18459182512062966, "grad_norm": 0.00872339028865099, "learning_rate": 0.001, "loss": 0.3876, "step": 6690 }, { "epoch": 0.184619417321694, "grad_norm": 0.0024567311629652977, "learning_rate": 0.001, "loss": 0.4228, "step": 6691 }, { "epoch": 0.1846470095227584, "grad_norm": 0.0036271214485168457, "learning_rate": 0.001, "loss": 0.3931, "step": 6692 }, { "epoch": 0.18467460172382277, "grad_norm": 0.004172285553067923, "learning_rate": 0.001, "loss": 0.3968, "step": 6693 }, { "epoch": 0.18470219392488713, "grad_norm": 0.002208105055615306, "learning_rate": 0.001, "loss": 0.3876, "step": 6694 }, { "epoch": 0.1847297861259515, "grad_norm": 0.00242190295830369, "learning_rate": 0.001, "loss": 0.4018, "step": 6695 }, { "epoch": 0.18475737832701586, "grad_norm": 0.002299925545230508, "learning_rate": 0.001, "loss": 0.3984, "step": 6696 }, { "epoch": 0.18478497052808024, "grad_norm": 0.0044403825886547565, "learning_rate": 0.001, "loss": 0.3807, "step": 6697 }, { "epoch": 0.18481256272914462, "grad_norm": 0.002557777799665928, "learning_rate": 0.001, "loss": 0.3971, "step": 6698 }, { "epoch": 0.18484015493020897, "grad_norm": 0.003170351032167673, "learning_rate": 0.001, "loss": 0.3567, "step": 6699 }, { "epoch": 0.18486774713127335, "grad_norm": 0.00265632476657629, "learning_rate": 0.001, "loss": 0.4175, "step": 6700 }, { "epoch": 0.1848953393323377, "grad_norm": 0.00452793575823307, "learning_rate": 0.001, "loss": 0.413, "step": 6701 }, { "epoch": 0.18492293153340209, "grad_norm": 0.003011427354067564, "learning_rate": 0.001, "loss": 0.3934, "step": 6702 }, { "epoch": 0.18495052373446647, "grad_norm": 0.0024044597521424294, "learning_rate": 0.001, "loss": 0.3874, "step": 6703 }, { "epoch": 0.18497811593553082, "grad_norm": 0.0035888811107724905, "learning_rate": 0.001, "loss": 0.4016, "step": 6704 }, { "epoch": 0.1850057081365952, "grad_norm": 0.002353749005123973, "learning_rate": 0.001, "loss": 0.4378, "step": 6705 }, { "epoch": 0.18503330033765955, "grad_norm": 0.002476689638569951, "learning_rate": 0.001, "loss": 0.3886, "step": 6706 }, { "epoch": 0.18506089253872393, "grad_norm": 0.002863020868971944, "learning_rate": 0.001, "loss": 0.3952, "step": 6707 }, { "epoch": 0.1850884847397883, "grad_norm": 0.003525110660120845, "learning_rate": 0.001, "loss": 0.3827, "step": 6708 }, { "epoch": 0.18511607694085266, "grad_norm": 0.0031487741507589817, "learning_rate": 0.001, "loss": 0.4206, "step": 6709 }, { "epoch": 0.18514366914191704, "grad_norm": 0.0025411078240722418, "learning_rate": 0.001, "loss": 0.3931, "step": 6710 }, { "epoch": 0.1851712613429814, "grad_norm": 0.003697034902870655, "learning_rate": 0.001, "loss": 0.3566, "step": 6711 }, { "epoch": 0.18519885354404578, "grad_norm": 0.0027804269921034575, "learning_rate": 0.001, "loss": 0.3887, "step": 6712 }, { "epoch": 0.18522644574511016, "grad_norm": 0.0025478231254965067, "learning_rate": 0.001, "loss": 0.379, "step": 6713 }, { "epoch": 0.1852540379461745, "grad_norm": 0.005976776126772165, "learning_rate": 0.001, "loss": 0.4248, "step": 6714 }, { "epoch": 0.1852816301472389, "grad_norm": 0.004181606695055962, "learning_rate": 0.001, "loss": 0.4066, "step": 6715 }, { "epoch": 0.18530922234830324, "grad_norm": 0.002260936889797449, "learning_rate": 0.001, "loss": 0.4213, "step": 6716 }, { "epoch": 0.18533681454936762, "grad_norm": 0.004087687470018864, "learning_rate": 0.001, "loss": 0.3988, "step": 6717 }, { "epoch": 0.185364406750432, "grad_norm": 0.004004262387752533, "learning_rate": 0.001, "loss": 0.3629, "step": 6718 }, { "epoch": 0.18539199895149636, "grad_norm": 0.0027805129066109657, "learning_rate": 0.001, "loss": 0.3721, "step": 6719 }, { "epoch": 0.18541959115256074, "grad_norm": 0.0028537947218865156, "learning_rate": 0.001, "loss": 0.4114, "step": 6720 }, { "epoch": 0.1854471833536251, "grad_norm": 0.002309284871444106, "learning_rate": 0.001, "loss": 0.3947, "step": 6721 }, { "epoch": 0.18547477555468947, "grad_norm": 0.002640241291373968, "learning_rate": 0.001, "loss": 0.371, "step": 6722 }, { "epoch": 0.18550236775575385, "grad_norm": 0.0032795467413961887, "learning_rate": 0.001, "loss": 0.3957, "step": 6723 }, { "epoch": 0.1855299599568182, "grad_norm": 0.0028754386585205793, "learning_rate": 0.001, "loss": 0.4008, "step": 6724 }, { "epoch": 0.18555755215788258, "grad_norm": 0.003849068423733115, "learning_rate": 0.001, "loss": 0.3489, "step": 6725 }, { "epoch": 0.18558514435894694, "grad_norm": 0.0026304719503968954, "learning_rate": 0.001, "loss": 0.3889, "step": 6726 }, { "epoch": 0.18561273656001132, "grad_norm": 0.004114130511879921, "learning_rate": 0.001, "loss": 0.3534, "step": 6727 }, { "epoch": 0.1856403287610757, "grad_norm": 0.0047980137169361115, "learning_rate": 0.001, "loss": 0.4002, "step": 6728 }, { "epoch": 0.18566792096214005, "grad_norm": 0.0033457186073064804, "learning_rate": 0.001, "loss": 0.4017, "step": 6729 }, { "epoch": 0.18569551316320443, "grad_norm": 0.0024908706545829773, "learning_rate": 0.001, "loss": 0.3874, "step": 6730 }, { "epoch": 0.18572310536426878, "grad_norm": 0.003977627027779818, "learning_rate": 0.001, "loss": 0.4041, "step": 6731 }, { "epoch": 0.18575069756533316, "grad_norm": 0.002345160348340869, "learning_rate": 0.001, "loss": 0.4135, "step": 6732 }, { "epoch": 0.18577828976639751, "grad_norm": 0.0025549698621034622, "learning_rate": 0.001, "loss": 0.3838, "step": 6733 }, { "epoch": 0.1858058819674619, "grad_norm": 0.002513099228963256, "learning_rate": 0.001, "loss": 0.398, "step": 6734 }, { "epoch": 0.18583347416852627, "grad_norm": 0.00270891678519547, "learning_rate": 0.001, "loss": 0.4206, "step": 6735 }, { "epoch": 0.18586106636959063, "grad_norm": 0.002633742755278945, "learning_rate": 0.001, "loss": 0.3934, "step": 6736 }, { "epoch": 0.185888658570655, "grad_norm": 0.0025300132110714912, "learning_rate": 0.001, "loss": 0.4546, "step": 6737 }, { "epoch": 0.18591625077171936, "grad_norm": 0.002625576453283429, "learning_rate": 0.001, "loss": 0.3991, "step": 6738 }, { "epoch": 0.18594384297278374, "grad_norm": 0.002719470066949725, "learning_rate": 0.001, "loss": 0.3899, "step": 6739 }, { "epoch": 0.18597143517384812, "grad_norm": 0.002173792105168104, "learning_rate": 0.001, "loss": 0.388, "step": 6740 }, { "epoch": 0.18599902737491247, "grad_norm": 0.0022414301056414843, "learning_rate": 0.001, "loss": 0.4405, "step": 6741 }, { "epoch": 0.18602661957597685, "grad_norm": 0.0032112672924995422, "learning_rate": 0.001, "loss": 0.3803, "step": 6742 }, { "epoch": 0.1860542117770412, "grad_norm": 0.002975397277623415, "learning_rate": 0.001, "loss": 0.4099, "step": 6743 }, { "epoch": 0.1860818039781056, "grad_norm": 0.0024268808774650097, "learning_rate": 0.001, "loss": 0.3698, "step": 6744 }, { "epoch": 0.18610939617916997, "grad_norm": 0.0031276950612664223, "learning_rate": 0.001, "loss": 0.4216, "step": 6745 }, { "epoch": 0.18613698838023432, "grad_norm": 0.0024137012660503387, "learning_rate": 0.001, "loss": 0.4036, "step": 6746 }, { "epoch": 0.1861645805812987, "grad_norm": 0.002811797196045518, "learning_rate": 0.001, "loss": 0.4148, "step": 6747 }, { "epoch": 0.18619217278236305, "grad_norm": 0.004455705638974905, "learning_rate": 0.001, "loss": 0.394, "step": 6748 }, { "epoch": 0.18621976498342743, "grad_norm": 0.0033044186420738697, "learning_rate": 0.001, "loss": 0.4225, "step": 6749 }, { "epoch": 0.1862473571844918, "grad_norm": 0.0033213391434401274, "learning_rate": 0.001, "loss": 0.38, "step": 6750 }, { "epoch": 0.18627494938555617, "grad_norm": 0.002652913797646761, "learning_rate": 0.001, "loss": 0.3944, "step": 6751 }, { "epoch": 0.18630254158662055, "grad_norm": 0.003949091769754887, "learning_rate": 0.001, "loss": 0.3979, "step": 6752 }, { "epoch": 0.1863301337876849, "grad_norm": 0.002760807517915964, "learning_rate": 0.001, "loss": 0.4183, "step": 6753 }, { "epoch": 0.18635772598874928, "grad_norm": 0.0035468130372464657, "learning_rate": 0.001, "loss": 0.3776, "step": 6754 }, { "epoch": 0.18638531818981366, "grad_norm": 0.005636704154312611, "learning_rate": 0.001, "loss": 0.3868, "step": 6755 }, { "epoch": 0.186412910390878, "grad_norm": 0.002743509830906987, "learning_rate": 0.001, "loss": 0.4324, "step": 6756 }, { "epoch": 0.1864405025919424, "grad_norm": 0.0033133768010884523, "learning_rate": 0.001, "loss": 0.4075, "step": 6757 }, { "epoch": 0.18646809479300674, "grad_norm": 0.004021006636321545, "learning_rate": 0.001, "loss": 0.3698, "step": 6758 }, { "epoch": 0.18649568699407112, "grad_norm": 0.0037046188954263926, "learning_rate": 0.001, "loss": 0.3946, "step": 6759 }, { "epoch": 0.1865232791951355, "grad_norm": 0.0019486859673634171, "learning_rate": 0.001, "loss": 0.4049, "step": 6760 }, { "epoch": 0.18655087139619986, "grad_norm": 0.005325522273778915, "learning_rate": 0.001, "loss": 0.4075, "step": 6761 }, { "epoch": 0.18657846359726424, "grad_norm": 0.003112213918939233, "learning_rate": 0.001, "loss": 0.4059, "step": 6762 }, { "epoch": 0.1866060557983286, "grad_norm": 0.003908544313162565, "learning_rate": 0.001, "loss": 0.4135, "step": 6763 }, { "epoch": 0.18663364799939297, "grad_norm": 0.0025811195373535156, "learning_rate": 0.001, "loss": 0.4025, "step": 6764 }, { "epoch": 0.18666124020045735, "grad_norm": 0.002601012121886015, "learning_rate": 0.001, "loss": 0.3972, "step": 6765 }, { "epoch": 0.1866888324015217, "grad_norm": 0.0023691104725003242, "learning_rate": 0.001, "loss": 0.4034, "step": 6766 }, { "epoch": 0.18671642460258608, "grad_norm": 0.0021417406387627125, "learning_rate": 0.001, "loss": 0.3635, "step": 6767 }, { "epoch": 0.18674401680365044, "grad_norm": 0.0022483544889837503, "learning_rate": 0.001, "loss": 0.415, "step": 6768 }, { "epoch": 0.18677160900471482, "grad_norm": 0.0032693545799702406, "learning_rate": 0.001, "loss": 0.4228, "step": 6769 }, { "epoch": 0.1867992012057792, "grad_norm": 0.002980321878567338, "learning_rate": 0.001, "loss": 0.4426, "step": 6770 }, { "epoch": 0.18682679340684355, "grad_norm": 0.0027730639558285475, "learning_rate": 0.001, "loss": 0.4281, "step": 6771 }, { "epoch": 0.18685438560790793, "grad_norm": 0.002590013900771737, "learning_rate": 0.001, "loss": 0.3585, "step": 6772 }, { "epoch": 0.18688197780897228, "grad_norm": 0.00434154225513339, "learning_rate": 0.001, "loss": 0.3845, "step": 6773 }, { "epoch": 0.18690957001003666, "grad_norm": 0.002608590293675661, "learning_rate": 0.001, "loss": 0.3641, "step": 6774 }, { "epoch": 0.18693716221110104, "grad_norm": 0.002211198676377535, "learning_rate": 0.001, "loss": 0.3956, "step": 6775 }, { "epoch": 0.1869647544121654, "grad_norm": 0.002349904738366604, "learning_rate": 0.001, "loss": 0.3773, "step": 6776 }, { "epoch": 0.18699234661322978, "grad_norm": 0.002399194985628128, "learning_rate": 0.001, "loss": 0.4271, "step": 6777 }, { "epoch": 0.18701993881429413, "grad_norm": 0.0026427030097693205, "learning_rate": 0.001, "loss": 0.386, "step": 6778 }, { "epoch": 0.1870475310153585, "grad_norm": 0.00331774540245533, "learning_rate": 0.001, "loss": 0.4142, "step": 6779 }, { "epoch": 0.1870751232164229, "grad_norm": 0.003382153809070587, "learning_rate": 0.001, "loss": 0.4269, "step": 6780 }, { "epoch": 0.18710271541748724, "grad_norm": 0.002654004842042923, "learning_rate": 0.001, "loss": 0.4131, "step": 6781 }, { "epoch": 0.18713030761855162, "grad_norm": 0.005028711166232824, "learning_rate": 0.001, "loss": 0.3986, "step": 6782 }, { "epoch": 0.18715789981961597, "grad_norm": 0.002557139378041029, "learning_rate": 0.001, "loss": 0.384, "step": 6783 }, { "epoch": 0.18718549202068036, "grad_norm": 0.003317068563774228, "learning_rate": 0.001, "loss": 0.4058, "step": 6784 }, { "epoch": 0.18721308422174474, "grad_norm": 0.007066572085022926, "learning_rate": 0.001, "loss": 0.4051, "step": 6785 }, { "epoch": 0.1872406764228091, "grad_norm": 0.002671597758308053, "learning_rate": 0.001, "loss": 0.402, "step": 6786 }, { "epoch": 0.18726826862387347, "grad_norm": 0.0031658501829952, "learning_rate": 0.001, "loss": 0.4196, "step": 6787 }, { "epoch": 0.18729586082493782, "grad_norm": 0.004446005914360285, "learning_rate": 0.001, "loss": 0.387, "step": 6788 }, { "epoch": 0.1873234530260022, "grad_norm": 0.004098923876881599, "learning_rate": 0.001, "loss": 0.3955, "step": 6789 }, { "epoch": 0.18735104522706658, "grad_norm": 0.010191929526627064, "learning_rate": 0.001, "loss": 0.4261, "step": 6790 }, { "epoch": 0.18737863742813093, "grad_norm": 0.0034303830470889807, "learning_rate": 0.001, "loss": 0.3878, "step": 6791 }, { "epoch": 0.18740622962919531, "grad_norm": 0.0028527514077723026, "learning_rate": 0.001, "loss": 0.4453, "step": 6792 }, { "epoch": 0.18743382183025967, "grad_norm": 0.003259077901020646, "learning_rate": 0.001, "loss": 0.3772, "step": 6793 }, { "epoch": 0.18746141403132405, "grad_norm": 0.003400780726224184, "learning_rate": 0.001, "loss": 0.3775, "step": 6794 }, { "epoch": 0.18748900623238843, "grad_norm": 0.0031762244179844856, "learning_rate": 0.001, "loss": 0.398, "step": 6795 }, { "epoch": 0.18751659843345278, "grad_norm": 0.0025605822447687387, "learning_rate": 0.001, "loss": 0.4467, "step": 6796 }, { "epoch": 0.18754419063451716, "grad_norm": 0.0029185418970882893, "learning_rate": 0.001, "loss": 0.4008, "step": 6797 }, { "epoch": 0.1875717828355815, "grad_norm": 0.0023754434660077095, "learning_rate": 0.001, "loss": 0.4238, "step": 6798 }, { "epoch": 0.1875993750366459, "grad_norm": 0.002524258103221655, "learning_rate": 0.001, "loss": 0.4156, "step": 6799 }, { "epoch": 0.18762696723771027, "grad_norm": 0.0024060863070189953, "learning_rate": 0.001, "loss": 0.3629, "step": 6800 }, { "epoch": 0.18765455943877463, "grad_norm": 0.0025536813773214817, "learning_rate": 0.001, "loss": 0.3831, "step": 6801 }, { "epoch": 0.187682151639839, "grad_norm": 0.0032576583325862885, "learning_rate": 0.001, "loss": 0.4294, "step": 6802 }, { "epoch": 0.18770974384090336, "grad_norm": 0.004546053241938353, "learning_rate": 0.001, "loss": 0.3852, "step": 6803 }, { "epoch": 0.18773733604196774, "grad_norm": 0.0041918703354895115, "learning_rate": 0.001, "loss": 0.3851, "step": 6804 }, { "epoch": 0.18776492824303212, "grad_norm": 0.0028781883884221315, "learning_rate": 0.001, "loss": 0.4044, "step": 6805 }, { "epoch": 0.18779252044409647, "grad_norm": 0.0031794614624232054, "learning_rate": 0.001, "loss": 0.413, "step": 6806 }, { "epoch": 0.18782011264516085, "grad_norm": 0.0023967409506440163, "learning_rate": 0.001, "loss": 0.3906, "step": 6807 }, { "epoch": 0.1878477048462252, "grad_norm": 0.0029673967510461807, "learning_rate": 0.001, "loss": 0.3996, "step": 6808 }, { "epoch": 0.18787529704728959, "grad_norm": 0.0028512347489595413, "learning_rate": 0.001, "loss": 0.4211, "step": 6809 }, { "epoch": 0.18790288924835397, "grad_norm": 0.0031782910227775574, "learning_rate": 0.001, "loss": 0.4095, "step": 6810 }, { "epoch": 0.18793048144941832, "grad_norm": 0.00338121154345572, "learning_rate": 0.001, "loss": 0.3941, "step": 6811 }, { "epoch": 0.1879580736504827, "grad_norm": 0.002484740223735571, "learning_rate": 0.001, "loss": 0.4319, "step": 6812 }, { "epoch": 0.18798566585154705, "grad_norm": 0.0032946360297501087, "learning_rate": 0.001, "loss": 0.3824, "step": 6813 }, { "epoch": 0.18801325805261143, "grad_norm": 0.003030691295862198, "learning_rate": 0.001, "loss": 0.4301, "step": 6814 }, { "epoch": 0.1880408502536758, "grad_norm": 0.004018679261207581, "learning_rate": 0.001, "loss": 0.404, "step": 6815 }, { "epoch": 0.18806844245474016, "grad_norm": 0.004374852403998375, "learning_rate": 0.001, "loss": 0.3796, "step": 6816 }, { "epoch": 0.18809603465580454, "grad_norm": 0.003533913753926754, "learning_rate": 0.001, "loss": 0.4152, "step": 6817 }, { "epoch": 0.1881236268568689, "grad_norm": 0.0031644238624721766, "learning_rate": 0.001, "loss": 0.4073, "step": 6818 }, { "epoch": 0.18815121905793328, "grad_norm": 0.003066874807700515, "learning_rate": 0.001, "loss": 0.4544, "step": 6819 }, { "epoch": 0.18817881125899766, "grad_norm": 0.0024850773625075817, "learning_rate": 0.001, "loss": 0.4152, "step": 6820 }, { "epoch": 0.188206403460062, "grad_norm": 0.002926696091890335, "learning_rate": 0.001, "loss": 0.3464, "step": 6821 }, { "epoch": 0.1882339956611264, "grad_norm": 0.00351322372443974, "learning_rate": 0.001, "loss": 0.3975, "step": 6822 }, { "epoch": 0.18826158786219074, "grad_norm": 0.0029151032213121653, "learning_rate": 0.001, "loss": 0.3988, "step": 6823 }, { "epoch": 0.18828918006325512, "grad_norm": 0.004052076023072004, "learning_rate": 0.001, "loss": 0.3899, "step": 6824 }, { "epoch": 0.18831677226431948, "grad_norm": 0.00337521662004292, "learning_rate": 0.001, "loss": 0.4122, "step": 6825 }, { "epoch": 0.18834436446538386, "grad_norm": 0.003744855523109436, "learning_rate": 0.001, "loss": 0.3723, "step": 6826 }, { "epoch": 0.18837195666644824, "grad_norm": 0.0029044130351394415, "learning_rate": 0.001, "loss": 0.388, "step": 6827 }, { "epoch": 0.1883995488675126, "grad_norm": 0.003070064587518573, "learning_rate": 0.001, "loss": 0.397, "step": 6828 }, { "epoch": 0.18842714106857697, "grad_norm": 0.0030712231528013945, "learning_rate": 0.001, "loss": 0.4087, "step": 6829 }, { "epoch": 0.18845473326964132, "grad_norm": 0.0035601078998297453, "learning_rate": 0.001, "loss": 0.3992, "step": 6830 }, { "epoch": 0.1884823254707057, "grad_norm": 0.0034437361173331738, "learning_rate": 0.001, "loss": 0.4109, "step": 6831 }, { "epoch": 0.18850991767177008, "grad_norm": 0.003156115999445319, "learning_rate": 0.001, "loss": 0.4126, "step": 6832 }, { "epoch": 0.18853750987283444, "grad_norm": 0.0028527495451271534, "learning_rate": 0.001, "loss": 0.425, "step": 6833 }, { "epoch": 0.18856510207389882, "grad_norm": 0.0044526634737849236, "learning_rate": 0.001, "loss": 0.3881, "step": 6834 }, { "epoch": 0.18859269427496317, "grad_norm": 0.006967521738260984, "learning_rate": 0.001, "loss": 0.3879, "step": 6835 }, { "epoch": 0.18862028647602755, "grad_norm": 0.0034451198298484087, "learning_rate": 0.001, "loss": 0.4003, "step": 6836 }, { "epoch": 0.18864787867709193, "grad_norm": 0.004877714905887842, "learning_rate": 0.001, "loss": 0.4206, "step": 6837 }, { "epoch": 0.18867547087815628, "grad_norm": 0.00429321825504303, "learning_rate": 0.001, "loss": 0.3752, "step": 6838 }, { "epoch": 0.18870306307922066, "grad_norm": 0.0028502768836915493, "learning_rate": 0.001, "loss": 0.3603, "step": 6839 }, { "epoch": 0.18873065528028501, "grad_norm": 0.0023765196092426777, "learning_rate": 0.001, "loss": 0.4287, "step": 6840 }, { "epoch": 0.1887582474813494, "grad_norm": 0.0028856396675109863, "learning_rate": 0.001, "loss": 0.3819, "step": 6841 }, { "epoch": 0.18878583968241378, "grad_norm": 0.004223294090479612, "learning_rate": 0.001, "loss": 0.4277, "step": 6842 }, { "epoch": 0.18881343188347813, "grad_norm": 0.003582458943128586, "learning_rate": 0.001, "loss": 0.3699, "step": 6843 }, { "epoch": 0.1888410240845425, "grad_norm": 0.004069041460752487, "learning_rate": 0.001, "loss": 0.4196, "step": 6844 }, { "epoch": 0.18886861628560686, "grad_norm": 0.004946923349052668, "learning_rate": 0.001, "loss": 0.4151, "step": 6845 }, { "epoch": 0.18889620848667124, "grad_norm": 0.002650062320753932, "learning_rate": 0.001, "loss": 0.3993, "step": 6846 }, { "epoch": 0.18892380068773562, "grad_norm": 0.002152357017621398, "learning_rate": 0.001, "loss": 0.452, "step": 6847 }, { "epoch": 0.18895139288879997, "grad_norm": 0.0030690866988152266, "learning_rate": 0.001, "loss": 0.3782, "step": 6848 }, { "epoch": 0.18897898508986435, "grad_norm": 0.0022116086911410093, "learning_rate": 0.001, "loss": 0.4015, "step": 6849 }, { "epoch": 0.1890065772909287, "grad_norm": 0.0023306317161768675, "learning_rate": 0.001, "loss": 0.4093, "step": 6850 }, { "epoch": 0.1890341694919931, "grad_norm": 0.002590792253613472, "learning_rate": 0.001, "loss": 0.3973, "step": 6851 }, { "epoch": 0.18906176169305747, "grad_norm": 0.005286765284836292, "learning_rate": 0.001, "loss": 0.4247, "step": 6852 }, { "epoch": 0.18908935389412182, "grad_norm": 0.006548671051859856, "learning_rate": 0.001, "loss": 0.4021, "step": 6853 }, { "epoch": 0.1891169460951862, "grad_norm": 0.002917979843914509, "learning_rate": 0.001, "loss": 0.4222, "step": 6854 }, { "epoch": 0.18914453829625055, "grad_norm": 0.002655414631590247, "learning_rate": 0.001, "loss": 0.3637, "step": 6855 }, { "epoch": 0.18917213049731493, "grad_norm": 0.003176578553393483, "learning_rate": 0.001, "loss": 0.3973, "step": 6856 }, { "epoch": 0.1891997226983793, "grad_norm": 0.0026494793128222227, "learning_rate": 0.001, "loss": 0.4069, "step": 6857 }, { "epoch": 0.18922731489944367, "grad_norm": 0.0027524055913090706, "learning_rate": 0.001, "loss": 0.4352, "step": 6858 }, { "epoch": 0.18925490710050805, "grad_norm": 0.0025463078636676073, "learning_rate": 0.001, "loss": 0.4272, "step": 6859 }, { "epoch": 0.1892824993015724, "grad_norm": 0.011269212700426579, "learning_rate": 0.001, "loss": 0.4013, "step": 6860 }, { "epoch": 0.18931009150263678, "grad_norm": 0.003725286340340972, "learning_rate": 0.001, "loss": 0.4062, "step": 6861 }, { "epoch": 0.18933768370370116, "grad_norm": 0.0022669502068310976, "learning_rate": 0.001, "loss": 0.4321, "step": 6862 }, { "epoch": 0.1893652759047655, "grad_norm": 0.0022833060938864946, "learning_rate": 0.001, "loss": 0.391, "step": 6863 }, { "epoch": 0.1893928681058299, "grad_norm": 0.0027603027410805225, "learning_rate": 0.001, "loss": 0.4338, "step": 6864 }, { "epoch": 0.18942046030689424, "grad_norm": 0.00340470764786005, "learning_rate": 0.001, "loss": 0.3815, "step": 6865 }, { "epoch": 0.18944805250795863, "grad_norm": 0.003243909915909171, "learning_rate": 0.001, "loss": 0.3784, "step": 6866 }, { "epoch": 0.189475644709023, "grad_norm": 0.003641802351921797, "learning_rate": 0.001, "loss": 0.3896, "step": 6867 }, { "epoch": 0.18950323691008736, "grad_norm": 0.00207584910094738, "learning_rate": 0.001, "loss": 0.4182, "step": 6868 }, { "epoch": 0.18953082911115174, "grad_norm": 0.0021775367204099894, "learning_rate": 0.001, "loss": 0.4068, "step": 6869 }, { "epoch": 0.1895584213122161, "grad_norm": 0.0023639260325580835, "learning_rate": 0.001, "loss": 0.3917, "step": 6870 }, { "epoch": 0.18958601351328047, "grad_norm": 0.0024233164731413126, "learning_rate": 0.001, "loss": 0.4213, "step": 6871 }, { "epoch": 0.18961360571434485, "grad_norm": 0.0024376865476369858, "learning_rate": 0.001, "loss": 0.4345, "step": 6872 }, { "epoch": 0.1896411979154092, "grad_norm": 0.004064356442540884, "learning_rate": 0.001, "loss": 0.4221, "step": 6873 }, { "epoch": 0.18966879011647358, "grad_norm": 0.0038676555268466473, "learning_rate": 0.001, "loss": 0.3978, "step": 6874 }, { "epoch": 0.18969638231753794, "grad_norm": 0.002870697295293212, "learning_rate": 0.001, "loss": 0.3892, "step": 6875 }, { "epoch": 0.18972397451860232, "grad_norm": 0.005860270466655493, "learning_rate": 0.001, "loss": 0.3993, "step": 6876 }, { "epoch": 0.1897515667196667, "grad_norm": 0.0024599696043878794, "learning_rate": 0.001, "loss": 0.4116, "step": 6877 }, { "epoch": 0.18977915892073105, "grad_norm": 0.002083956263959408, "learning_rate": 0.001, "loss": 0.4314, "step": 6878 }, { "epoch": 0.18980675112179543, "grad_norm": 0.0021180741023272276, "learning_rate": 0.001, "loss": 0.3981, "step": 6879 }, { "epoch": 0.18983434332285978, "grad_norm": 0.0028299293480813503, "learning_rate": 0.001, "loss": 0.3764, "step": 6880 }, { "epoch": 0.18986193552392416, "grad_norm": 0.009525451809167862, "learning_rate": 0.001, "loss": 0.362, "step": 6881 }, { "epoch": 0.18988952772498854, "grad_norm": 0.0036288495175540447, "learning_rate": 0.001, "loss": 0.3884, "step": 6882 }, { "epoch": 0.1899171199260529, "grad_norm": 0.0025045403745025396, "learning_rate": 0.001, "loss": 0.4469, "step": 6883 }, { "epoch": 0.18994471212711728, "grad_norm": 0.006380067672580481, "learning_rate": 0.001, "loss": 0.4178, "step": 6884 }, { "epoch": 0.18997230432818163, "grad_norm": 0.0035281891468912363, "learning_rate": 0.001, "loss": 0.3987, "step": 6885 }, { "epoch": 0.189999896529246, "grad_norm": 0.012035722844302654, "learning_rate": 0.001, "loss": 0.4396, "step": 6886 }, { "epoch": 0.1900274887303104, "grad_norm": 0.004180245101451874, "learning_rate": 0.001, "loss": 0.3978, "step": 6887 }, { "epoch": 0.19005508093137474, "grad_norm": 0.002991945017129183, "learning_rate": 0.001, "loss": 0.3904, "step": 6888 }, { "epoch": 0.19008267313243912, "grad_norm": 0.002699353964999318, "learning_rate": 0.001, "loss": 0.4174, "step": 6889 }, { "epoch": 0.19011026533350348, "grad_norm": 0.009517242200672626, "learning_rate": 0.001, "loss": 0.3863, "step": 6890 }, { "epoch": 0.19013785753456786, "grad_norm": 0.002533604623749852, "learning_rate": 0.001, "loss": 0.419, "step": 6891 }, { "epoch": 0.19016544973563224, "grad_norm": 0.0020676185376942158, "learning_rate": 0.001, "loss": 0.4244, "step": 6892 }, { "epoch": 0.1901930419366966, "grad_norm": 0.0027616149745881557, "learning_rate": 0.001, "loss": 0.4224, "step": 6893 }, { "epoch": 0.19022063413776097, "grad_norm": 0.0025242832489311695, "learning_rate": 0.001, "loss": 0.3775, "step": 6894 }, { "epoch": 0.19024822633882532, "grad_norm": 0.005067579913884401, "learning_rate": 0.001, "loss": 0.3848, "step": 6895 }, { "epoch": 0.1902758185398897, "grad_norm": 0.005015381146222353, "learning_rate": 0.001, "loss": 0.4029, "step": 6896 }, { "epoch": 0.19030341074095408, "grad_norm": 0.010791119188070297, "learning_rate": 0.001, "loss": 0.4127, "step": 6897 }, { "epoch": 0.19033100294201843, "grad_norm": 0.00212705135345459, "learning_rate": 0.001, "loss": 0.4522, "step": 6898 }, { "epoch": 0.19035859514308281, "grad_norm": 0.0018394197104498744, "learning_rate": 0.001, "loss": 0.4393, "step": 6899 }, { "epoch": 0.19038618734414717, "grad_norm": 0.002272665733471513, "learning_rate": 0.001, "loss": 0.4152, "step": 6900 }, { "epoch": 0.19041377954521155, "grad_norm": 0.002068854635581374, "learning_rate": 0.001, "loss": 0.4175, "step": 6901 }, { "epoch": 0.19044137174627593, "grad_norm": 0.003429130418226123, "learning_rate": 0.001, "loss": 0.4015, "step": 6902 }, { "epoch": 0.19046896394734028, "grad_norm": 0.006805672310292721, "learning_rate": 0.001, "loss": 0.4157, "step": 6903 }, { "epoch": 0.19049655614840466, "grad_norm": 0.002861461602151394, "learning_rate": 0.001, "loss": 0.3941, "step": 6904 }, { "epoch": 0.190524148349469, "grad_norm": 0.002513586077839136, "learning_rate": 0.001, "loss": 0.4403, "step": 6905 }, { "epoch": 0.1905517405505334, "grad_norm": 0.003469777060672641, "learning_rate": 0.001, "loss": 0.3866, "step": 6906 }, { "epoch": 0.19057933275159777, "grad_norm": 0.003417365485802293, "learning_rate": 0.001, "loss": 0.3806, "step": 6907 }, { "epoch": 0.19060692495266213, "grad_norm": 0.0020143757574260235, "learning_rate": 0.001, "loss": 0.4343, "step": 6908 }, { "epoch": 0.1906345171537265, "grad_norm": 0.002093646442517638, "learning_rate": 0.001, "loss": 0.4124, "step": 6909 }, { "epoch": 0.19066210935479086, "grad_norm": 0.001849993597716093, "learning_rate": 0.001, "loss": 0.4144, "step": 6910 }, { "epoch": 0.19068970155585524, "grad_norm": 0.002308469032868743, "learning_rate": 0.001, "loss": 0.4141, "step": 6911 }, { "epoch": 0.19071729375691962, "grad_norm": 0.00342297344468534, "learning_rate": 0.001, "loss": 0.3559, "step": 6912 }, { "epoch": 0.19074488595798397, "grad_norm": 0.002367036882787943, "learning_rate": 0.001, "loss": 0.3956, "step": 6913 }, { "epoch": 0.19077247815904835, "grad_norm": 0.003705595852807164, "learning_rate": 0.001, "loss": 0.3972, "step": 6914 }, { "epoch": 0.1908000703601127, "grad_norm": 0.0037233191542327404, "learning_rate": 0.001, "loss": 0.4135, "step": 6915 }, { "epoch": 0.19082766256117709, "grad_norm": 0.0024186784867197275, "learning_rate": 0.001, "loss": 0.4397, "step": 6916 }, { "epoch": 0.19085525476224144, "grad_norm": 0.0020412628073245287, "learning_rate": 0.001, "loss": 0.4353, "step": 6917 }, { "epoch": 0.19088284696330582, "grad_norm": 0.002167558530345559, "learning_rate": 0.001, "loss": 0.4022, "step": 6918 }, { "epoch": 0.1909104391643702, "grad_norm": 0.002329624257981777, "learning_rate": 0.001, "loss": 0.4321, "step": 6919 }, { "epoch": 0.19093803136543455, "grad_norm": 0.0029148284811526537, "learning_rate": 0.001, "loss": 0.395, "step": 6920 }, { "epoch": 0.19096562356649893, "grad_norm": 0.002269695047289133, "learning_rate": 0.001, "loss": 0.4007, "step": 6921 }, { "epoch": 0.19099321576756328, "grad_norm": 0.002682054415345192, "learning_rate": 0.001, "loss": 0.3913, "step": 6922 }, { "epoch": 0.19102080796862766, "grad_norm": 0.0024830945767462254, "learning_rate": 0.001, "loss": 0.4511, "step": 6923 }, { "epoch": 0.19104840016969205, "grad_norm": 0.003123142756521702, "learning_rate": 0.001, "loss": 0.3975, "step": 6924 }, { "epoch": 0.1910759923707564, "grad_norm": 0.004371596500277519, "learning_rate": 0.001, "loss": 0.3956, "step": 6925 }, { "epoch": 0.19110358457182078, "grad_norm": 0.0042950925417244434, "learning_rate": 0.001, "loss": 0.3804, "step": 6926 }, { "epoch": 0.19113117677288513, "grad_norm": 0.0022360682487487793, "learning_rate": 0.001, "loss": 0.4616, "step": 6927 }, { "epoch": 0.1911587689739495, "grad_norm": 0.0026784553192555904, "learning_rate": 0.001, "loss": 0.3839, "step": 6928 }, { "epoch": 0.1911863611750139, "grad_norm": 0.003034649882465601, "learning_rate": 0.001, "loss": 0.4016, "step": 6929 }, { "epoch": 0.19121395337607824, "grad_norm": 0.006058567203581333, "learning_rate": 0.001, "loss": 0.3884, "step": 6930 }, { "epoch": 0.19124154557714262, "grad_norm": 0.003582082223147154, "learning_rate": 0.001, "loss": 0.3453, "step": 6931 }, { "epoch": 0.19126913777820698, "grad_norm": 0.002336689503863454, "learning_rate": 0.001, "loss": 0.4432, "step": 6932 }, { "epoch": 0.19129672997927136, "grad_norm": 0.0033224106300622225, "learning_rate": 0.001, "loss": 0.4139, "step": 6933 }, { "epoch": 0.19132432218033574, "grad_norm": 0.005102561321109533, "learning_rate": 0.001, "loss": 0.4244, "step": 6934 }, { "epoch": 0.1913519143814001, "grad_norm": 0.0050558787770569324, "learning_rate": 0.001, "loss": 0.4118, "step": 6935 }, { "epoch": 0.19137950658246447, "grad_norm": 0.007568387780338526, "learning_rate": 0.001, "loss": 0.3964, "step": 6936 }, { "epoch": 0.19140709878352882, "grad_norm": 0.003738366300240159, "learning_rate": 0.001, "loss": 0.3949, "step": 6937 }, { "epoch": 0.1914346909845932, "grad_norm": 0.0034411989618092775, "learning_rate": 0.001, "loss": 0.4202, "step": 6938 }, { "epoch": 0.19146228318565758, "grad_norm": 0.0027622180059552193, "learning_rate": 0.001, "loss": 0.4015, "step": 6939 }, { "epoch": 0.19148987538672194, "grad_norm": 0.0026892200112342834, "learning_rate": 0.001, "loss": 0.3971, "step": 6940 }, { "epoch": 0.19151746758778632, "grad_norm": 0.002897805068641901, "learning_rate": 0.001, "loss": 0.3971, "step": 6941 }, { "epoch": 0.19154505978885067, "grad_norm": 0.0027761433739215136, "learning_rate": 0.001, "loss": 0.3846, "step": 6942 }, { "epoch": 0.19157265198991505, "grad_norm": 0.0034833746030926704, "learning_rate": 0.001, "loss": 0.3867, "step": 6943 }, { "epoch": 0.19160024419097943, "grad_norm": 0.004619366955012083, "learning_rate": 0.001, "loss": 0.4471, "step": 6944 }, { "epoch": 0.19162783639204378, "grad_norm": 0.003163927001878619, "learning_rate": 0.001, "loss": 0.4286, "step": 6945 }, { "epoch": 0.19165542859310816, "grad_norm": 0.005678548477590084, "learning_rate": 0.001, "loss": 0.3976, "step": 6946 }, { "epoch": 0.19168302079417252, "grad_norm": 0.0026377500034868717, "learning_rate": 0.001, "loss": 0.4158, "step": 6947 }, { "epoch": 0.1917106129952369, "grad_norm": 0.006021553184837103, "learning_rate": 0.001, "loss": 0.3781, "step": 6948 }, { "epoch": 0.19173820519630128, "grad_norm": 0.0024780158419162035, "learning_rate": 0.001, "loss": 0.4146, "step": 6949 }, { "epoch": 0.19176579739736563, "grad_norm": 0.002402723301202059, "learning_rate": 0.001, "loss": 0.4051, "step": 6950 }, { "epoch": 0.19179338959843, "grad_norm": 0.003152204444631934, "learning_rate": 0.001, "loss": 0.351, "step": 6951 }, { "epoch": 0.19182098179949436, "grad_norm": 0.003795337863266468, "learning_rate": 0.001, "loss": 0.3941, "step": 6952 }, { "epoch": 0.19184857400055874, "grad_norm": 0.002820979803800583, "learning_rate": 0.001, "loss": 0.3949, "step": 6953 }, { "epoch": 0.19187616620162312, "grad_norm": 0.003103316528722644, "learning_rate": 0.001, "loss": 0.4087, "step": 6954 }, { "epoch": 0.19190375840268747, "grad_norm": 0.002751655410975218, "learning_rate": 0.001, "loss": 0.4173, "step": 6955 }, { "epoch": 0.19193135060375185, "grad_norm": 0.002747628604993224, "learning_rate": 0.001, "loss": 0.3952, "step": 6956 }, { "epoch": 0.1919589428048162, "grad_norm": 0.0033904961310327053, "learning_rate": 0.001, "loss": 0.382, "step": 6957 }, { "epoch": 0.1919865350058806, "grad_norm": 0.002641551662236452, "learning_rate": 0.001, "loss": 0.3573, "step": 6958 }, { "epoch": 0.19201412720694497, "grad_norm": 0.0025419299490749836, "learning_rate": 0.001, "loss": 0.3792, "step": 6959 }, { "epoch": 0.19204171940800932, "grad_norm": 0.0028294960502535105, "learning_rate": 0.001, "loss": 0.3585, "step": 6960 }, { "epoch": 0.1920693116090737, "grad_norm": 0.0031680443789809942, "learning_rate": 0.001, "loss": 0.4097, "step": 6961 }, { "epoch": 0.19209690381013805, "grad_norm": 0.0024578890297561884, "learning_rate": 0.001, "loss": 0.4042, "step": 6962 }, { "epoch": 0.19212449601120243, "grad_norm": 0.002299124840646982, "learning_rate": 0.001, "loss": 0.4144, "step": 6963 }, { "epoch": 0.19215208821226681, "grad_norm": 0.0031422816682606936, "learning_rate": 0.001, "loss": 0.4339, "step": 6964 }, { "epoch": 0.19217968041333117, "grad_norm": 0.004103971645236015, "learning_rate": 0.001, "loss": 0.4197, "step": 6965 }, { "epoch": 0.19220727261439555, "grad_norm": 0.002797805005684495, "learning_rate": 0.001, "loss": 0.415, "step": 6966 }, { "epoch": 0.1922348648154599, "grad_norm": 0.006955363787710667, "learning_rate": 0.001, "loss": 0.384, "step": 6967 }, { "epoch": 0.19226245701652428, "grad_norm": 0.0027699500788003206, "learning_rate": 0.001, "loss": 0.4067, "step": 6968 }, { "epoch": 0.19229004921758866, "grad_norm": 0.006645936518907547, "learning_rate": 0.001, "loss": 0.3798, "step": 6969 }, { "epoch": 0.192317641418653, "grad_norm": 0.003216095734387636, "learning_rate": 0.001, "loss": 0.4473, "step": 6970 }, { "epoch": 0.1923452336197174, "grad_norm": 0.002484220312908292, "learning_rate": 0.001, "loss": 0.4134, "step": 6971 }, { "epoch": 0.19237282582078175, "grad_norm": 0.0034046086948364973, "learning_rate": 0.001, "loss": 0.4042, "step": 6972 }, { "epoch": 0.19240041802184613, "grad_norm": 0.0025209994055330753, "learning_rate": 0.001, "loss": 0.4586, "step": 6973 }, { "epoch": 0.1924280102229105, "grad_norm": 0.004154059570282698, "learning_rate": 0.001, "loss": 0.3724, "step": 6974 }, { "epoch": 0.19245560242397486, "grad_norm": 0.003831770271062851, "learning_rate": 0.001, "loss": 0.4142, "step": 6975 }, { "epoch": 0.19248319462503924, "grad_norm": 0.005726317409425974, "learning_rate": 0.001, "loss": 0.4296, "step": 6976 }, { "epoch": 0.1925107868261036, "grad_norm": 0.0032162668649107218, "learning_rate": 0.001, "loss": 0.3735, "step": 6977 }, { "epoch": 0.19253837902716797, "grad_norm": 0.0027887041214853525, "learning_rate": 0.001, "loss": 0.4167, "step": 6978 }, { "epoch": 0.19256597122823235, "grad_norm": 0.0031014943961054087, "learning_rate": 0.001, "loss": 0.3785, "step": 6979 }, { "epoch": 0.1925935634292967, "grad_norm": 0.0026730517856776714, "learning_rate": 0.001, "loss": 0.4275, "step": 6980 }, { "epoch": 0.19262115563036109, "grad_norm": 0.002030472969636321, "learning_rate": 0.001, "loss": 0.4078, "step": 6981 }, { "epoch": 0.19264874783142544, "grad_norm": 0.0030617276206612587, "learning_rate": 0.001, "loss": 0.4235, "step": 6982 }, { "epoch": 0.19267634003248982, "grad_norm": 0.0025087720714509487, "learning_rate": 0.001, "loss": 0.4084, "step": 6983 }, { "epoch": 0.1927039322335542, "grad_norm": 0.0020648448262363672, "learning_rate": 0.001, "loss": 0.3989, "step": 6984 }, { "epoch": 0.19273152443461855, "grad_norm": 0.002378196455538273, "learning_rate": 0.001, "loss": 0.4016, "step": 6985 }, { "epoch": 0.19275911663568293, "grad_norm": 0.0027894238010048866, "learning_rate": 0.001, "loss": 0.4082, "step": 6986 }, { "epoch": 0.19278670883674728, "grad_norm": 0.002425672486424446, "learning_rate": 0.001, "loss": 0.3512, "step": 6987 }, { "epoch": 0.19281430103781166, "grad_norm": 0.0021580031607300043, "learning_rate": 0.001, "loss": 0.4396, "step": 6988 }, { "epoch": 0.19284189323887604, "grad_norm": 0.0041187843307852745, "learning_rate": 0.001, "loss": 0.3748, "step": 6989 }, { "epoch": 0.1928694854399404, "grad_norm": 0.0023956988006830215, "learning_rate": 0.001, "loss": 0.4488, "step": 6990 }, { "epoch": 0.19289707764100478, "grad_norm": 0.0022000286262482405, "learning_rate": 0.001, "loss": 0.3773, "step": 6991 }, { "epoch": 0.19292466984206913, "grad_norm": 0.0037109351251274347, "learning_rate": 0.001, "loss": 0.3982, "step": 6992 }, { "epoch": 0.1929522620431335, "grad_norm": 0.0028746852185577154, "learning_rate": 0.001, "loss": 0.3912, "step": 6993 }, { "epoch": 0.1929798542441979, "grad_norm": 0.0026120489928871393, "learning_rate": 0.001, "loss": 0.3846, "step": 6994 }, { "epoch": 0.19300744644526224, "grad_norm": 0.0025121879298239946, "learning_rate": 0.001, "loss": 0.3957, "step": 6995 }, { "epoch": 0.19303503864632662, "grad_norm": 0.0026690547820180655, "learning_rate": 0.001, "loss": 0.4165, "step": 6996 }, { "epoch": 0.19306263084739098, "grad_norm": 0.0021126086357980967, "learning_rate": 0.001, "loss": 0.3714, "step": 6997 }, { "epoch": 0.19309022304845536, "grad_norm": 0.007225458975881338, "learning_rate": 0.001, "loss": 0.4177, "step": 6998 }, { "epoch": 0.19311781524951974, "grad_norm": 0.002150959335267544, "learning_rate": 0.001, "loss": 0.4287, "step": 6999 }, { "epoch": 0.1931454074505841, "grad_norm": 0.002550938166677952, "learning_rate": 0.001, "loss": 0.4182, "step": 7000 }, { "epoch": 0.1931454074505841, "eval_runtime": 24.8881, "eval_samples_per_second": 1.286, "eval_steps_per_second": 0.161, "step": 7000 }, { "epoch": 0.19317299965164847, "grad_norm": 0.00321765523403883, "learning_rate": 0.001, "loss": 0.4241, "step": 7001 }, { "epoch": 0.19320059185271282, "grad_norm": 0.0028067470993846655, "learning_rate": 0.001, "loss": 0.4288, "step": 7002 }, { "epoch": 0.1932281840537772, "grad_norm": 0.002428458072245121, "learning_rate": 0.001, "loss": 0.4036, "step": 7003 }, { "epoch": 0.19325577625484158, "grad_norm": 0.003469350514933467, "learning_rate": 0.001, "loss": 0.3974, "step": 7004 }, { "epoch": 0.19328336845590594, "grad_norm": 0.0026082920376211405, "learning_rate": 0.001, "loss": 0.4113, "step": 7005 }, { "epoch": 0.19331096065697032, "grad_norm": 0.002355298027396202, "learning_rate": 0.001, "loss": 0.4367, "step": 7006 }, { "epoch": 0.19333855285803467, "grad_norm": 0.002677205018699169, "learning_rate": 0.001, "loss": 0.4009, "step": 7007 }, { "epoch": 0.19336614505909905, "grad_norm": 0.002955394797027111, "learning_rate": 0.001, "loss": 0.4261, "step": 7008 }, { "epoch": 0.19339373726016343, "grad_norm": 0.0033979052677750587, "learning_rate": 0.001, "loss": 0.3961, "step": 7009 }, { "epoch": 0.19342132946122778, "grad_norm": 0.004769494291394949, "learning_rate": 0.001, "loss": 0.4022, "step": 7010 }, { "epoch": 0.19344892166229216, "grad_norm": 0.003045122604817152, "learning_rate": 0.001, "loss": 0.4005, "step": 7011 }, { "epoch": 0.19347651386335651, "grad_norm": 0.00237339548766613, "learning_rate": 0.001, "loss": 0.3947, "step": 7012 }, { "epoch": 0.1935041060644209, "grad_norm": 0.002918932121247053, "learning_rate": 0.001, "loss": 0.4166, "step": 7013 }, { "epoch": 0.19353169826548525, "grad_norm": 0.003531453665345907, "learning_rate": 0.001, "loss": 0.4043, "step": 7014 }, { "epoch": 0.19355929046654963, "grad_norm": 0.002957909367978573, "learning_rate": 0.001, "loss": 0.4163, "step": 7015 }, { "epoch": 0.193586882667614, "grad_norm": 0.002795727690681815, "learning_rate": 0.001, "loss": 0.423, "step": 7016 }, { "epoch": 0.19361447486867836, "grad_norm": 0.002575729275122285, "learning_rate": 0.001, "loss": 0.3677, "step": 7017 }, { "epoch": 0.19364206706974274, "grad_norm": 0.011985518038272858, "learning_rate": 0.001, "loss": 0.4104, "step": 7018 }, { "epoch": 0.1936696592708071, "grad_norm": 0.005119143985211849, "learning_rate": 0.001, "loss": 0.3975, "step": 7019 }, { "epoch": 0.19369725147187147, "grad_norm": 0.002983895130455494, "learning_rate": 0.001, "loss": 0.382, "step": 7020 }, { "epoch": 0.19372484367293585, "grad_norm": 0.0021343736443668604, "learning_rate": 0.001, "loss": 0.4559, "step": 7021 }, { "epoch": 0.1937524358740002, "grad_norm": 0.0032467253040522337, "learning_rate": 0.001, "loss": 0.3994, "step": 7022 }, { "epoch": 0.1937800280750646, "grad_norm": 0.0023335155565291643, "learning_rate": 0.001, "loss": 0.4108, "step": 7023 }, { "epoch": 0.19380762027612894, "grad_norm": 0.002503189956769347, "learning_rate": 0.001, "loss": 0.4152, "step": 7024 }, { "epoch": 0.19383521247719332, "grad_norm": 0.002620559884235263, "learning_rate": 0.001, "loss": 0.3917, "step": 7025 }, { "epoch": 0.1938628046782577, "grad_norm": 0.002399984747171402, "learning_rate": 0.001, "loss": 0.3945, "step": 7026 }, { "epoch": 0.19389039687932205, "grad_norm": 0.0024000145494937897, "learning_rate": 0.001, "loss": 0.4327, "step": 7027 }, { "epoch": 0.19391798908038643, "grad_norm": 0.0024858317337930202, "learning_rate": 0.001, "loss": 0.3851, "step": 7028 }, { "epoch": 0.19394558128145079, "grad_norm": 0.0028986751567572355, "learning_rate": 0.001, "loss": 0.3865, "step": 7029 }, { "epoch": 0.19397317348251517, "grad_norm": 0.0020534794311970472, "learning_rate": 0.001, "loss": 0.4198, "step": 7030 }, { "epoch": 0.19400076568357955, "grad_norm": 0.0021818815730512142, "learning_rate": 0.001, "loss": 0.4122, "step": 7031 }, { "epoch": 0.1940283578846439, "grad_norm": 0.003896314650774002, "learning_rate": 0.001, "loss": 0.3739, "step": 7032 }, { "epoch": 0.19405595008570828, "grad_norm": 0.003106046002358198, "learning_rate": 0.001, "loss": 0.4117, "step": 7033 }, { "epoch": 0.19408354228677263, "grad_norm": 0.003942525014281273, "learning_rate": 0.001, "loss": 0.3911, "step": 7034 }, { "epoch": 0.194111134487837, "grad_norm": 0.006781655363738537, "learning_rate": 0.001, "loss": 0.3489, "step": 7035 }, { "epoch": 0.1941387266889014, "grad_norm": 0.0025264392606914043, "learning_rate": 0.001, "loss": 0.403, "step": 7036 }, { "epoch": 0.19416631888996574, "grad_norm": 0.0023680292069911957, "learning_rate": 0.001, "loss": 0.3891, "step": 7037 }, { "epoch": 0.19419391109103012, "grad_norm": 0.0024297498166561127, "learning_rate": 0.001, "loss": 0.3862, "step": 7038 }, { "epoch": 0.19422150329209448, "grad_norm": 0.0028899710159748793, "learning_rate": 0.001, "loss": 0.38, "step": 7039 }, { "epoch": 0.19424909549315886, "grad_norm": 0.00262429122813046, "learning_rate": 0.001, "loss": 0.4411, "step": 7040 }, { "epoch": 0.19427668769422324, "grad_norm": 0.0034140751231461763, "learning_rate": 0.001, "loss": 0.4207, "step": 7041 }, { "epoch": 0.1943042798952876, "grad_norm": 0.003651238512247801, "learning_rate": 0.001, "loss": 0.3855, "step": 7042 }, { "epoch": 0.19433187209635197, "grad_norm": 0.0027614810969680548, "learning_rate": 0.001, "loss": 0.3686, "step": 7043 }, { "epoch": 0.19435946429741632, "grad_norm": 0.0027164684142917395, "learning_rate": 0.001, "loss": 0.4135, "step": 7044 }, { "epoch": 0.1943870564984807, "grad_norm": 0.0031832915265113115, "learning_rate": 0.001, "loss": 0.402, "step": 7045 }, { "epoch": 0.19441464869954508, "grad_norm": 0.0033719497732818127, "learning_rate": 0.001, "loss": 0.4137, "step": 7046 }, { "epoch": 0.19444224090060944, "grad_norm": 0.002569133648648858, "learning_rate": 0.001, "loss": 0.399, "step": 7047 }, { "epoch": 0.19446983310167382, "grad_norm": 0.0024267893750220537, "learning_rate": 0.001, "loss": 0.3761, "step": 7048 }, { "epoch": 0.19449742530273817, "grad_norm": 0.002893506083637476, "learning_rate": 0.001, "loss": 0.4003, "step": 7049 }, { "epoch": 0.19452501750380255, "grad_norm": 0.002697143005207181, "learning_rate": 0.001, "loss": 0.3907, "step": 7050 }, { "epoch": 0.19455260970486693, "grad_norm": 0.0032814224250614643, "learning_rate": 0.001, "loss": 0.4078, "step": 7051 }, { "epoch": 0.19458020190593128, "grad_norm": 0.002781797433272004, "learning_rate": 0.001, "loss": 0.3976, "step": 7052 }, { "epoch": 0.19460779410699566, "grad_norm": 0.003478818805888295, "learning_rate": 0.001, "loss": 0.3787, "step": 7053 }, { "epoch": 0.19463538630806002, "grad_norm": 0.0027507366612553596, "learning_rate": 0.001, "loss": 0.3745, "step": 7054 }, { "epoch": 0.1946629785091244, "grad_norm": 0.0034008813090622425, "learning_rate": 0.001, "loss": 0.3999, "step": 7055 }, { "epoch": 0.19469057071018878, "grad_norm": 0.0030989109072834253, "learning_rate": 0.001, "loss": 0.393, "step": 7056 }, { "epoch": 0.19471816291125313, "grad_norm": 0.003060347633436322, "learning_rate": 0.001, "loss": 0.3682, "step": 7057 }, { "epoch": 0.1947457551123175, "grad_norm": 0.0040310220792889595, "learning_rate": 0.001, "loss": 0.3729, "step": 7058 }, { "epoch": 0.19477334731338186, "grad_norm": 0.0034857376012951136, "learning_rate": 0.001, "loss": 0.3781, "step": 7059 }, { "epoch": 0.19480093951444624, "grad_norm": 0.0030391844920814037, "learning_rate": 0.001, "loss": 0.4159, "step": 7060 }, { "epoch": 0.19482853171551062, "grad_norm": 0.0030424713622778654, "learning_rate": 0.001, "loss": 0.3882, "step": 7061 }, { "epoch": 0.19485612391657497, "grad_norm": 0.0027415102813392878, "learning_rate": 0.001, "loss": 0.394, "step": 7062 }, { "epoch": 0.19488371611763936, "grad_norm": 0.002916666679084301, "learning_rate": 0.001, "loss": 0.4095, "step": 7063 }, { "epoch": 0.1949113083187037, "grad_norm": 0.0023576219100505114, "learning_rate": 0.001, "loss": 0.4512, "step": 7064 }, { "epoch": 0.1949389005197681, "grad_norm": 0.002573948120698333, "learning_rate": 0.001, "loss": 0.3731, "step": 7065 }, { "epoch": 0.19496649272083247, "grad_norm": 0.004156920593231916, "learning_rate": 0.001, "loss": 0.4234, "step": 7066 }, { "epoch": 0.19499408492189682, "grad_norm": 0.0037690969184041023, "learning_rate": 0.001, "loss": 0.3681, "step": 7067 }, { "epoch": 0.1950216771229612, "grad_norm": 0.0031203448306769133, "learning_rate": 0.001, "loss": 0.3632, "step": 7068 }, { "epoch": 0.19504926932402555, "grad_norm": 0.002718303119763732, "learning_rate": 0.001, "loss": 0.4197, "step": 7069 }, { "epoch": 0.19507686152508993, "grad_norm": 0.002880721352994442, "learning_rate": 0.001, "loss": 0.3749, "step": 7070 }, { "epoch": 0.19510445372615431, "grad_norm": 0.0021646865643560886, "learning_rate": 0.001, "loss": 0.3722, "step": 7071 }, { "epoch": 0.19513204592721867, "grad_norm": 0.004858906380832195, "learning_rate": 0.001, "loss": 0.3744, "step": 7072 }, { "epoch": 0.19515963812828305, "grad_norm": 0.006143823266029358, "learning_rate": 0.001, "loss": 0.4083, "step": 7073 }, { "epoch": 0.1951872303293474, "grad_norm": 0.002867503557354212, "learning_rate": 0.001, "loss": 0.3752, "step": 7074 }, { "epoch": 0.19521482253041178, "grad_norm": 0.0057819196954369545, "learning_rate": 0.001, "loss": 0.4177, "step": 7075 }, { "epoch": 0.19524241473147616, "grad_norm": 0.003106197575107217, "learning_rate": 0.001, "loss": 0.4094, "step": 7076 }, { "epoch": 0.1952700069325405, "grad_norm": 0.0021443332079797983, "learning_rate": 0.001, "loss": 0.3717, "step": 7077 }, { "epoch": 0.1952975991336049, "grad_norm": 0.003420140827074647, "learning_rate": 0.001, "loss": 0.3775, "step": 7078 }, { "epoch": 0.19532519133466925, "grad_norm": 0.0017785170348361135, "learning_rate": 0.001, "loss": 0.4202, "step": 7079 }, { "epoch": 0.19535278353573363, "grad_norm": 0.003068318823352456, "learning_rate": 0.001, "loss": 0.3768, "step": 7080 }, { "epoch": 0.195380375736798, "grad_norm": 0.0028662248514592648, "learning_rate": 0.001, "loss": 0.4064, "step": 7081 }, { "epoch": 0.19540796793786236, "grad_norm": 0.002816071966663003, "learning_rate": 0.001, "loss": 0.3827, "step": 7082 }, { "epoch": 0.19543556013892674, "grad_norm": 0.003606053302064538, "learning_rate": 0.001, "loss": 0.3817, "step": 7083 }, { "epoch": 0.1954631523399911, "grad_norm": 0.002966247033327818, "learning_rate": 0.001, "loss": 0.352, "step": 7084 }, { "epoch": 0.19549074454105547, "grad_norm": 0.0033741393126547337, "learning_rate": 0.001, "loss": 0.4045, "step": 7085 }, { "epoch": 0.19551833674211985, "grad_norm": 0.0050892336294054985, "learning_rate": 0.001, "loss": 0.4047, "step": 7086 }, { "epoch": 0.1955459289431842, "grad_norm": 0.0030292568262666464, "learning_rate": 0.001, "loss": 0.4108, "step": 7087 }, { "epoch": 0.19557352114424859, "grad_norm": 0.0051439255475997925, "learning_rate": 0.001, "loss": 0.4007, "step": 7088 }, { "epoch": 0.19560111334531294, "grad_norm": 0.0027856396045535803, "learning_rate": 0.001, "loss": 0.45, "step": 7089 }, { "epoch": 0.19562870554637732, "grad_norm": 0.0030972841195762157, "learning_rate": 0.001, "loss": 0.4173, "step": 7090 }, { "epoch": 0.1956562977474417, "grad_norm": 0.003249438712373376, "learning_rate": 0.001, "loss": 0.3903, "step": 7091 }, { "epoch": 0.19568388994850605, "grad_norm": 0.0027454618830233812, "learning_rate": 0.001, "loss": 0.4095, "step": 7092 }, { "epoch": 0.19571148214957043, "grad_norm": 0.0027419314719736576, "learning_rate": 0.001, "loss": 0.3803, "step": 7093 }, { "epoch": 0.19573907435063478, "grad_norm": 0.0027157592121511698, "learning_rate": 0.001, "loss": 0.3786, "step": 7094 }, { "epoch": 0.19576666655169916, "grad_norm": 0.006076755467802286, "learning_rate": 0.001, "loss": 0.3998, "step": 7095 }, { "epoch": 0.19579425875276354, "grad_norm": 0.0034049993846565485, "learning_rate": 0.001, "loss": 0.3862, "step": 7096 }, { "epoch": 0.1958218509538279, "grad_norm": 0.004467803984880447, "learning_rate": 0.001, "loss": 0.3943, "step": 7097 }, { "epoch": 0.19584944315489228, "grad_norm": 0.0059601617977023125, "learning_rate": 0.001, "loss": 0.3972, "step": 7098 }, { "epoch": 0.19587703535595663, "grad_norm": 0.0025327398907393217, "learning_rate": 0.001, "loss": 0.4213, "step": 7099 }, { "epoch": 0.195904627557021, "grad_norm": 0.002435739152133465, "learning_rate": 0.001, "loss": 0.3729, "step": 7100 }, { "epoch": 0.1959322197580854, "grad_norm": 0.002827326999977231, "learning_rate": 0.001, "loss": 0.4195, "step": 7101 }, { "epoch": 0.19595981195914974, "grad_norm": 0.003259490942582488, "learning_rate": 0.001, "loss": 0.4202, "step": 7102 }, { "epoch": 0.19598740416021412, "grad_norm": 0.0026045122649520636, "learning_rate": 0.001, "loss": 0.3684, "step": 7103 }, { "epoch": 0.19601499636127848, "grad_norm": 0.0032783085480332375, "learning_rate": 0.001, "loss": 0.382, "step": 7104 }, { "epoch": 0.19604258856234286, "grad_norm": 0.004205625504255295, "learning_rate": 0.001, "loss": 0.3918, "step": 7105 }, { "epoch": 0.1960701807634072, "grad_norm": 0.004856889136135578, "learning_rate": 0.001, "loss": 0.4006, "step": 7106 }, { "epoch": 0.1960977729644716, "grad_norm": 0.005419073160737753, "learning_rate": 0.001, "loss": 0.4515, "step": 7107 }, { "epoch": 0.19612536516553597, "grad_norm": 0.002779455156996846, "learning_rate": 0.001, "loss": 0.3753, "step": 7108 }, { "epoch": 0.19615295736660032, "grad_norm": 0.005004864651709795, "learning_rate": 0.001, "loss": 0.3849, "step": 7109 }, { "epoch": 0.1961805495676647, "grad_norm": 0.0031100206542760134, "learning_rate": 0.001, "loss": 0.4016, "step": 7110 }, { "epoch": 0.19620814176872906, "grad_norm": 0.002827596152201295, "learning_rate": 0.001, "loss": 0.3784, "step": 7111 }, { "epoch": 0.19623573396979344, "grad_norm": 0.003027317812666297, "learning_rate": 0.001, "loss": 0.3997, "step": 7112 }, { "epoch": 0.19626332617085782, "grad_norm": 0.004092982038855553, "learning_rate": 0.001, "loss": 0.3499, "step": 7113 }, { "epoch": 0.19629091837192217, "grad_norm": 0.0034322801511734724, "learning_rate": 0.001, "loss": 0.39, "step": 7114 }, { "epoch": 0.19631851057298655, "grad_norm": 0.0031399535946547985, "learning_rate": 0.001, "loss": 0.3798, "step": 7115 }, { "epoch": 0.1963461027740509, "grad_norm": 0.004002594854682684, "learning_rate": 0.001, "loss": 0.4014, "step": 7116 }, { "epoch": 0.19637369497511528, "grad_norm": 0.0028446640353649855, "learning_rate": 0.001, "loss": 0.4235, "step": 7117 }, { "epoch": 0.19640128717617966, "grad_norm": 0.0026851436123251915, "learning_rate": 0.001, "loss": 0.3893, "step": 7118 }, { "epoch": 0.19642887937724401, "grad_norm": 0.004198794718831778, "learning_rate": 0.001, "loss": 0.3929, "step": 7119 }, { "epoch": 0.1964564715783084, "grad_norm": 0.0026877266354858875, "learning_rate": 0.001, "loss": 0.4258, "step": 7120 }, { "epoch": 0.19648406377937275, "grad_norm": 0.0036120673175901175, "learning_rate": 0.001, "loss": 0.4328, "step": 7121 }, { "epoch": 0.19651165598043713, "grad_norm": 0.004937993362545967, "learning_rate": 0.001, "loss": 0.4064, "step": 7122 }, { "epoch": 0.1965392481815015, "grad_norm": 0.0036189809907227755, "learning_rate": 0.001, "loss": 0.3815, "step": 7123 }, { "epoch": 0.19656684038256586, "grad_norm": 0.002798637840896845, "learning_rate": 0.001, "loss": 0.3852, "step": 7124 }, { "epoch": 0.19659443258363024, "grad_norm": 0.005139973945915699, "learning_rate": 0.001, "loss": 0.402, "step": 7125 }, { "epoch": 0.1966220247846946, "grad_norm": 0.006130583584308624, "learning_rate": 0.001, "loss": 0.401, "step": 7126 }, { "epoch": 0.19664961698575897, "grad_norm": 0.005488388240337372, "learning_rate": 0.001, "loss": 0.3898, "step": 7127 }, { "epoch": 0.19667720918682335, "grad_norm": 0.004060831852257252, "learning_rate": 0.001, "loss": 0.4456, "step": 7128 }, { "epoch": 0.1967048013878877, "grad_norm": 0.0029782892670482397, "learning_rate": 0.001, "loss": 0.394, "step": 7129 }, { "epoch": 0.1967323935889521, "grad_norm": 0.0036948262713849545, "learning_rate": 0.001, "loss": 0.4239, "step": 7130 }, { "epoch": 0.19675998579001644, "grad_norm": 0.004071654751896858, "learning_rate": 0.001, "loss": 0.4095, "step": 7131 }, { "epoch": 0.19678757799108082, "grad_norm": 0.0027887504547834396, "learning_rate": 0.001, "loss": 0.404, "step": 7132 }, { "epoch": 0.1968151701921452, "grad_norm": 0.003311131615191698, "learning_rate": 0.001, "loss": 0.3998, "step": 7133 }, { "epoch": 0.19684276239320955, "grad_norm": 0.002101517515257001, "learning_rate": 0.001, "loss": 0.4199, "step": 7134 }, { "epoch": 0.19687035459427393, "grad_norm": 0.002868801588192582, "learning_rate": 0.001, "loss": 0.4191, "step": 7135 }, { "epoch": 0.19689794679533829, "grad_norm": 0.003476998768746853, "learning_rate": 0.001, "loss": 0.4003, "step": 7136 }, { "epoch": 0.19692553899640267, "grad_norm": 0.0028729683253914118, "learning_rate": 0.001, "loss": 0.3974, "step": 7137 }, { "epoch": 0.19695313119746705, "grad_norm": 0.0023318929597735405, "learning_rate": 0.001, "loss": 0.3955, "step": 7138 }, { "epoch": 0.1969807233985314, "grad_norm": 0.0027352285105735064, "learning_rate": 0.001, "loss": 0.3787, "step": 7139 }, { "epoch": 0.19700831559959578, "grad_norm": 0.006381817627698183, "learning_rate": 0.001, "loss": 0.3775, "step": 7140 }, { "epoch": 0.19703590780066013, "grad_norm": 0.0036294516175985336, "learning_rate": 0.001, "loss": 0.3923, "step": 7141 }, { "epoch": 0.1970635000017245, "grad_norm": 0.0023575150407850742, "learning_rate": 0.001, "loss": 0.4076, "step": 7142 }, { "epoch": 0.1970910922027889, "grad_norm": 0.0027103947941213846, "learning_rate": 0.001, "loss": 0.3956, "step": 7143 }, { "epoch": 0.19711868440385324, "grad_norm": 0.003010998945683241, "learning_rate": 0.001, "loss": 0.4132, "step": 7144 }, { "epoch": 0.19714627660491763, "grad_norm": 0.0021617074962705374, "learning_rate": 0.001, "loss": 0.4136, "step": 7145 }, { "epoch": 0.19717386880598198, "grad_norm": 0.008347420953214169, "learning_rate": 0.001, "loss": 0.4099, "step": 7146 }, { "epoch": 0.19720146100704636, "grad_norm": 0.002271142089739442, "learning_rate": 0.001, "loss": 0.3984, "step": 7147 }, { "epoch": 0.19722905320811074, "grad_norm": 0.005453981924802065, "learning_rate": 0.001, "loss": 0.3863, "step": 7148 }, { "epoch": 0.1972566454091751, "grad_norm": 0.003245376283302903, "learning_rate": 0.001, "loss": 0.3939, "step": 7149 }, { "epoch": 0.19728423761023947, "grad_norm": 0.0026226479094475508, "learning_rate": 0.001, "loss": 0.3647, "step": 7150 }, { "epoch": 0.19731182981130382, "grad_norm": 0.003077802946791053, "learning_rate": 0.001, "loss": 0.4184, "step": 7151 }, { "epoch": 0.1973394220123682, "grad_norm": 0.0025755036622285843, "learning_rate": 0.001, "loss": 0.3937, "step": 7152 }, { "epoch": 0.19736701421343258, "grad_norm": 0.0020703410264104605, "learning_rate": 0.001, "loss": 0.4281, "step": 7153 }, { "epoch": 0.19739460641449694, "grad_norm": 0.0022496734745800495, "learning_rate": 0.001, "loss": 0.3912, "step": 7154 }, { "epoch": 0.19742219861556132, "grad_norm": 0.004225686192512512, "learning_rate": 0.001, "loss": 0.3843, "step": 7155 }, { "epoch": 0.19744979081662567, "grad_norm": 0.0025360104627907276, "learning_rate": 0.001, "loss": 0.4148, "step": 7156 }, { "epoch": 0.19747738301769005, "grad_norm": 0.00297233066521585, "learning_rate": 0.001, "loss": 0.4025, "step": 7157 }, { "epoch": 0.19750497521875443, "grad_norm": 0.0028523015789687634, "learning_rate": 0.001, "loss": 0.4131, "step": 7158 }, { "epoch": 0.19753256741981878, "grad_norm": 0.002735694171860814, "learning_rate": 0.001, "loss": 0.4205, "step": 7159 }, { "epoch": 0.19756015962088316, "grad_norm": 0.005362628027796745, "learning_rate": 0.001, "loss": 0.4041, "step": 7160 }, { "epoch": 0.19758775182194752, "grad_norm": 0.0036659168545156717, "learning_rate": 0.001, "loss": 0.4107, "step": 7161 }, { "epoch": 0.1976153440230119, "grad_norm": 0.0036525116302073, "learning_rate": 0.001, "loss": 0.4265, "step": 7162 }, { "epoch": 0.19764293622407628, "grad_norm": 0.002731502987444401, "learning_rate": 0.001, "loss": 0.4075, "step": 7163 }, { "epoch": 0.19767052842514063, "grad_norm": 0.006773337721824646, "learning_rate": 0.001, "loss": 0.4441, "step": 7164 }, { "epoch": 0.197698120626205, "grad_norm": 0.0025534844025969505, "learning_rate": 0.001, "loss": 0.4235, "step": 7165 }, { "epoch": 0.19772571282726936, "grad_norm": 0.003402253147214651, "learning_rate": 0.001, "loss": 0.3757, "step": 7166 }, { "epoch": 0.19775330502833374, "grad_norm": 0.003048243233934045, "learning_rate": 0.001, "loss": 0.3988, "step": 7167 }, { "epoch": 0.19778089722939812, "grad_norm": 0.004743688274174929, "learning_rate": 0.001, "loss": 0.3636, "step": 7168 }, { "epoch": 0.19780848943046248, "grad_norm": 0.0041982936672866344, "learning_rate": 0.001, "loss": 0.3869, "step": 7169 }, { "epoch": 0.19783608163152686, "grad_norm": 0.0023896710481494665, "learning_rate": 0.001, "loss": 0.4214, "step": 7170 }, { "epoch": 0.1978636738325912, "grad_norm": 0.0028549651615321636, "learning_rate": 0.001, "loss": 0.3938, "step": 7171 }, { "epoch": 0.1978912660336556, "grad_norm": 0.002721426310017705, "learning_rate": 0.001, "loss": 0.4072, "step": 7172 }, { "epoch": 0.19791885823471997, "grad_norm": 0.0029002949595451355, "learning_rate": 0.001, "loss": 0.3972, "step": 7173 }, { "epoch": 0.19794645043578432, "grad_norm": 0.00278780166991055, "learning_rate": 0.001, "loss": 0.3967, "step": 7174 }, { "epoch": 0.1979740426368487, "grad_norm": 0.0019508522236719728, "learning_rate": 0.001, "loss": 0.3739, "step": 7175 }, { "epoch": 0.19800163483791305, "grad_norm": 0.002591001568362117, "learning_rate": 0.001, "loss": 0.3928, "step": 7176 }, { "epoch": 0.19802922703897743, "grad_norm": 0.00309981987811625, "learning_rate": 0.001, "loss": 0.3692, "step": 7177 }, { "epoch": 0.19805681924004181, "grad_norm": 0.003413395956158638, "learning_rate": 0.001, "loss": 0.3945, "step": 7178 }, { "epoch": 0.19808441144110617, "grad_norm": 0.0031371319200843573, "learning_rate": 0.001, "loss": 0.3773, "step": 7179 }, { "epoch": 0.19811200364217055, "grad_norm": 0.0040471418760716915, "learning_rate": 0.001, "loss": 0.379, "step": 7180 }, { "epoch": 0.1981395958432349, "grad_norm": 0.002860469277948141, "learning_rate": 0.001, "loss": 0.4273, "step": 7181 }, { "epoch": 0.19816718804429928, "grad_norm": 0.0035744935739785433, "learning_rate": 0.001, "loss": 0.3871, "step": 7182 }, { "epoch": 0.19819478024536366, "grad_norm": 0.0021571393590420485, "learning_rate": 0.001, "loss": 0.4033, "step": 7183 }, { "epoch": 0.198222372446428, "grad_norm": 0.0027684883680194616, "learning_rate": 0.001, "loss": 0.398, "step": 7184 }, { "epoch": 0.1982499646474924, "grad_norm": 0.004502817988395691, "learning_rate": 0.001, "loss": 0.397, "step": 7185 }, { "epoch": 0.19827755684855675, "grad_norm": 0.0026800809428095818, "learning_rate": 0.001, "loss": 0.4417, "step": 7186 }, { "epoch": 0.19830514904962113, "grad_norm": 0.004524301737546921, "learning_rate": 0.001, "loss": 0.392, "step": 7187 }, { "epoch": 0.1983327412506855, "grad_norm": 0.002851566532626748, "learning_rate": 0.001, "loss": 0.3904, "step": 7188 }, { "epoch": 0.19836033345174986, "grad_norm": 0.004874248988926411, "learning_rate": 0.001, "loss": 0.4018, "step": 7189 }, { "epoch": 0.19838792565281424, "grad_norm": 0.0025999664794653654, "learning_rate": 0.001, "loss": 0.4053, "step": 7190 }, { "epoch": 0.1984155178538786, "grad_norm": 0.0053477040491998196, "learning_rate": 0.001, "loss": 0.4298, "step": 7191 }, { "epoch": 0.19844311005494297, "grad_norm": 0.002580095548182726, "learning_rate": 0.001, "loss": 0.3889, "step": 7192 }, { "epoch": 0.19847070225600735, "grad_norm": 0.006665955297648907, "learning_rate": 0.001, "loss": 0.4013, "step": 7193 }, { "epoch": 0.1984982944570717, "grad_norm": 0.002255852334201336, "learning_rate": 0.001, "loss": 0.4159, "step": 7194 }, { "epoch": 0.19852588665813609, "grad_norm": 0.0027929407078772783, "learning_rate": 0.001, "loss": 0.3832, "step": 7195 }, { "epoch": 0.19855347885920044, "grad_norm": 0.002101206686347723, "learning_rate": 0.001, "loss": 0.4199, "step": 7196 }, { "epoch": 0.19858107106026482, "grad_norm": 0.0027232288848608732, "learning_rate": 0.001, "loss": 0.435, "step": 7197 }, { "epoch": 0.1986086632613292, "grad_norm": 0.007063394878059626, "learning_rate": 0.001, "loss": 0.385, "step": 7198 }, { "epoch": 0.19863625546239355, "grad_norm": 0.004317095037549734, "learning_rate": 0.001, "loss": 0.4079, "step": 7199 }, { "epoch": 0.19866384766345793, "grad_norm": 0.0018634272273629904, "learning_rate": 0.001, "loss": 0.3987, "step": 7200 }, { "epoch": 0.19869143986452228, "grad_norm": 0.0029656426049768925, "learning_rate": 0.001, "loss": 0.3785, "step": 7201 }, { "epoch": 0.19871903206558666, "grad_norm": 0.0018411249620839953, "learning_rate": 0.001, "loss": 0.44, "step": 7202 }, { "epoch": 0.19874662426665102, "grad_norm": 0.0027455666568130255, "learning_rate": 0.001, "loss": 0.4132, "step": 7203 }, { "epoch": 0.1987742164677154, "grad_norm": 0.003438533516600728, "learning_rate": 0.001, "loss": 0.423, "step": 7204 }, { "epoch": 0.19880180866877978, "grad_norm": 0.0023013537283986807, "learning_rate": 0.001, "loss": 0.4364, "step": 7205 }, { "epoch": 0.19882940086984413, "grad_norm": 0.003259762190282345, "learning_rate": 0.001, "loss": 0.3781, "step": 7206 }, { "epoch": 0.1988569930709085, "grad_norm": 0.002609924878925085, "learning_rate": 0.001, "loss": 0.4025, "step": 7207 }, { "epoch": 0.19888458527197286, "grad_norm": 0.002380332676693797, "learning_rate": 0.001, "loss": 0.4107, "step": 7208 }, { "epoch": 0.19891217747303724, "grad_norm": 0.0030413048807531595, "learning_rate": 0.001, "loss": 0.3996, "step": 7209 }, { "epoch": 0.19893976967410162, "grad_norm": 0.0022222718689590693, "learning_rate": 0.001, "loss": 0.3882, "step": 7210 }, { "epoch": 0.19896736187516598, "grad_norm": 0.0030871161725372076, "learning_rate": 0.001, "loss": 0.4214, "step": 7211 }, { "epoch": 0.19899495407623036, "grad_norm": 0.005348892416805029, "learning_rate": 0.001, "loss": 0.4101, "step": 7212 }, { "epoch": 0.1990225462772947, "grad_norm": 0.005420180968940258, "learning_rate": 0.001, "loss": 0.4083, "step": 7213 }, { "epoch": 0.1990501384783591, "grad_norm": 0.003726621624082327, "learning_rate": 0.001, "loss": 0.3649, "step": 7214 }, { "epoch": 0.19907773067942347, "grad_norm": 0.0031097896862775087, "learning_rate": 0.001, "loss": 0.4092, "step": 7215 }, { "epoch": 0.19910532288048782, "grad_norm": 0.0029584530275315046, "learning_rate": 0.001, "loss": 0.3914, "step": 7216 }, { "epoch": 0.1991329150815522, "grad_norm": 0.0022907305974513292, "learning_rate": 0.001, "loss": 0.4146, "step": 7217 }, { "epoch": 0.19916050728261656, "grad_norm": 0.0031007654033601284, "learning_rate": 0.001, "loss": 0.3831, "step": 7218 }, { "epoch": 0.19918809948368094, "grad_norm": 0.0021812678314745426, "learning_rate": 0.001, "loss": 0.4274, "step": 7219 }, { "epoch": 0.19921569168474532, "grad_norm": 0.0023966538719832897, "learning_rate": 0.001, "loss": 0.394, "step": 7220 }, { "epoch": 0.19924328388580967, "grad_norm": 0.0031528030522167683, "learning_rate": 0.001, "loss": 0.3718, "step": 7221 }, { "epoch": 0.19927087608687405, "grad_norm": 0.0022123432718217373, "learning_rate": 0.001, "loss": 0.3737, "step": 7222 }, { "epoch": 0.1992984682879384, "grad_norm": 0.0031831758096814156, "learning_rate": 0.001, "loss": 0.3719, "step": 7223 }, { "epoch": 0.19932606048900278, "grad_norm": 0.002463659970089793, "learning_rate": 0.001, "loss": 0.406, "step": 7224 }, { "epoch": 0.19935365269006716, "grad_norm": 0.0026505514979362488, "learning_rate": 0.001, "loss": 0.3892, "step": 7225 }, { "epoch": 0.19938124489113151, "grad_norm": 0.0024356083013117313, "learning_rate": 0.001, "loss": 0.3873, "step": 7226 }, { "epoch": 0.1994088370921959, "grad_norm": 0.009376121684908867, "learning_rate": 0.001, "loss": 0.3801, "step": 7227 }, { "epoch": 0.19943642929326025, "grad_norm": 0.002203370677307248, "learning_rate": 0.001, "loss": 0.4149, "step": 7228 }, { "epoch": 0.19946402149432463, "grad_norm": 0.0029552599880844355, "learning_rate": 0.001, "loss": 0.3918, "step": 7229 }, { "epoch": 0.199491613695389, "grad_norm": 0.0028194712940603495, "learning_rate": 0.001, "loss": 0.3809, "step": 7230 }, { "epoch": 0.19951920589645336, "grad_norm": 0.0036550972145050764, "learning_rate": 0.001, "loss": 0.4238, "step": 7231 }, { "epoch": 0.19954679809751774, "grad_norm": 0.002419488737359643, "learning_rate": 0.001, "loss": 0.378, "step": 7232 }, { "epoch": 0.1995743902985821, "grad_norm": 0.002697533695027232, "learning_rate": 0.001, "loss": 0.4295, "step": 7233 }, { "epoch": 0.19960198249964647, "grad_norm": 0.0025205162819474936, "learning_rate": 0.001, "loss": 0.3649, "step": 7234 }, { "epoch": 0.19962957470071085, "grad_norm": 0.002819119254127145, "learning_rate": 0.001, "loss": 0.3823, "step": 7235 }, { "epoch": 0.1996571669017752, "grad_norm": 0.002348159672692418, "learning_rate": 0.001, "loss": 0.4296, "step": 7236 }, { "epoch": 0.1996847591028396, "grad_norm": 0.002327647991478443, "learning_rate": 0.001, "loss": 0.4258, "step": 7237 }, { "epoch": 0.19971235130390394, "grad_norm": 0.003467888105660677, "learning_rate": 0.001, "loss": 0.375, "step": 7238 }, { "epoch": 0.19973994350496832, "grad_norm": 0.002749866805970669, "learning_rate": 0.001, "loss": 0.3894, "step": 7239 }, { "epoch": 0.1997675357060327, "grad_norm": 0.002804868621751666, "learning_rate": 0.001, "loss": 0.4616, "step": 7240 }, { "epoch": 0.19979512790709705, "grad_norm": 0.0025544483214616776, "learning_rate": 0.001, "loss": 0.4019, "step": 7241 }, { "epoch": 0.19982272010816143, "grad_norm": 0.004032348282635212, "learning_rate": 0.001, "loss": 0.3877, "step": 7242 }, { "epoch": 0.19985031230922579, "grad_norm": 0.0025702861603349447, "learning_rate": 0.001, "loss": 0.4052, "step": 7243 }, { "epoch": 0.19987790451029017, "grad_norm": 0.0029868450947105885, "learning_rate": 0.001, "loss": 0.3724, "step": 7244 }, { "epoch": 0.19990549671135455, "grad_norm": 0.0034900156315416098, "learning_rate": 0.001, "loss": 0.3749, "step": 7245 }, { "epoch": 0.1999330889124189, "grad_norm": 0.004184171557426453, "learning_rate": 0.001, "loss": 0.4015, "step": 7246 }, { "epoch": 0.19996068111348328, "grad_norm": 0.003094634972512722, "learning_rate": 0.001, "loss": 0.3703, "step": 7247 }, { "epoch": 0.19998827331454763, "grad_norm": 0.004218010231852531, "learning_rate": 0.001, "loss": 0.4223, "step": 7248 }, { "epoch": 0.200015865515612, "grad_norm": 0.002849703887477517, "learning_rate": 0.001, "loss": 0.4061, "step": 7249 }, { "epoch": 0.2000434577166764, "grad_norm": 0.002104248385876417, "learning_rate": 0.001, "loss": 0.4376, "step": 7250 }, { "epoch": 0.20007104991774075, "grad_norm": 0.0028338278643786907, "learning_rate": 0.001, "loss": 0.4377, "step": 7251 }, { "epoch": 0.20009864211880513, "grad_norm": 0.0070443847216665745, "learning_rate": 0.001, "loss": 0.3685, "step": 7252 }, { "epoch": 0.20012623431986948, "grad_norm": 0.002888858551159501, "learning_rate": 0.001, "loss": 0.4073, "step": 7253 }, { "epoch": 0.20015382652093386, "grad_norm": 0.0023948305752128363, "learning_rate": 0.001, "loss": 0.4076, "step": 7254 }, { "epoch": 0.20018141872199824, "grad_norm": 0.0023272959515452385, "learning_rate": 0.001, "loss": 0.4205, "step": 7255 }, { "epoch": 0.2002090109230626, "grad_norm": 0.0038700769655406475, "learning_rate": 0.001, "loss": 0.4253, "step": 7256 }, { "epoch": 0.20023660312412697, "grad_norm": 0.002915510907769203, "learning_rate": 0.001, "loss": 0.3901, "step": 7257 }, { "epoch": 0.20026419532519132, "grad_norm": 0.0024133019614964724, "learning_rate": 0.001, "loss": 0.4259, "step": 7258 }, { "epoch": 0.2002917875262557, "grad_norm": 0.002601010724902153, "learning_rate": 0.001, "loss": 0.4087, "step": 7259 }, { "epoch": 0.20031937972732008, "grad_norm": 0.004203200805932283, "learning_rate": 0.001, "loss": 0.3884, "step": 7260 }, { "epoch": 0.20034697192838444, "grad_norm": 0.0025508387479931116, "learning_rate": 0.001, "loss": 0.4179, "step": 7261 }, { "epoch": 0.20037456412944882, "grad_norm": 0.0040724920108914375, "learning_rate": 0.001, "loss": 0.4209, "step": 7262 }, { "epoch": 0.20040215633051317, "grad_norm": 0.003506725886836648, "learning_rate": 0.001, "loss": 0.3999, "step": 7263 }, { "epoch": 0.20042974853157755, "grad_norm": 0.003731567645445466, "learning_rate": 0.001, "loss": 0.4015, "step": 7264 }, { "epoch": 0.20045734073264193, "grad_norm": 0.0050995261408388615, "learning_rate": 0.001, "loss": 0.3871, "step": 7265 }, { "epoch": 0.20048493293370628, "grad_norm": 0.00341955223120749, "learning_rate": 0.001, "loss": 0.4045, "step": 7266 }, { "epoch": 0.20051252513477066, "grad_norm": 0.003466901136562228, "learning_rate": 0.001, "loss": 0.3977, "step": 7267 }, { "epoch": 0.20054011733583502, "grad_norm": 0.0026405456010252237, "learning_rate": 0.001, "loss": 0.4159, "step": 7268 }, { "epoch": 0.2005677095368994, "grad_norm": 0.002517679473385215, "learning_rate": 0.001, "loss": 0.3902, "step": 7269 }, { "epoch": 0.20059530173796378, "grad_norm": 0.002916789846494794, "learning_rate": 0.001, "loss": 0.4504, "step": 7270 }, { "epoch": 0.20062289393902813, "grad_norm": 0.0029924395494163036, "learning_rate": 0.001, "loss": 0.3836, "step": 7271 }, { "epoch": 0.2006504861400925, "grad_norm": 0.004070238210260868, "learning_rate": 0.001, "loss": 0.4133, "step": 7272 }, { "epoch": 0.20067807834115686, "grad_norm": 0.002408135449513793, "learning_rate": 0.001, "loss": 0.4481, "step": 7273 }, { "epoch": 0.20070567054222124, "grad_norm": 0.005004410166293383, "learning_rate": 0.001, "loss": 0.4515, "step": 7274 }, { "epoch": 0.20073326274328562, "grad_norm": 0.002634943462908268, "learning_rate": 0.001, "loss": 0.4032, "step": 7275 }, { "epoch": 0.20076085494434998, "grad_norm": 0.0034329029731452465, "learning_rate": 0.001, "loss": 0.3893, "step": 7276 }, { "epoch": 0.20078844714541436, "grad_norm": 0.003150471020489931, "learning_rate": 0.001, "loss": 0.4292, "step": 7277 }, { "epoch": 0.2008160393464787, "grad_norm": 0.0030349427834153175, "learning_rate": 0.001, "loss": 0.3582, "step": 7278 }, { "epoch": 0.2008436315475431, "grad_norm": 0.0024701536167412996, "learning_rate": 0.001, "loss": 0.3926, "step": 7279 }, { "epoch": 0.20087122374860747, "grad_norm": 0.0022775549441576004, "learning_rate": 0.001, "loss": 0.4324, "step": 7280 }, { "epoch": 0.20089881594967182, "grad_norm": 0.0037725751753896475, "learning_rate": 0.001, "loss": 0.4061, "step": 7281 }, { "epoch": 0.2009264081507362, "grad_norm": 0.0029782191850245, "learning_rate": 0.001, "loss": 0.4382, "step": 7282 }, { "epoch": 0.20095400035180055, "grad_norm": 0.0028811590746045113, "learning_rate": 0.001, "loss": 0.3666, "step": 7283 }, { "epoch": 0.20098159255286493, "grad_norm": 0.0027202339842915535, "learning_rate": 0.001, "loss": 0.4074, "step": 7284 }, { "epoch": 0.20100918475392932, "grad_norm": 0.0026378172915428877, "learning_rate": 0.001, "loss": 0.3846, "step": 7285 }, { "epoch": 0.20103677695499367, "grad_norm": 0.0031549364794045687, "learning_rate": 0.001, "loss": 0.409, "step": 7286 }, { "epoch": 0.20106436915605805, "grad_norm": 0.0036540657747536898, "learning_rate": 0.001, "loss": 0.3454, "step": 7287 }, { "epoch": 0.2010919613571224, "grad_norm": 0.0027373498305678368, "learning_rate": 0.001, "loss": 0.4287, "step": 7288 }, { "epoch": 0.20111955355818678, "grad_norm": 0.0032555065117776394, "learning_rate": 0.001, "loss": 0.3941, "step": 7289 }, { "epoch": 0.20114714575925116, "grad_norm": 0.0048676906153559685, "learning_rate": 0.001, "loss": 0.3526, "step": 7290 }, { "epoch": 0.20117473796031551, "grad_norm": 0.0023562663700431585, "learning_rate": 0.001, "loss": 0.3755, "step": 7291 }, { "epoch": 0.2012023301613799, "grad_norm": 0.00272022164426744, "learning_rate": 0.001, "loss": 0.384, "step": 7292 }, { "epoch": 0.20122992236244425, "grad_norm": 0.002916688099503517, "learning_rate": 0.001, "loss": 0.3676, "step": 7293 }, { "epoch": 0.20125751456350863, "grad_norm": 0.003989284858107567, "learning_rate": 0.001, "loss": 0.3717, "step": 7294 }, { "epoch": 0.20128510676457298, "grad_norm": 0.0029025361873209476, "learning_rate": 0.001, "loss": 0.3997, "step": 7295 }, { "epoch": 0.20131269896563736, "grad_norm": 0.003632181789726019, "learning_rate": 0.001, "loss": 0.4167, "step": 7296 }, { "epoch": 0.20134029116670174, "grad_norm": 0.004145762883126736, "learning_rate": 0.001, "loss": 0.4017, "step": 7297 }, { "epoch": 0.2013678833677661, "grad_norm": 0.0024826403241604567, "learning_rate": 0.001, "loss": 0.4606, "step": 7298 }, { "epoch": 0.20139547556883047, "grad_norm": 0.002547920448705554, "learning_rate": 0.001, "loss": 0.4127, "step": 7299 }, { "epoch": 0.20142306776989483, "grad_norm": 0.0030286472756415606, "learning_rate": 0.001, "loss": 0.4077, "step": 7300 }, { "epoch": 0.2014506599709592, "grad_norm": 0.008298000320792198, "learning_rate": 0.001, "loss": 0.3692, "step": 7301 }, { "epoch": 0.2014782521720236, "grad_norm": 0.003152182325720787, "learning_rate": 0.001, "loss": 0.4169, "step": 7302 }, { "epoch": 0.20150584437308794, "grad_norm": 0.002809209283441305, "learning_rate": 0.001, "loss": 0.4033, "step": 7303 }, { "epoch": 0.20153343657415232, "grad_norm": 0.004826799966394901, "learning_rate": 0.001, "loss": 0.4196, "step": 7304 }, { "epoch": 0.20156102877521667, "grad_norm": 0.0035292392130941153, "learning_rate": 0.001, "loss": 0.4192, "step": 7305 }, { "epoch": 0.20158862097628105, "grad_norm": 0.002209904370829463, "learning_rate": 0.001, "loss": 0.4124, "step": 7306 }, { "epoch": 0.20161621317734543, "grad_norm": 0.002997803036123514, "learning_rate": 0.001, "loss": 0.4296, "step": 7307 }, { "epoch": 0.20164380537840979, "grad_norm": 0.004406254272907972, "learning_rate": 0.001, "loss": 0.4045, "step": 7308 }, { "epoch": 0.20167139757947417, "grad_norm": 0.0043180338107049465, "learning_rate": 0.001, "loss": 0.4384, "step": 7309 }, { "epoch": 0.20169898978053852, "grad_norm": 0.07319663465023041, "learning_rate": 0.001, "loss": 0.3987, "step": 7310 }, { "epoch": 0.2017265819816029, "grad_norm": 0.003474163357168436, "learning_rate": 0.001, "loss": 0.4134, "step": 7311 }, { "epoch": 0.20175417418266728, "grad_norm": 0.005750832613557577, "learning_rate": 0.001, "loss": 0.3754, "step": 7312 }, { "epoch": 0.20178176638373163, "grad_norm": 0.0026583108119666576, "learning_rate": 0.001, "loss": 0.4371, "step": 7313 }, { "epoch": 0.201809358584796, "grad_norm": 0.004087739158421755, "learning_rate": 0.001, "loss": 0.4412, "step": 7314 }, { "epoch": 0.20183695078586036, "grad_norm": 0.0027278910856693983, "learning_rate": 0.001, "loss": 0.4137, "step": 7315 }, { "epoch": 0.20186454298692474, "grad_norm": 0.002498122164979577, "learning_rate": 0.001, "loss": 0.4084, "step": 7316 }, { "epoch": 0.20189213518798912, "grad_norm": 0.004829999525099993, "learning_rate": 0.001, "loss": 0.3991, "step": 7317 }, { "epoch": 0.20191972738905348, "grad_norm": 0.0025493940338492393, "learning_rate": 0.001, "loss": 0.4411, "step": 7318 }, { "epoch": 0.20194731959011786, "grad_norm": 0.0027906931936740875, "learning_rate": 0.001, "loss": 0.3974, "step": 7319 }, { "epoch": 0.2019749117911822, "grad_norm": 0.0034205715637654066, "learning_rate": 0.001, "loss": 0.3953, "step": 7320 }, { "epoch": 0.2020025039922466, "grad_norm": 0.0030028745532035828, "learning_rate": 0.001, "loss": 0.4153, "step": 7321 }, { "epoch": 0.20203009619331097, "grad_norm": 0.006040586158633232, "learning_rate": 0.001, "loss": 0.4089, "step": 7322 }, { "epoch": 0.20205768839437532, "grad_norm": 0.0025682656560093164, "learning_rate": 0.001, "loss": 0.4298, "step": 7323 }, { "epoch": 0.2020852805954397, "grad_norm": 0.003085241885855794, "learning_rate": 0.001, "loss": 0.44, "step": 7324 }, { "epoch": 0.20211287279650406, "grad_norm": 0.0035471986047923565, "learning_rate": 0.001, "loss": 0.4127, "step": 7325 }, { "epoch": 0.20214046499756844, "grad_norm": 0.0031859648879617453, "learning_rate": 0.001, "loss": 0.4442, "step": 7326 }, { "epoch": 0.20216805719863282, "grad_norm": 0.003315818263217807, "learning_rate": 0.001, "loss": 0.4262, "step": 7327 }, { "epoch": 0.20219564939969717, "grad_norm": 0.004785994999110699, "learning_rate": 0.001, "loss": 0.346, "step": 7328 }, { "epoch": 0.20222324160076155, "grad_norm": 0.004568551201373339, "learning_rate": 0.001, "loss": 0.3935, "step": 7329 }, { "epoch": 0.2022508338018259, "grad_norm": 0.00264363712631166, "learning_rate": 0.001, "loss": 0.4275, "step": 7330 }, { "epoch": 0.20227842600289028, "grad_norm": 0.003292328678071499, "learning_rate": 0.001, "loss": 0.416, "step": 7331 }, { "epoch": 0.20230601820395466, "grad_norm": 0.004438953939825296, "learning_rate": 0.001, "loss": 0.3979, "step": 7332 }, { "epoch": 0.20233361040501902, "grad_norm": 0.0032826552633196115, "learning_rate": 0.001, "loss": 0.4065, "step": 7333 }, { "epoch": 0.2023612026060834, "grad_norm": 0.03612243011593819, "learning_rate": 0.001, "loss": 0.4007, "step": 7334 }, { "epoch": 0.20238879480714775, "grad_norm": 0.01141429878771305, "learning_rate": 0.001, "loss": 0.3783, "step": 7335 }, { "epoch": 0.20241638700821213, "grad_norm": 0.0034477144945412874, "learning_rate": 0.001, "loss": 0.4199, "step": 7336 }, { "epoch": 0.2024439792092765, "grad_norm": 0.007367158308625221, "learning_rate": 0.001, "loss": 0.37, "step": 7337 }, { "epoch": 0.20247157141034086, "grad_norm": 0.0035362287890166044, "learning_rate": 0.001, "loss": 0.3938, "step": 7338 }, { "epoch": 0.20249916361140524, "grad_norm": 0.002065723529085517, "learning_rate": 0.001, "loss": 0.3783, "step": 7339 }, { "epoch": 0.2025267558124696, "grad_norm": 0.004578362684696913, "learning_rate": 0.001, "loss": 0.4092, "step": 7340 }, { "epoch": 0.20255434801353397, "grad_norm": 0.002685924293473363, "learning_rate": 0.001, "loss": 0.3728, "step": 7341 }, { "epoch": 0.20258194021459835, "grad_norm": 0.0038794514257460833, "learning_rate": 0.001, "loss": 0.3913, "step": 7342 }, { "epoch": 0.2026095324156627, "grad_norm": 0.002912055002525449, "learning_rate": 0.001, "loss": 0.4266, "step": 7343 }, { "epoch": 0.2026371246167271, "grad_norm": 0.003003375604748726, "learning_rate": 0.001, "loss": 0.376, "step": 7344 }, { "epoch": 0.20266471681779144, "grad_norm": 0.003948207478970289, "learning_rate": 0.001, "loss": 0.4227, "step": 7345 }, { "epoch": 0.20269230901885582, "grad_norm": 0.0033744163811206818, "learning_rate": 0.001, "loss": 0.4091, "step": 7346 }, { "epoch": 0.2027199012199202, "grad_norm": 0.004838820081204176, "learning_rate": 0.001, "loss": 0.4642, "step": 7347 }, { "epoch": 0.20274749342098455, "grad_norm": 0.0024067736230790615, "learning_rate": 0.001, "loss": 0.3415, "step": 7348 }, { "epoch": 0.20277508562204893, "grad_norm": 0.0045340536162257195, "learning_rate": 0.001, "loss": 0.3561, "step": 7349 }, { "epoch": 0.2028026778231133, "grad_norm": 0.003165213158354163, "learning_rate": 0.001, "loss": 0.372, "step": 7350 }, { "epoch": 0.20283027002417767, "grad_norm": 0.004642155487090349, "learning_rate": 0.001, "loss": 0.3858, "step": 7351 }, { "epoch": 0.20285786222524205, "grad_norm": 0.005825064145028591, "learning_rate": 0.001, "loss": 0.4276, "step": 7352 }, { "epoch": 0.2028854544263064, "grad_norm": 0.002787059871479869, "learning_rate": 0.001, "loss": 0.407, "step": 7353 }, { "epoch": 0.20291304662737078, "grad_norm": 0.004571388475596905, "learning_rate": 0.001, "loss": 0.3881, "step": 7354 }, { "epoch": 0.20294063882843513, "grad_norm": 0.0032351436093449593, "learning_rate": 0.001, "loss": 0.3958, "step": 7355 }, { "epoch": 0.2029682310294995, "grad_norm": 0.0041768900118768215, "learning_rate": 0.001, "loss": 0.4159, "step": 7356 }, { "epoch": 0.2029958232305639, "grad_norm": 0.0028741308487951756, "learning_rate": 0.001, "loss": 0.4076, "step": 7357 }, { "epoch": 0.20302341543162825, "grad_norm": 0.00324125774204731, "learning_rate": 0.001, "loss": 0.3766, "step": 7358 }, { "epoch": 0.20305100763269263, "grad_norm": 0.003516893368214369, "learning_rate": 0.001, "loss": 0.3873, "step": 7359 }, { "epoch": 0.20307859983375698, "grad_norm": 0.0020891185849905014, "learning_rate": 0.001, "loss": 0.422, "step": 7360 }, { "epoch": 0.20310619203482136, "grad_norm": 0.0024756919592618942, "learning_rate": 0.001, "loss": 0.4182, "step": 7361 }, { "epoch": 0.20313378423588574, "grad_norm": 0.007347720675170422, "learning_rate": 0.001, "loss": 0.4016, "step": 7362 }, { "epoch": 0.2031613764369501, "grad_norm": 0.009618041105568409, "learning_rate": 0.001, "loss": 0.4081, "step": 7363 }, { "epoch": 0.20318896863801447, "grad_norm": 0.004037544596940279, "learning_rate": 0.001, "loss": 0.4009, "step": 7364 }, { "epoch": 0.20321656083907882, "grad_norm": 0.002381223253905773, "learning_rate": 0.001, "loss": 0.427, "step": 7365 }, { "epoch": 0.2032441530401432, "grad_norm": 0.0031598431523889303, "learning_rate": 0.001, "loss": 0.4249, "step": 7366 }, { "epoch": 0.20327174524120759, "grad_norm": 0.00809874664992094, "learning_rate": 0.001, "loss": 0.4179, "step": 7367 }, { "epoch": 0.20329933744227194, "grad_norm": 0.002093277871608734, "learning_rate": 0.001, "loss": 0.4407, "step": 7368 }, { "epoch": 0.20332692964333632, "grad_norm": 0.002877402352169156, "learning_rate": 0.001, "loss": 0.4162, "step": 7369 }, { "epoch": 0.20335452184440067, "grad_norm": 0.004072779323905706, "learning_rate": 0.001, "loss": 0.3556, "step": 7370 }, { "epoch": 0.20338211404546505, "grad_norm": 0.0036910090129822493, "learning_rate": 0.001, "loss": 0.3764, "step": 7371 }, { "epoch": 0.20340970624652943, "grad_norm": 0.002574736950919032, "learning_rate": 0.001, "loss": 0.4183, "step": 7372 }, { "epoch": 0.20343729844759378, "grad_norm": 0.0024004147853702307, "learning_rate": 0.001, "loss": 0.4241, "step": 7373 }, { "epoch": 0.20346489064865816, "grad_norm": 0.003890526946634054, "learning_rate": 0.001, "loss": 0.4319, "step": 7374 }, { "epoch": 0.20349248284972252, "grad_norm": 0.0030879208352416754, "learning_rate": 0.001, "loss": 0.4009, "step": 7375 }, { "epoch": 0.2035200750507869, "grad_norm": 0.003786006011068821, "learning_rate": 0.001, "loss": 0.4258, "step": 7376 }, { "epoch": 0.20354766725185128, "grad_norm": 0.0021713408641517162, "learning_rate": 0.001, "loss": 0.3805, "step": 7377 }, { "epoch": 0.20357525945291563, "grad_norm": 0.00433305511251092, "learning_rate": 0.001, "loss": 0.392, "step": 7378 }, { "epoch": 0.20360285165398, "grad_norm": 0.0030764697585254908, "learning_rate": 0.001, "loss": 0.4037, "step": 7379 }, { "epoch": 0.20363044385504436, "grad_norm": 0.005284723825752735, "learning_rate": 0.001, "loss": 0.3861, "step": 7380 }, { "epoch": 0.20365803605610874, "grad_norm": 0.004528792109340429, "learning_rate": 0.001, "loss": 0.3937, "step": 7381 }, { "epoch": 0.20368562825717312, "grad_norm": 0.029749277979135513, "learning_rate": 0.001, "loss": 0.3852, "step": 7382 }, { "epoch": 0.20371322045823748, "grad_norm": 0.004927974659949541, "learning_rate": 0.001, "loss": 0.4053, "step": 7383 }, { "epoch": 0.20374081265930186, "grad_norm": 0.004339797887951136, "learning_rate": 0.001, "loss": 0.4044, "step": 7384 }, { "epoch": 0.2037684048603662, "grad_norm": 0.002485482720658183, "learning_rate": 0.001, "loss": 0.4244, "step": 7385 }, { "epoch": 0.2037959970614306, "grad_norm": 0.004456434864550829, "learning_rate": 0.001, "loss": 0.3738, "step": 7386 }, { "epoch": 0.20382358926249497, "grad_norm": 0.0022889727260917425, "learning_rate": 0.001, "loss": 0.4261, "step": 7387 }, { "epoch": 0.20385118146355932, "grad_norm": 0.006221712101250887, "learning_rate": 0.001, "loss": 0.3579, "step": 7388 }, { "epoch": 0.2038787736646237, "grad_norm": 0.003358406713232398, "learning_rate": 0.001, "loss": 0.3869, "step": 7389 }, { "epoch": 0.20390636586568806, "grad_norm": 0.0029171998612582684, "learning_rate": 0.001, "loss": 0.4179, "step": 7390 }, { "epoch": 0.20393395806675244, "grad_norm": 0.0030932044610381126, "learning_rate": 0.001, "loss": 0.415, "step": 7391 }, { "epoch": 0.2039615502678168, "grad_norm": 0.009446857497096062, "learning_rate": 0.001, "loss": 0.411, "step": 7392 }, { "epoch": 0.20398914246888117, "grad_norm": 0.003646930679678917, "learning_rate": 0.001, "loss": 0.3674, "step": 7393 }, { "epoch": 0.20401673466994555, "grad_norm": 0.002314311685040593, "learning_rate": 0.001, "loss": 0.4071, "step": 7394 }, { "epoch": 0.2040443268710099, "grad_norm": 0.0032062383834272623, "learning_rate": 0.001, "loss": 0.378, "step": 7395 }, { "epoch": 0.20407191907207428, "grad_norm": 0.005731469485908747, "learning_rate": 0.001, "loss": 0.42, "step": 7396 }, { "epoch": 0.20409951127313863, "grad_norm": 0.002991395303979516, "learning_rate": 0.001, "loss": 0.4357, "step": 7397 }, { "epoch": 0.20412710347420301, "grad_norm": 0.0036581414751708508, "learning_rate": 0.001, "loss": 0.401, "step": 7398 }, { "epoch": 0.2041546956752674, "grad_norm": 0.00450851721689105, "learning_rate": 0.001, "loss": 0.3855, "step": 7399 }, { "epoch": 0.20418228787633175, "grad_norm": 0.003067772602662444, "learning_rate": 0.001, "loss": 0.392, "step": 7400 }, { "epoch": 0.20420988007739613, "grad_norm": 0.0030468387994915247, "learning_rate": 0.001, "loss": 0.3904, "step": 7401 }, { "epoch": 0.20423747227846048, "grad_norm": 0.004612901713699102, "learning_rate": 0.001, "loss": 0.3639, "step": 7402 }, { "epoch": 0.20426506447952486, "grad_norm": 0.0034134613815695047, "learning_rate": 0.001, "loss": 0.4229, "step": 7403 }, { "epoch": 0.20429265668058924, "grad_norm": 0.009274942800402641, "learning_rate": 0.001, "loss": 0.4011, "step": 7404 }, { "epoch": 0.2043202488816536, "grad_norm": 0.0026235580444335938, "learning_rate": 0.001, "loss": 0.3719, "step": 7405 }, { "epoch": 0.20434784108271797, "grad_norm": 0.006790754850953817, "learning_rate": 0.001, "loss": 0.3867, "step": 7406 }, { "epoch": 0.20437543328378233, "grad_norm": 0.0022785153705626726, "learning_rate": 0.001, "loss": 0.4071, "step": 7407 }, { "epoch": 0.2044030254848467, "grad_norm": 0.005909627769142389, "learning_rate": 0.001, "loss": 0.3857, "step": 7408 }, { "epoch": 0.2044306176859111, "grad_norm": 0.0032420544885098934, "learning_rate": 0.001, "loss": 0.3786, "step": 7409 }, { "epoch": 0.20445820988697544, "grad_norm": 0.003111346159130335, "learning_rate": 0.001, "loss": 0.3925, "step": 7410 }, { "epoch": 0.20448580208803982, "grad_norm": 0.003899330273270607, "learning_rate": 0.001, "loss": 0.3813, "step": 7411 }, { "epoch": 0.20451339428910417, "grad_norm": 0.007644087076187134, "learning_rate": 0.001, "loss": 0.4065, "step": 7412 }, { "epoch": 0.20454098649016855, "grad_norm": 0.0028403971809893847, "learning_rate": 0.001, "loss": 0.4052, "step": 7413 }, { "epoch": 0.20456857869123293, "grad_norm": 0.00496717169880867, "learning_rate": 0.001, "loss": 0.3988, "step": 7414 }, { "epoch": 0.20459617089229729, "grad_norm": 0.003765889909118414, "learning_rate": 0.001, "loss": 0.4465, "step": 7415 }, { "epoch": 0.20462376309336167, "grad_norm": 0.0031095000449568033, "learning_rate": 0.001, "loss": 0.3615, "step": 7416 }, { "epoch": 0.20465135529442602, "grad_norm": 0.0027119989972561598, "learning_rate": 0.001, "loss": 0.4084, "step": 7417 }, { "epoch": 0.2046789474954904, "grad_norm": 0.0027941481675952673, "learning_rate": 0.001, "loss": 0.4315, "step": 7418 }, { "epoch": 0.20470653969655478, "grad_norm": 0.002864205278456211, "learning_rate": 0.001, "loss": 0.4042, "step": 7419 }, { "epoch": 0.20473413189761913, "grad_norm": 0.0032913892064243555, "learning_rate": 0.001, "loss": 0.3977, "step": 7420 }, { "epoch": 0.2047617240986835, "grad_norm": 0.0031653158366680145, "learning_rate": 0.001, "loss": 0.3891, "step": 7421 }, { "epoch": 0.20478931629974786, "grad_norm": 0.002829028759151697, "learning_rate": 0.001, "loss": 0.3897, "step": 7422 }, { "epoch": 0.20481690850081224, "grad_norm": 0.0026797757018357515, "learning_rate": 0.001, "loss": 0.4038, "step": 7423 }, { "epoch": 0.20484450070187663, "grad_norm": 0.0033115644473582506, "learning_rate": 0.001, "loss": 0.4109, "step": 7424 }, { "epoch": 0.20487209290294098, "grad_norm": 0.0037570816930383444, "learning_rate": 0.001, "loss": 0.3904, "step": 7425 }, { "epoch": 0.20489968510400536, "grad_norm": 0.0022269757464528084, "learning_rate": 0.001, "loss": 0.3928, "step": 7426 }, { "epoch": 0.2049272773050697, "grad_norm": 0.003650048514828086, "learning_rate": 0.001, "loss": 0.391, "step": 7427 }, { "epoch": 0.2049548695061341, "grad_norm": 0.003723046975210309, "learning_rate": 0.001, "loss": 0.4122, "step": 7428 }, { "epoch": 0.20498246170719847, "grad_norm": 0.0031359000131487846, "learning_rate": 0.001, "loss": 0.3824, "step": 7429 }, { "epoch": 0.20501005390826282, "grad_norm": 0.0026418371126055717, "learning_rate": 0.001, "loss": 0.4462, "step": 7430 }, { "epoch": 0.2050376461093272, "grad_norm": 0.005113274324685335, "learning_rate": 0.001, "loss": 0.4356, "step": 7431 }, { "epoch": 0.20506523831039156, "grad_norm": 0.0034433556720614433, "learning_rate": 0.001, "loss": 0.4077, "step": 7432 }, { "epoch": 0.20509283051145594, "grad_norm": 0.0039060013368725777, "learning_rate": 0.001, "loss": 0.3965, "step": 7433 }, { "epoch": 0.20512042271252032, "grad_norm": 0.0031034150160849094, "learning_rate": 0.001, "loss": 0.4195, "step": 7434 }, { "epoch": 0.20514801491358467, "grad_norm": 0.004134570714086294, "learning_rate": 0.001, "loss": 0.4333, "step": 7435 }, { "epoch": 0.20517560711464905, "grad_norm": 0.002315821126103401, "learning_rate": 0.001, "loss": 0.4121, "step": 7436 }, { "epoch": 0.2052031993157134, "grad_norm": 0.004337753169238567, "learning_rate": 0.001, "loss": 0.3908, "step": 7437 }, { "epoch": 0.20523079151677778, "grad_norm": 0.014801721088588238, "learning_rate": 0.001, "loss": 0.3668, "step": 7438 }, { "epoch": 0.20525838371784216, "grad_norm": 0.003172953613102436, "learning_rate": 0.001, "loss": 0.4005, "step": 7439 }, { "epoch": 0.20528597591890652, "grad_norm": 0.0031124092638492584, "learning_rate": 0.001, "loss": 0.3994, "step": 7440 }, { "epoch": 0.2053135681199709, "grad_norm": 0.004870596341788769, "learning_rate": 0.001, "loss": 0.3706, "step": 7441 }, { "epoch": 0.20534116032103525, "grad_norm": 0.002806989708915353, "learning_rate": 0.001, "loss": 0.3714, "step": 7442 }, { "epoch": 0.20536875252209963, "grad_norm": 0.002985073020681739, "learning_rate": 0.001, "loss": 0.3927, "step": 7443 }, { "epoch": 0.205396344723164, "grad_norm": 0.002774279797449708, "learning_rate": 0.001, "loss": 0.378, "step": 7444 }, { "epoch": 0.20542393692422836, "grad_norm": 0.0028050506953150034, "learning_rate": 0.001, "loss": 0.3541, "step": 7445 }, { "epoch": 0.20545152912529274, "grad_norm": 0.004381167236715555, "learning_rate": 0.001, "loss": 0.4285, "step": 7446 }, { "epoch": 0.2054791213263571, "grad_norm": 0.003191079944372177, "learning_rate": 0.001, "loss": 0.4055, "step": 7447 }, { "epoch": 0.20550671352742148, "grad_norm": 0.0027379656676203012, "learning_rate": 0.001, "loss": 0.385, "step": 7448 }, { "epoch": 0.20553430572848586, "grad_norm": 0.0034181999508291483, "learning_rate": 0.001, "loss": 0.4061, "step": 7449 }, { "epoch": 0.2055618979295502, "grad_norm": 0.003427153918892145, "learning_rate": 0.001, "loss": 0.3757, "step": 7450 }, { "epoch": 0.2055894901306146, "grad_norm": 0.002515499945729971, "learning_rate": 0.001, "loss": 0.3551, "step": 7451 }, { "epoch": 0.20561708233167894, "grad_norm": 0.002670654794201255, "learning_rate": 0.001, "loss": 0.4392, "step": 7452 }, { "epoch": 0.20564467453274332, "grad_norm": 0.003271377645432949, "learning_rate": 0.001, "loss": 0.3804, "step": 7453 }, { "epoch": 0.2056722667338077, "grad_norm": 0.0034607460256665945, "learning_rate": 0.001, "loss": 0.3951, "step": 7454 }, { "epoch": 0.20569985893487205, "grad_norm": 0.002583264373242855, "learning_rate": 0.001, "loss": 0.4314, "step": 7455 }, { "epoch": 0.20572745113593643, "grad_norm": 0.003357719397172332, "learning_rate": 0.001, "loss": 0.3857, "step": 7456 }, { "epoch": 0.2057550433370008, "grad_norm": 0.005699231754988432, "learning_rate": 0.001, "loss": 0.3983, "step": 7457 }, { "epoch": 0.20578263553806517, "grad_norm": 0.00468082819133997, "learning_rate": 0.001, "loss": 0.4166, "step": 7458 }, { "epoch": 0.20581022773912955, "grad_norm": 0.002804639982059598, "learning_rate": 0.001, "loss": 0.4128, "step": 7459 }, { "epoch": 0.2058378199401939, "grad_norm": 0.0031567709520459175, "learning_rate": 0.001, "loss": 0.452, "step": 7460 }, { "epoch": 0.20586541214125828, "grad_norm": 0.0023213198874145746, "learning_rate": 0.001, "loss": 0.4353, "step": 7461 }, { "epoch": 0.20589300434232263, "grad_norm": 0.003251513699069619, "learning_rate": 0.001, "loss": 0.3908, "step": 7462 }, { "epoch": 0.205920596543387, "grad_norm": 0.0036114819813519716, "learning_rate": 0.001, "loss": 0.4622, "step": 7463 }, { "epoch": 0.2059481887444514, "grad_norm": 0.0032585756853222847, "learning_rate": 0.001, "loss": 0.3934, "step": 7464 }, { "epoch": 0.20597578094551575, "grad_norm": 0.004090276546776295, "learning_rate": 0.001, "loss": 0.369, "step": 7465 }, { "epoch": 0.20600337314658013, "grad_norm": 0.006538175046443939, "learning_rate": 0.001, "loss": 0.3902, "step": 7466 }, { "epoch": 0.20603096534764448, "grad_norm": 0.006117241457104683, "learning_rate": 0.001, "loss": 0.3976, "step": 7467 }, { "epoch": 0.20605855754870886, "grad_norm": 0.004128247033804655, "learning_rate": 0.001, "loss": 0.4083, "step": 7468 }, { "epoch": 0.20608614974977324, "grad_norm": 0.0023199093993753195, "learning_rate": 0.001, "loss": 0.3613, "step": 7469 }, { "epoch": 0.2061137419508376, "grad_norm": 0.002994114300236106, "learning_rate": 0.001, "loss": 0.396, "step": 7470 }, { "epoch": 0.20614133415190197, "grad_norm": 0.00449094595387578, "learning_rate": 0.001, "loss": 0.3779, "step": 7471 }, { "epoch": 0.20616892635296633, "grad_norm": 0.002262924797832966, "learning_rate": 0.001, "loss": 0.3891, "step": 7472 }, { "epoch": 0.2061965185540307, "grad_norm": 0.0033367322757840157, "learning_rate": 0.001, "loss": 0.3902, "step": 7473 }, { "epoch": 0.20622411075509509, "grad_norm": 0.004621635656803846, "learning_rate": 0.001, "loss": 0.3829, "step": 7474 }, { "epoch": 0.20625170295615944, "grad_norm": 0.0024976639542728662, "learning_rate": 0.001, "loss": 0.3893, "step": 7475 }, { "epoch": 0.20627929515722382, "grad_norm": 0.0032991066109389067, "learning_rate": 0.001, "loss": 0.3909, "step": 7476 }, { "epoch": 0.20630688735828817, "grad_norm": 0.004945872817188501, "learning_rate": 0.001, "loss": 0.3938, "step": 7477 }, { "epoch": 0.20633447955935255, "grad_norm": 0.003072677878662944, "learning_rate": 0.001, "loss": 0.3846, "step": 7478 }, { "epoch": 0.20636207176041693, "grad_norm": 0.004622003063559532, "learning_rate": 0.001, "loss": 0.4524, "step": 7479 }, { "epoch": 0.20638966396148128, "grad_norm": 0.005966620985418558, "learning_rate": 0.001, "loss": 0.3915, "step": 7480 }, { "epoch": 0.20641725616254566, "grad_norm": 0.002969125984236598, "learning_rate": 0.001, "loss": 0.4081, "step": 7481 }, { "epoch": 0.20644484836361002, "grad_norm": 0.020619425922632217, "learning_rate": 0.001, "loss": 0.4044, "step": 7482 }, { "epoch": 0.2064724405646744, "grad_norm": 0.005815454758703709, "learning_rate": 0.001, "loss": 0.3981, "step": 7483 }, { "epoch": 0.20650003276573875, "grad_norm": 0.0029577158857136965, "learning_rate": 0.001, "loss": 0.4203, "step": 7484 }, { "epoch": 0.20652762496680313, "grad_norm": 0.0039966655895113945, "learning_rate": 0.001, "loss": 0.3665, "step": 7485 }, { "epoch": 0.2065552171678675, "grad_norm": 0.00554103497415781, "learning_rate": 0.001, "loss": 0.4024, "step": 7486 }, { "epoch": 0.20658280936893186, "grad_norm": 0.00304683530703187, "learning_rate": 0.001, "loss": 0.4444, "step": 7487 }, { "epoch": 0.20661040156999624, "grad_norm": 0.007905455306172371, "learning_rate": 0.001, "loss": 0.357, "step": 7488 }, { "epoch": 0.2066379937710606, "grad_norm": 0.0040861996822059155, "learning_rate": 0.001, "loss": 0.4093, "step": 7489 }, { "epoch": 0.20666558597212498, "grad_norm": 0.00287861586548388, "learning_rate": 0.001, "loss": 0.4079, "step": 7490 }, { "epoch": 0.20669317817318936, "grad_norm": 0.00307285669259727, "learning_rate": 0.001, "loss": 0.4153, "step": 7491 }, { "epoch": 0.2067207703742537, "grad_norm": 0.0033920174464583397, "learning_rate": 0.001, "loss": 0.3732, "step": 7492 }, { "epoch": 0.2067483625753181, "grad_norm": 0.0028167800046503544, "learning_rate": 0.001, "loss": 0.4377, "step": 7493 }, { "epoch": 0.20677595477638244, "grad_norm": 0.0022106487303972244, "learning_rate": 0.001, "loss": 0.4182, "step": 7494 }, { "epoch": 0.20680354697744682, "grad_norm": 0.002651102375239134, "learning_rate": 0.001, "loss": 0.3978, "step": 7495 }, { "epoch": 0.2068311391785112, "grad_norm": 0.003722158260643482, "learning_rate": 0.001, "loss": 0.4121, "step": 7496 }, { "epoch": 0.20685873137957556, "grad_norm": 0.004339053761214018, "learning_rate": 0.001, "loss": 0.4251, "step": 7497 }, { "epoch": 0.20688632358063994, "grad_norm": 0.002610723953694105, "learning_rate": 0.001, "loss": 0.4134, "step": 7498 }, { "epoch": 0.2069139157817043, "grad_norm": 0.0024759217631071806, "learning_rate": 0.001, "loss": 0.4228, "step": 7499 }, { "epoch": 0.20694150798276867, "grad_norm": 0.005409894045442343, "learning_rate": 0.001, "loss": 0.3883, "step": 7500 }, { "epoch": 0.20694150798276867, "eval_runtime": 24.8623, "eval_samples_per_second": 1.287, "eval_steps_per_second": 0.161, "step": 7500 }, { "epoch": 0.20696910018383305, "grad_norm": 0.0043741133995354176, "learning_rate": 0.001, "loss": 0.3872, "step": 7501 }, { "epoch": 0.2069966923848974, "grad_norm": 0.0034702117554843426, "learning_rate": 0.001, "loss": 0.3996, "step": 7502 }, { "epoch": 0.20702428458596178, "grad_norm": 0.002621249994263053, "learning_rate": 0.001, "loss": 0.3799, "step": 7503 }, { "epoch": 0.20705187678702613, "grad_norm": 0.00401890417560935, "learning_rate": 0.001, "loss": 0.3787, "step": 7504 }, { "epoch": 0.20707946898809051, "grad_norm": 0.005101599730551243, "learning_rate": 0.001, "loss": 0.4362, "step": 7505 }, { "epoch": 0.2071070611891549, "grad_norm": 0.006792318541556597, "learning_rate": 0.001, "loss": 0.4084, "step": 7506 }, { "epoch": 0.20713465339021925, "grad_norm": 0.0032163949217647314, "learning_rate": 0.001, "loss": 0.3853, "step": 7507 }, { "epoch": 0.20716224559128363, "grad_norm": 0.006187311839312315, "learning_rate": 0.001, "loss": 0.4082, "step": 7508 }, { "epoch": 0.20718983779234798, "grad_norm": 0.003395832609385252, "learning_rate": 0.001, "loss": 0.3804, "step": 7509 }, { "epoch": 0.20721742999341236, "grad_norm": 0.007058777846395969, "learning_rate": 0.001, "loss": 0.3545, "step": 7510 }, { "epoch": 0.20724502219447674, "grad_norm": 0.0022814422845840454, "learning_rate": 0.001, "loss": 0.3949, "step": 7511 }, { "epoch": 0.2072726143955411, "grad_norm": 0.003063853131607175, "learning_rate": 0.001, "loss": 0.4, "step": 7512 }, { "epoch": 0.20730020659660547, "grad_norm": 0.003978427965193987, "learning_rate": 0.001, "loss": 0.3732, "step": 7513 }, { "epoch": 0.20732779879766983, "grad_norm": 0.002310180803760886, "learning_rate": 0.001, "loss": 0.4172, "step": 7514 }, { "epoch": 0.2073553909987342, "grad_norm": 0.0026651090011000633, "learning_rate": 0.001, "loss": 0.3855, "step": 7515 }, { "epoch": 0.2073829831997986, "grad_norm": 0.0029477854259312153, "learning_rate": 0.001, "loss": 0.4296, "step": 7516 }, { "epoch": 0.20741057540086294, "grad_norm": 0.0022027394734323025, "learning_rate": 0.001, "loss": 0.4052, "step": 7517 }, { "epoch": 0.20743816760192732, "grad_norm": 0.0025718417018651962, "learning_rate": 0.001, "loss": 0.4058, "step": 7518 }, { "epoch": 0.20746575980299167, "grad_norm": 0.002921158680692315, "learning_rate": 0.001, "loss": 0.383, "step": 7519 }, { "epoch": 0.20749335200405605, "grad_norm": 0.0037957970052957535, "learning_rate": 0.001, "loss": 0.4122, "step": 7520 }, { "epoch": 0.20752094420512043, "grad_norm": 0.003919035196304321, "learning_rate": 0.001, "loss": 0.4063, "step": 7521 }, { "epoch": 0.20754853640618479, "grad_norm": 0.0024592061527073383, "learning_rate": 0.001, "loss": 0.4195, "step": 7522 }, { "epoch": 0.20757612860724917, "grad_norm": 0.0034799345303326845, "learning_rate": 0.001, "loss": 0.3907, "step": 7523 }, { "epoch": 0.20760372080831352, "grad_norm": 0.00930013321340084, "learning_rate": 0.001, "loss": 0.3504, "step": 7524 }, { "epoch": 0.2076313130093779, "grad_norm": 0.004809627775102854, "learning_rate": 0.001, "loss": 0.3851, "step": 7525 }, { "epoch": 0.20765890521044228, "grad_norm": 0.0031031654216349125, "learning_rate": 0.001, "loss": 0.3746, "step": 7526 }, { "epoch": 0.20768649741150663, "grad_norm": 0.0024205984082072973, "learning_rate": 0.001, "loss": 0.414, "step": 7527 }, { "epoch": 0.207714089612571, "grad_norm": 0.003100008238106966, "learning_rate": 0.001, "loss": 0.4384, "step": 7528 }, { "epoch": 0.20774168181363536, "grad_norm": 0.0023790469858795404, "learning_rate": 0.001, "loss": 0.4258, "step": 7529 }, { "epoch": 0.20776927401469975, "grad_norm": 0.0028267705347388983, "learning_rate": 0.001, "loss": 0.3943, "step": 7530 }, { "epoch": 0.20779686621576413, "grad_norm": 0.002445453545078635, "learning_rate": 0.001, "loss": 0.4182, "step": 7531 }, { "epoch": 0.20782445841682848, "grad_norm": 0.0038315069396048784, "learning_rate": 0.001, "loss": 0.3965, "step": 7532 }, { "epoch": 0.20785205061789286, "grad_norm": 0.002911431947723031, "learning_rate": 0.001, "loss": 0.3953, "step": 7533 }, { "epoch": 0.2078796428189572, "grad_norm": 0.024655209854245186, "learning_rate": 0.001, "loss": 0.4144, "step": 7534 }, { "epoch": 0.2079072350200216, "grad_norm": 0.0030850169714540243, "learning_rate": 0.001, "loss": 0.3774, "step": 7535 }, { "epoch": 0.20793482722108597, "grad_norm": 0.0029573985375463963, "learning_rate": 0.001, "loss": 0.4237, "step": 7536 }, { "epoch": 0.20796241942215032, "grad_norm": 0.002838612301275134, "learning_rate": 0.001, "loss": 0.4398, "step": 7537 }, { "epoch": 0.2079900116232147, "grad_norm": 0.0038796397857367992, "learning_rate": 0.001, "loss": 0.3815, "step": 7538 }, { "epoch": 0.20801760382427906, "grad_norm": 0.002909380476921797, "learning_rate": 0.001, "loss": 0.3851, "step": 7539 }, { "epoch": 0.20804519602534344, "grad_norm": 0.005611411761492491, "learning_rate": 0.001, "loss": 0.3835, "step": 7540 }, { "epoch": 0.20807278822640782, "grad_norm": 0.005777435377240181, "learning_rate": 0.001, "loss": 0.3989, "step": 7541 }, { "epoch": 0.20810038042747217, "grad_norm": 0.002417078008875251, "learning_rate": 0.001, "loss": 0.4334, "step": 7542 }, { "epoch": 0.20812797262853655, "grad_norm": 0.0039400262758135796, "learning_rate": 0.001, "loss": 0.4242, "step": 7543 }, { "epoch": 0.2081555648296009, "grad_norm": 0.005697792861610651, "learning_rate": 0.001, "loss": 0.3795, "step": 7544 }, { "epoch": 0.20818315703066528, "grad_norm": 0.0035569635219872, "learning_rate": 0.001, "loss": 0.4059, "step": 7545 }, { "epoch": 0.20821074923172966, "grad_norm": 0.0035249588545411825, "learning_rate": 0.001, "loss": 0.4174, "step": 7546 }, { "epoch": 0.20823834143279402, "grad_norm": 0.003973274026066065, "learning_rate": 0.001, "loss": 0.3732, "step": 7547 }, { "epoch": 0.2082659336338584, "grad_norm": 0.010858539491891861, "learning_rate": 0.001, "loss": 0.3627, "step": 7548 }, { "epoch": 0.20829352583492275, "grad_norm": 0.002799051348119974, "learning_rate": 0.001, "loss": 0.4172, "step": 7549 }, { "epoch": 0.20832111803598713, "grad_norm": 0.0032943717669695616, "learning_rate": 0.001, "loss": 0.4083, "step": 7550 }, { "epoch": 0.2083487102370515, "grad_norm": 0.003968504723161459, "learning_rate": 0.001, "loss": 0.4046, "step": 7551 }, { "epoch": 0.20837630243811586, "grad_norm": 0.0025952784344553947, "learning_rate": 0.001, "loss": 0.4116, "step": 7552 }, { "epoch": 0.20840389463918024, "grad_norm": 0.0023193256929516792, "learning_rate": 0.001, "loss": 0.3947, "step": 7553 }, { "epoch": 0.2084314868402446, "grad_norm": 0.020232345908880234, "learning_rate": 0.001, "loss": 0.3861, "step": 7554 }, { "epoch": 0.20845907904130898, "grad_norm": 0.005101238377392292, "learning_rate": 0.001, "loss": 0.3826, "step": 7555 }, { "epoch": 0.20848667124237336, "grad_norm": 0.0028565346729010344, "learning_rate": 0.001, "loss": 0.3878, "step": 7556 }, { "epoch": 0.2085142634434377, "grad_norm": 0.0023610927164554596, "learning_rate": 0.001, "loss": 0.4383, "step": 7557 }, { "epoch": 0.2085418556445021, "grad_norm": 0.0025352907832711935, "learning_rate": 0.001, "loss": 0.4298, "step": 7558 }, { "epoch": 0.20856944784556644, "grad_norm": 0.002582514425739646, "learning_rate": 0.001, "loss": 0.3841, "step": 7559 }, { "epoch": 0.20859704004663082, "grad_norm": 0.0051224916242063046, "learning_rate": 0.001, "loss": 0.3857, "step": 7560 }, { "epoch": 0.2086246322476952, "grad_norm": 0.004121377132833004, "learning_rate": 0.001, "loss": 0.3783, "step": 7561 }, { "epoch": 0.20865222444875955, "grad_norm": 0.0028835702687501907, "learning_rate": 0.001, "loss": 0.4232, "step": 7562 }, { "epoch": 0.20867981664982393, "grad_norm": 0.0035318653099238873, "learning_rate": 0.001, "loss": 0.38, "step": 7563 }, { "epoch": 0.2087074088508883, "grad_norm": 0.003049545455724001, "learning_rate": 0.001, "loss": 0.3697, "step": 7564 }, { "epoch": 0.20873500105195267, "grad_norm": 0.01318820845335722, "learning_rate": 0.001, "loss": 0.4478, "step": 7565 }, { "epoch": 0.20876259325301705, "grad_norm": 0.0028140349313616753, "learning_rate": 0.001, "loss": 0.3822, "step": 7566 }, { "epoch": 0.2087901854540814, "grad_norm": 0.0023844155948609114, "learning_rate": 0.001, "loss": 0.4497, "step": 7567 }, { "epoch": 0.20881777765514578, "grad_norm": 0.0021965601481497288, "learning_rate": 0.001, "loss": 0.3985, "step": 7568 }, { "epoch": 0.20884536985621013, "grad_norm": 0.003476344281807542, "learning_rate": 0.001, "loss": 0.4209, "step": 7569 }, { "epoch": 0.2088729620572745, "grad_norm": 0.004417444113641977, "learning_rate": 0.001, "loss": 0.3787, "step": 7570 }, { "epoch": 0.2089005542583389, "grad_norm": 0.002342290710657835, "learning_rate": 0.001, "loss": 0.4322, "step": 7571 }, { "epoch": 0.20892814645940325, "grad_norm": 0.002702725352719426, "learning_rate": 0.001, "loss": 0.4292, "step": 7572 }, { "epoch": 0.20895573866046763, "grad_norm": 0.003713875776156783, "learning_rate": 0.001, "loss": 0.387, "step": 7573 }, { "epoch": 0.20898333086153198, "grad_norm": 0.002027127193287015, "learning_rate": 0.001, "loss": 0.4431, "step": 7574 }, { "epoch": 0.20901092306259636, "grad_norm": 0.0038261881563812494, "learning_rate": 0.001, "loss": 0.3868, "step": 7575 }, { "epoch": 0.2090385152636607, "grad_norm": 0.002401645528152585, "learning_rate": 0.001, "loss": 0.4568, "step": 7576 }, { "epoch": 0.2090661074647251, "grad_norm": 0.002996440976858139, "learning_rate": 0.001, "loss": 0.3903, "step": 7577 }, { "epoch": 0.20909369966578947, "grad_norm": 0.0030510788783431053, "learning_rate": 0.001, "loss": 0.3791, "step": 7578 }, { "epoch": 0.20912129186685383, "grad_norm": 0.0031568347476422787, "learning_rate": 0.001, "loss": 0.3908, "step": 7579 }, { "epoch": 0.2091488840679182, "grad_norm": 0.0025101054925471544, "learning_rate": 0.001, "loss": 0.4009, "step": 7580 }, { "epoch": 0.20917647626898256, "grad_norm": 0.002685755491256714, "learning_rate": 0.001, "loss": 0.4096, "step": 7581 }, { "epoch": 0.20920406847004694, "grad_norm": 0.00574698718264699, "learning_rate": 0.001, "loss": 0.3911, "step": 7582 }, { "epoch": 0.20923166067111132, "grad_norm": 0.0023178094998002052, "learning_rate": 0.001, "loss": 0.3971, "step": 7583 }, { "epoch": 0.20925925287217567, "grad_norm": 0.0048984200693666935, "learning_rate": 0.001, "loss": 0.3566, "step": 7584 }, { "epoch": 0.20928684507324005, "grad_norm": 0.0027060145512223244, "learning_rate": 0.001, "loss": 0.3853, "step": 7585 }, { "epoch": 0.2093144372743044, "grad_norm": 0.00470772897824645, "learning_rate": 0.001, "loss": 0.385, "step": 7586 }, { "epoch": 0.20934202947536878, "grad_norm": 0.002918388694524765, "learning_rate": 0.001, "loss": 0.4374, "step": 7587 }, { "epoch": 0.20936962167643317, "grad_norm": 0.002451063599437475, "learning_rate": 0.001, "loss": 0.4166, "step": 7588 }, { "epoch": 0.20939721387749752, "grad_norm": 0.0024748530704528093, "learning_rate": 0.001, "loss": 0.4419, "step": 7589 }, { "epoch": 0.2094248060785619, "grad_norm": 0.0029519018717110157, "learning_rate": 0.001, "loss": 0.3743, "step": 7590 }, { "epoch": 0.20945239827962625, "grad_norm": 0.004285396076738834, "learning_rate": 0.001, "loss": 0.3722, "step": 7591 }, { "epoch": 0.20947999048069063, "grad_norm": 0.0032341107726097107, "learning_rate": 0.001, "loss": 0.3655, "step": 7592 }, { "epoch": 0.209507582681755, "grad_norm": 0.0032684197649359703, "learning_rate": 0.001, "loss": 0.3992, "step": 7593 }, { "epoch": 0.20953517488281936, "grad_norm": 0.002590345684438944, "learning_rate": 0.001, "loss": 0.4147, "step": 7594 }, { "epoch": 0.20956276708388374, "grad_norm": 0.0075867660343647, "learning_rate": 0.001, "loss": 0.4297, "step": 7595 }, { "epoch": 0.2095903592849481, "grad_norm": 0.011841630563139915, "learning_rate": 0.001, "loss": 0.3941, "step": 7596 }, { "epoch": 0.20961795148601248, "grad_norm": 0.002727978862822056, "learning_rate": 0.001, "loss": 0.4122, "step": 7597 }, { "epoch": 0.20964554368707686, "grad_norm": 0.0028050565160810947, "learning_rate": 0.001, "loss": 0.4043, "step": 7598 }, { "epoch": 0.2096731358881412, "grad_norm": 0.0022304770536720753, "learning_rate": 0.001, "loss": 0.4186, "step": 7599 }, { "epoch": 0.2097007280892056, "grad_norm": 0.0042723724618554115, "learning_rate": 0.001, "loss": 0.409, "step": 7600 }, { "epoch": 0.20972832029026994, "grad_norm": 0.00373181514441967, "learning_rate": 0.001, "loss": 0.4158, "step": 7601 }, { "epoch": 0.20975591249133432, "grad_norm": 0.0032532603945583105, "learning_rate": 0.001, "loss": 0.4188, "step": 7602 }, { "epoch": 0.2097835046923987, "grad_norm": 0.002218460664153099, "learning_rate": 0.001, "loss": 0.4221, "step": 7603 }, { "epoch": 0.20981109689346306, "grad_norm": 0.00374056794680655, "learning_rate": 0.001, "loss": 0.412, "step": 7604 }, { "epoch": 0.20983868909452744, "grad_norm": 0.0029005371034145355, "learning_rate": 0.001, "loss": 0.3753, "step": 7605 }, { "epoch": 0.2098662812955918, "grad_norm": 0.0028765443712472916, "learning_rate": 0.001, "loss": 0.4026, "step": 7606 }, { "epoch": 0.20989387349665617, "grad_norm": 0.0032596492674201727, "learning_rate": 0.001, "loss": 0.4057, "step": 7607 }, { "epoch": 0.20992146569772055, "grad_norm": 0.0027232312131673098, "learning_rate": 0.001, "loss": 0.4062, "step": 7608 }, { "epoch": 0.2099490578987849, "grad_norm": 0.0047914572060108185, "learning_rate": 0.001, "loss": 0.383, "step": 7609 }, { "epoch": 0.20997665009984928, "grad_norm": 0.002376759424805641, "learning_rate": 0.001, "loss": 0.4403, "step": 7610 }, { "epoch": 0.21000424230091363, "grad_norm": 0.0035623747389763594, "learning_rate": 0.001, "loss": 0.3995, "step": 7611 }, { "epoch": 0.21003183450197802, "grad_norm": 0.003275655210018158, "learning_rate": 0.001, "loss": 0.4302, "step": 7612 }, { "epoch": 0.2100594267030424, "grad_norm": 0.0037639643996953964, "learning_rate": 0.001, "loss": 0.4191, "step": 7613 }, { "epoch": 0.21008701890410675, "grad_norm": 0.010343240574002266, "learning_rate": 0.001, "loss": 0.3744, "step": 7614 }, { "epoch": 0.21011461110517113, "grad_norm": 0.002769758924841881, "learning_rate": 0.001, "loss": 0.4141, "step": 7615 }, { "epoch": 0.21014220330623548, "grad_norm": 0.0020274778362363577, "learning_rate": 0.001, "loss": 0.4406, "step": 7616 }, { "epoch": 0.21016979550729986, "grad_norm": 0.0034504812210798264, "learning_rate": 0.001, "loss": 0.4136, "step": 7617 }, { "epoch": 0.21019738770836424, "grad_norm": 0.0032671205699443817, "learning_rate": 0.001, "loss": 0.3769, "step": 7618 }, { "epoch": 0.2102249799094286, "grad_norm": 0.0033758427016437054, "learning_rate": 0.001, "loss": 0.4191, "step": 7619 }, { "epoch": 0.21025257211049297, "grad_norm": 0.005549068097025156, "learning_rate": 0.001, "loss": 0.383, "step": 7620 }, { "epoch": 0.21028016431155733, "grad_norm": 0.0029898162465542555, "learning_rate": 0.001, "loss": 0.4265, "step": 7621 }, { "epoch": 0.2103077565126217, "grad_norm": 0.0020999349653720856, "learning_rate": 0.001, "loss": 0.405, "step": 7622 }, { "epoch": 0.2103353487136861, "grad_norm": 0.0028715464286506176, "learning_rate": 0.001, "loss": 0.3823, "step": 7623 }, { "epoch": 0.21036294091475044, "grad_norm": 0.0032411713618785143, "learning_rate": 0.001, "loss": 0.3973, "step": 7624 }, { "epoch": 0.21039053311581482, "grad_norm": 0.006850194651633501, "learning_rate": 0.001, "loss": 0.3864, "step": 7625 }, { "epoch": 0.21041812531687917, "grad_norm": 0.0031761995051056147, "learning_rate": 0.001, "loss": 0.3689, "step": 7626 }, { "epoch": 0.21044571751794355, "grad_norm": 0.0024642094504088163, "learning_rate": 0.001, "loss": 0.4037, "step": 7627 }, { "epoch": 0.21047330971900793, "grad_norm": 0.0035544894635677338, "learning_rate": 0.001, "loss": 0.3876, "step": 7628 }, { "epoch": 0.2105009019200723, "grad_norm": 0.009451251477003098, "learning_rate": 0.001, "loss": 0.394, "step": 7629 }, { "epoch": 0.21052849412113667, "grad_norm": 0.002780449576675892, "learning_rate": 0.001, "loss": 0.4073, "step": 7630 }, { "epoch": 0.21055608632220102, "grad_norm": 0.0032124193385243416, "learning_rate": 0.001, "loss": 0.4213, "step": 7631 }, { "epoch": 0.2105836785232654, "grad_norm": 0.004741044715046883, "learning_rate": 0.001, "loss": 0.3862, "step": 7632 }, { "epoch": 0.21061127072432978, "grad_norm": 0.005938271526247263, "learning_rate": 0.001, "loss": 0.3802, "step": 7633 }, { "epoch": 0.21063886292539413, "grad_norm": 0.003253635484725237, "learning_rate": 0.001, "loss": 0.362, "step": 7634 }, { "epoch": 0.2106664551264585, "grad_norm": 0.0031698490492999554, "learning_rate": 0.001, "loss": 0.4288, "step": 7635 }, { "epoch": 0.21069404732752287, "grad_norm": 0.0025998263154178858, "learning_rate": 0.001, "loss": 0.3606, "step": 7636 }, { "epoch": 0.21072163952858725, "grad_norm": 0.0023143659345805645, "learning_rate": 0.001, "loss": 0.4475, "step": 7637 }, { "epoch": 0.21074923172965163, "grad_norm": 0.0022685672156512737, "learning_rate": 0.001, "loss": 0.3564, "step": 7638 }, { "epoch": 0.21077682393071598, "grad_norm": 0.003365386975929141, "learning_rate": 0.001, "loss": 0.4087, "step": 7639 }, { "epoch": 0.21080441613178036, "grad_norm": 0.0031616310589015484, "learning_rate": 0.001, "loss": 0.3569, "step": 7640 }, { "epoch": 0.2108320083328447, "grad_norm": 0.0061267949640750885, "learning_rate": 0.001, "loss": 0.3689, "step": 7641 }, { "epoch": 0.2108596005339091, "grad_norm": 0.0033039050176739693, "learning_rate": 0.001, "loss": 0.4077, "step": 7642 }, { "epoch": 0.21088719273497347, "grad_norm": 0.002562372013926506, "learning_rate": 0.001, "loss": 0.3989, "step": 7643 }, { "epoch": 0.21091478493603782, "grad_norm": 0.0026763584464788437, "learning_rate": 0.001, "loss": 0.3675, "step": 7644 }, { "epoch": 0.2109423771371022, "grad_norm": 0.0032096514478325844, "learning_rate": 0.001, "loss": 0.3803, "step": 7645 }, { "epoch": 0.21096996933816656, "grad_norm": 0.002220802940428257, "learning_rate": 0.001, "loss": 0.4069, "step": 7646 }, { "epoch": 0.21099756153923094, "grad_norm": 0.0039492035284638405, "learning_rate": 0.001, "loss": 0.3918, "step": 7647 }, { "epoch": 0.21102515374029532, "grad_norm": 0.0027708462439477444, "learning_rate": 0.001, "loss": 0.4026, "step": 7648 }, { "epoch": 0.21105274594135967, "grad_norm": 0.002891121432185173, "learning_rate": 0.001, "loss": 0.4248, "step": 7649 }, { "epoch": 0.21108033814242405, "grad_norm": 0.008183618076145649, "learning_rate": 0.001, "loss": 0.3889, "step": 7650 }, { "epoch": 0.2111079303434884, "grad_norm": 0.0033024440053850412, "learning_rate": 0.001, "loss": 0.408, "step": 7651 }, { "epoch": 0.21113552254455278, "grad_norm": 0.0020405303221195936, "learning_rate": 0.001, "loss": 0.421, "step": 7652 }, { "epoch": 0.21116311474561716, "grad_norm": 0.004159392323344946, "learning_rate": 0.001, "loss": 0.3637, "step": 7653 }, { "epoch": 0.21119070694668152, "grad_norm": 0.004359352868050337, "learning_rate": 0.001, "loss": 0.4021, "step": 7654 }, { "epoch": 0.2112182991477459, "grad_norm": 0.003729519434273243, "learning_rate": 0.001, "loss": 0.4094, "step": 7655 }, { "epoch": 0.21124589134881025, "grad_norm": 0.0033560399897396564, "learning_rate": 0.001, "loss": 0.3683, "step": 7656 }, { "epoch": 0.21127348354987463, "grad_norm": 0.008382863365113735, "learning_rate": 0.001, "loss": 0.3876, "step": 7657 }, { "epoch": 0.211301075750939, "grad_norm": 0.002670932561159134, "learning_rate": 0.001, "loss": 0.3864, "step": 7658 }, { "epoch": 0.21132866795200336, "grad_norm": 0.0026596023235470057, "learning_rate": 0.001, "loss": 0.3925, "step": 7659 }, { "epoch": 0.21135626015306774, "grad_norm": 0.0030948955100029707, "learning_rate": 0.001, "loss": 0.4074, "step": 7660 }, { "epoch": 0.2113838523541321, "grad_norm": 0.005249246954917908, "learning_rate": 0.001, "loss": 0.3961, "step": 7661 }, { "epoch": 0.21141144455519648, "grad_norm": 0.0028843062464147806, "learning_rate": 0.001, "loss": 0.3791, "step": 7662 }, { "epoch": 0.21143903675626086, "grad_norm": 0.0027450129855424166, "learning_rate": 0.001, "loss": 0.3956, "step": 7663 }, { "epoch": 0.2114666289573252, "grad_norm": 0.01026302482932806, "learning_rate": 0.001, "loss": 0.3887, "step": 7664 }, { "epoch": 0.2114942211583896, "grad_norm": 0.0032274313271045685, "learning_rate": 0.001, "loss": 0.3951, "step": 7665 }, { "epoch": 0.21152181335945394, "grad_norm": 0.005412998143583536, "learning_rate": 0.001, "loss": 0.4233, "step": 7666 }, { "epoch": 0.21154940556051832, "grad_norm": 0.00447877449914813, "learning_rate": 0.001, "loss": 0.3871, "step": 7667 }, { "epoch": 0.2115769977615827, "grad_norm": 0.0035270792432129383, "learning_rate": 0.001, "loss": 0.3559, "step": 7668 }, { "epoch": 0.21160458996264706, "grad_norm": 0.009330617263913155, "learning_rate": 0.001, "loss": 0.4233, "step": 7669 }, { "epoch": 0.21163218216371144, "grad_norm": 0.003103942610323429, "learning_rate": 0.001, "loss": 0.3885, "step": 7670 }, { "epoch": 0.2116597743647758, "grad_norm": 0.0024018525145947933, "learning_rate": 0.001, "loss": 0.384, "step": 7671 }, { "epoch": 0.21168736656584017, "grad_norm": 0.0029490841552615166, "learning_rate": 0.001, "loss": 0.3935, "step": 7672 }, { "epoch": 0.21171495876690452, "grad_norm": 0.003537841374054551, "learning_rate": 0.001, "loss": 0.4237, "step": 7673 }, { "epoch": 0.2117425509679689, "grad_norm": 0.007800383027642965, "learning_rate": 0.001, "loss": 0.4145, "step": 7674 }, { "epoch": 0.21177014316903328, "grad_norm": 0.0025011899415403605, "learning_rate": 0.001, "loss": 0.4257, "step": 7675 }, { "epoch": 0.21179773537009763, "grad_norm": 0.004059432540088892, "learning_rate": 0.001, "loss": 0.3877, "step": 7676 }, { "epoch": 0.21182532757116201, "grad_norm": 0.0030776297207921743, "learning_rate": 0.001, "loss": 0.3552, "step": 7677 }, { "epoch": 0.21185291977222637, "grad_norm": 0.004162721801549196, "learning_rate": 0.001, "loss": 0.3905, "step": 7678 }, { "epoch": 0.21188051197329075, "grad_norm": 0.003027817001566291, "learning_rate": 0.001, "loss": 0.3937, "step": 7679 }, { "epoch": 0.21190810417435513, "grad_norm": 0.004329508636146784, "learning_rate": 0.001, "loss": 0.3874, "step": 7680 }, { "epoch": 0.21193569637541948, "grad_norm": 0.003321508876979351, "learning_rate": 0.001, "loss": 0.4219, "step": 7681 }, { "epoch": 0.21196328857648386, "grad_norm": 0.003910355735570192, "learning_rate": 0.001, "loss": 0.3825, "step": 7682 }, { "epoch": 0.2119908807775482, "grad_norm": 0.002138720592483878, "learning_rate": 0.001, "loss": 0.4045, "step": 7683 }, { "epoch": 0.2120184729786126, "grad_norm": 0.003873582696542144, "learning_rate": 0.001, "loss": 0.4121, "step": 7684 }, { "epoch": 0.21204606517967697, "grad_norm": 0.008897165767848492, "learning_rate": 0.001, "loss": 0.3989, "step": 7685 }, { "epoch": 0.21207365738074133, "grad_norm": 0.009049713611602783, "learning_rate": 0.001, "loss": 0.3947, "step": 7686 }, { "epoch": 0.2121012495818057, "grad_norm": 0.00294255162589252, "learning_rate": 0.001, "loss": 0.3908, "step": 7687 }, { "epoch": 0.21212884178287006, "grad_norm": 0.003320804564282298, "learning_rate": 0.001, "loss": 0.3711, "step": 7688 }, { "epoch": 0.21215643398393444, "grad_norm": 0.006188528146594763, "learning_rate": 0.001, "loss": 0.404, "step": 7689 }, { "epoch": 0.21218402618499882, "grad_norm": 0.002375251380726695, "learning_rate": 0.001, "loss": 0.4214, "step": 7690 }, { "epoch": 0.21221161838606317, "grad_norm": 0.00397599907591939, "learning_rate": 0.001, "loss": 0.4141, "step": 7691 }, { "epoch": 0.21223921058712755, "grad_norm": 0.002846440998837352, "learning_rate": 0.001, "loss": 0.3989, "step": 7692 }, { "epoch": 0.2122668027881919, "grad_norm": 0.005194092635065317, "learning_rate": 0.001, "loss": 0.3759, "step": 7693 }, { "epoch": 0.21229439498925629, "grad_norm": 0.004019029904156923, "learning_rate": 0.001, "loss": 0.4079, "step": 7694 }, { "epoch": 0.21232198719032067, "grad_norm": 0.0032543425913900137, "learning_rate": 0.001, "loss": 0.4172, "step": 7695 }, { "epoch": 0.21234957939138502, "grad_norm": 0.004645978100597858, "learning_rate": 0.001, "loss": 0.4443, "step": 7696 }, { "epoch": 0.2123771715924494, "grad_norm": 0.003786055836826563, "learning_rate": 0.001, "loss": 0.4281, "step": 7697 }, { "epoch": 0.21240476379351375, "grad_norm": 0.005406087264418602, "learning_rate": 0.001, "loss": 0.4052, "step": 7698 }, { "epoch": 0.21243235599457813, "grad_norm": 0.005549001973122358, "learning_rate": 0.001, "loss": 0.4038, "step": 7699 }, { "epoch": 0.2124599481956425, "grad_norm": 0.002728297607973218, "learning_rate": 0.001, "loss": 0.4014, "step": 7700 }, { "epoch": 0.21248754039670686, "grad_norm": 0.005214143078774214, "learning_rate": 0.001, "loss": 0.4125, "step": 7701 }, { "epoch": 0.21251513259777124, "grad_norm": 0.0025736193638294935, "learning_rate": 0.001, "loss": 0.4219, "step": 7702 }, { "epoch": 0.2125427247988356, "grad_norm": 0.00296932109631598, "learning_rate": 0.001, "loss": 0.3849, "step": 7703 }, { "epoch": 0.21257031699989998, "grad_norm": 0.003445189446210861, "learning_rate": 0.001, "loss": 0.3584, "step": 7704 }, { "epoch": 0.21259790920096436, "grad_norm": 0.0035377286840230227, "learning_rate": 0.001, "loss": 0.3979, "step": 7705 }, { "epoch": 0.2126255014020287, "grad_norm": 0.0029369608964771032, "learning_rate": 0.001, "loss": 0.4084, "step": 7706 }, { "epoch": 0.2126530936030931, "grad_norm": 0.0030892151407897472, "learning_rate": 0.001, "loss": 0.395, "step": 7707 }, { "epoch": 0.21268068580415744, "grad_norm": 0.004821309354156256, "learning_rate": 0.001, "loss": 0.4165, "step": 7708 }, { "epoch": 0.21270827800522182, "grad_norm": 0.003210441442206502, "learning_rate": 0.001, "loss": 0.4002, "step": 7709 }, { "epoch": 0.2127358702062862, "grad_norm": 0.0030719011556357145, "learning_rate": 0.001, "loss": 0.399, "step": 7710 }, { "epoch": 0.21276346240735056, "grad_norm": 0.0028634623158723116, "learning_rate": 0.001, "loss": 0.3503, "step": 7711 }, { "epoch": 0.21279105460841494, "grad_norm": 0.004046322777867317, "learning_rate": 0.001, "loss": 0.4125, "step": 7712 }, { "epoch": 0.2128186468094793, "grad_norm": 0.00294592441059649, "learning_rate": 0.001, "loss": 0.3417, "step": 7713 }, { "epoch": 0.21284623901054367, "grad_norm": 0.0021102267783135176, "learning_rate": 0.001, "loss": 0.4369, "step": 7714 }, { "epoch": 0.21287383121160805, "grad_norm": 0.0028041282203048468, "learning_rate": 0.001, "loss": 0.3816, "step": 7715 }, { "epoch": 0.2129014234126724, "grad_norm": 0.0034821259323507547, "learning_rate": 0.001, "loss": 0.3934, "step": 7716 }, { "epoch": 0.21292901561373678, "grad_norm": 0.003997510299086571, "learning_rate": 0.001, "loss": 0.3884, "step": 7717 }, { "epoch": 0.21295660781480114, "grad_norm": 0.00297334766946733, "learning_rate": 0.001, "loss": 0.4262, "step": 7718 }, { "epoch": 0.21298420001586552, "grad_norm": 0.0029889491852372885, "learning_rate": 0.001, "loss": 0.3567, "step": 7719 }, { "epoch": 0.2130117922169299, "grad_norm": 0.005800141021609306, "learning_rate": 0.001, "loss": 0.3693, "step": 7720 }, { "epoch": 0.21303938441799425, "grad_norm": 0.004063557833433151, "learning_rate": 0.001, "loss": 0.4006, "step": 7721 }, { "epoch": 0.21306697661905863, "grad_norm": 0.0052527179941535, "learning_rate": 0.001, "loss": 0.3863, "step": 7722 }, { "epoch": 0.21309456882012298, "grad_norm": 0.004150967579334974, "learning_rate": 0.001, "loss": 0.3818, "step": 7723 }, { "epoch": 0.21312216102118736, "grad_norm": 0.003142510773614049, "learning_rate": 0.001, "loss": 0.4367, "step": 7724 }, { "epoch": 0.21314975322225174, "grad_norm": 0.0024571139365434647, "learning_rate": 0.001, "loss": 0.397, "step": 7725 }, { "epoch": 0.2131773454233161, "grad_norm": 0.007491269148886204, "learning_rate": 0.001, "loss": 0.4254, "step": 7726 }, { "epoch": 0.21320493762438048, "grad_norm": 0.003059527836740017, "learning_rate": 0.001, "loss": 0.4296, "step": 7727 }, { "epoch": 0.21323252982544483, "grad_norm": 0.0030870058108121157, "learning_rate": 0.001, "loss": 0.4143, "step": 7728 }, { "epoch": 0.2132601220265092, "grad_norm": 0.002307609189301729, "learning_rate": 0.001, "loss": 0.404, "step": 7729 }, { "epoch": 0.2132877142275736, "grad_norm": 0.004910447634756565, "learning_rate": 0.001, "loss": 0.3676, "step": 7730 }, { "epoch": 0.21331530642863794, "grad_norm": 0.0032054877374321222, "learning_rate": 0.001, "loss": 0.4157, "step": 7731 }, { "epoch": 0.21334289862970232, "grad_norm": 0.004739253781735897, "learning_rate": 0.001, "loss": 0.3884, "step": 7732 }, { "epoch": 0.21337049083076667, "grad_norm": 0.0027026189491152763, "learning_rate": 0.001, "loss": 0.3723, "step": 7733 }, { "epoch": 0.21339808303183105, "grad_norm": 0.0031935579609125853, "learning_rate": 0.001, "loss": 0.4053, "step": 7734 }, { "epoch": 0.21342567523289543, "grad_norm": 0.0026129090692847967, "learning_rate": 0.001, "loss": 0.3873, "step": 7735 }, { "epoch": 0.2134532674339598, "grad_norm": 0.0030374531634151936, "learning_rate": 0.001, "loss": 0.3681, "step": 7736 }, { "epoch": 0.21348085963502417, "grad_norm": 0.0035555511713027954, "learning_rate": 0.001, "loss": 0.4156, "step": 7737 }, { "epoch": 0.21350845183608852, "grad_norm": 0.005884343292564154, "learning_rate": 0.001, "loss": 0.3908, "step": 7738 }, { "epoch": 0.2135360440371529, "grad_norm": 0.0030709283892065287, "learning_rate": 0.001, "loss": 0.376, "step": 7739 }, { "epoch": 0.21356363623821728, "grad_norm": 0.0027664625085890293, "learning_rate": 0.001, "loss": 0.4092, "step": 7740 }, { "epoch": 0.21359122843928163, "grad_norm": 0.005600049160420895, "learning_rate": 0.001, "loss": 0.3438, "step": 7741 }, { "epoch": 0.213618820640346, "grad_norm": 0.00319514493457973, "learning_rate": 0.001, "loss": 0.3608, "step": 7742 }, { "epoch": 0.21364641284141037, "grad_norm": 0.0033522641751915216, "learning_rate": 0.001, "loss": 0.4022, "step": 7743 }, { "epoch": 0.21367400504247475, "grad_norm": 0.0030811361502856016, "learning_rate": 0.001, "loss": 0.3919, "step": 7744 }, { "epoch": 0.21370159724353913, "grad_norm": 0.0026056889910250902, "learning_rate": 0.001, "loss": 0.4102, "step": 7745 }, { "epoch": 0.21372918944460348, "grad_norm": 0.0046222517266869545, "learning_rate": 0.001, "loss": 0.3717, "step": 7746 }, { "epoch": 0.21375678164566786, "grad_norm": 0.005610965192317963, "learning_rate": 0.001, "loss": 0.3899, "step": 7747 }, { "epoch": 0.2137843738467322, "grad_norm": 0.004591417033225298, "learning_rate": 0.001, "loss": 0.3818, "step": 7748 }, { "epoch": 0.2138119660477966, "grad_norm": 0.0033903804142028093, "learning_rate": 0.001, "loss": 0.396, "step": 7749 }, { "epoch": 0.21383955824886097, "grad_norm": 0.0037878809962421656, "learning_rate": 0.001, "loss": 0.4143, "step": 7750 }, { "epoch": 0.21386715044992533, "grad_norm": 0.006682871840894222, "learning_rate": 0.001, "loss": 0.3891, "step": 7751 }, { "epoch": 0.2138947426509897, "grad_norm": 0.0054921298287808895, "learning_rate": 0.001, "loss": 0.4035, "step": 7752 }, { "epoch": 0.21392233485205406, "grad_norm": 0.002533350605517626, "learning_rate": 0.001, "loss": 0.415, "step": 7753 }, { "epoch": 0.21394992705311844, "grad_norm": 0.004424331709742546, "learning_rate": 0.001, "loss": 0.4039, "step": 7754 }, { "epoch": 0.21397751925418282, "grad_norm": 0.003484759945422411, "learning_rate": 0.001, "loss": 0.4224, "step": 7755 }, { "epoch": 0.21400511145524717, "grad_norm": 0.0028570671565830708, "learning_rate": 0.001, "loss": 0.3774, "step": 7756 }, { "epoch": 0.21403270365631155, "grad_norm": 0.0032984951976686716, "learning_rate": 0.001, "loss": 0.405, "step": 7757 }, { "epoch": 0.2140602958573759, "grad_norm": 0.002993236295878887, "learning_rate": 0.001, "loss": 0.381, "step": 7758 }, { "epoch": 0.21408788805844028, "grad_norm": 0.0039431145414710045, "learning_rate": 0.001, "loss": 0.3957, "step": 7759 }, { "epoch": 0.21411548025950466, "grad_norm": 0.0023782916832715273, "learning_rate": 0.001, "loss": 0.3813, "step": 7760 }, { "epoch": 0.21414307246056902, "grad_norm": 0.005132326390594244, "learning_rate": 0.001, "loss": 0.3922, "step": 7761 }, { "epoch": 0.2141706646616334, "grad_norm": 0.004260215442627668, "learning_rate": 0.001, "loss": 0.3811, "step": 7762 }, { "epoch": 0.21419825686269775, "grad_norm": 0.00576009601354599, "learning_rate": 0.001, "loss": 0.431, "step": 7763 }, { "epoch": 0.21422584906376213, "grad_norm": 0.002464310498908162, "learning_rate": 0.001, "loss": 0.4194, "step": 7764 }, { "epoch": 0.21425344126482648, "grad_norm": 0.0022607767023146152, "learning_rate": 0.001, "loss": 0.4046, "step": 7765 }, { "epoch": 0.21428103346589086, "grad_norm": 0.002766657853499055, "learning_rate": 0.001, "loss": 0.3995, "step": 7766 }, { "epoch": 0.21430862566695524, "grad_norm": 0.002656952477991581, "learning_rate": 0.001, "loss": 0.3916, "step": 7767 }, { "epoch": 0.2143362178680196, "grad_norm": 0.005709274671971798, "learning_rate": 0.001, "loss": 0.3811, "step": 7768 }, { "epoch": 0.21436381006908398, "grad_norm": 0.0032649333588778973, "learning_rate": 0.001, "loss": 0.4037, "step": 7769 }, { "epoch": 0.21439140227014833, "grad_norm": 0.0026809549890458584, "learning_rate": 0.001, "loss": 0.3998, "step": 7770 }, { "epoch": 0.2144189944712127, "grad_norm": 0.010057074949145317, "learning_rate": 0.001, "loss": 0.3962, "step": 7771 }, { "epoch": 0.2144465866722771, "grad_norm": 0.003958344459533691, "learning_rate": 0.001, "loss": 0.4137, "step": 7772 }, { "epoch": 0.21447417887334144, "grad_norm": 0.004084006417542696, "learning_rate": 0.001, "loss": 0.4316, "step": 7773 }, { "epoch": 0.21450177107440582, "grad_norm": 0.0051500373519957066, "learning_rate": 0.001, "loss": 0.4043, "step": 7774 }, { "epoch": 0.21452936327547018, "grad_norm": 0.003794713644310832, "learning_rate": 0.001, "loss": 0.4239, "step": 7775 }, { "epoch": 0.21455695547653456, "grad_norm": 0.02025657705962658, "learning_rate": 0.001, "loss": 0.4091, "step": 7776 }, { "epoch": 0.21458454767759894, "grad_norm": 0.0035089454613626003, "learning_rate": 0.001, "loss": 0.3933, "step": 7777 }, { "epoch": 0.2146121398786633, "grad_norm": 0.0029882427770644426, "learning_rate": 0.001, "loss": 0.4157, "step": 7778 }, { "epoch": 0.21463973207972767, "grad_norm": 0.0025343652814626694, "learning_rate": 0.001, "loss": 0.391, "step": 7779 }, { "epoch": 0.21466732428079202, "grad_norm": 0.0028338837437331676, "learning_rate": 0.001, "loss": 0.3991, "step": 7780 }, { "epoch": 0.2146949164818564, "grad_norm": 0.0033451595809310675, "learning_rate": 0.001, "loss": 0.4014, "step": 7781 }, { "epoch": 0.21472250868292078, "grad_norm": 0.0033898449037224054, "learning_rate": 0.001, "loss": 0.3728, "step": 7782 }, { "epoch": 0.21475010088398513, "grad_norm": 0.0038336054421961308, "learning_rate": 0.001, "loss": 0.4243, "step": 7783 }, { "epoch": 0.21477769308504951, "grad_norm": 0.012188595719635487, "learning_rate": 0.001, "loss": 0.4066, "step": 7784 }, { "epoch": 0.21480528528611387, "grad_norm": 0.005000379402190447, "learning_rate": 0.001, "loss": 0.3877, "step": 7785 }, { "epoch": 0.21483287748717825, "grad_norm": 0.00532375555485487, "learning_rate": 0.001, "loss": 0.3766, "step": 7786 }, { "epoch": 0.21486046968824263, "grad_norm": 0.004247152712196112, "learning_rate": 0.001, "loss": 0.4009, "step": 7787 }, { "epoch": 0.21488806188930698, "grad_norm": 0.0024253970477730036, "learning_rate": 0.001, "loss": 0.4233, "step": 7788 }, { "epoch": 0.21491565409037136, "grad_norm": 0.0028955123852938414, "learning_rate": 0.001, "loss": 0.3938, "step": 7789 }, { "epoch": 0.2149432462914357, "grad_norm": 0.0019871145486831665, "learning_rate": 0.001, "loss": 0.4043, "step": 7790 }, { "epoch": 0.2149708384925001, "grad_norm": 0.0026270560920238495, "learning_rate": 0.001, "loss": 0.4235, "step": 7791 }, { "epoch": 0.21499843069356447, "grad_norm": 0.0022613939363509417, "learning_rate": 0.001, "loss": 0.399, "step": 7792 }, { "epoch": 0.21502602289462883, "grad_norm": 0.0022467582020908594, "learning_rate": 0.001, "loss": 0.4139, "step": 7793 }, { "epoch": 0.2150536150956932, "grad_norm": 0.003144500544294715, "learning_rate": 0.001, "loss": 0.4121, "step": 7794 }, { "epoch": 0.21508120729675756, "grad_norm": 0.004727689083665609, "learning_rate": 0.001, "loss": 0.3643, "step": 7795 }, { "epoch": 0.21510879949782194, "grad_norm": 0.0030138723086565733, "learning_rate": 0.001, "loss": 0.4194, "step": 7796 }, { "epoch": 0.21513639169888632, "grad_norm": 0.003997731953859329, "learning_rate": 0.001, "loss": 0.4116, "step": 7797 }, { "epoch": 0.21516398389995067, "grad_norm": 0.0034538882318884134, "learning_rate": 0.001, "loss": 0.4138, "step": 7798 }, { "epoch": 0.21519157610101505, "grad_norm": 0.0030911024659872055, "learning_rate": 0.001, "loss": 0.4247, "step": 7799 }, { "epoch": 0.2152191683020794, "grad_norm": 0.006776158697903156, "learning_rate": 0.001, "loss": 0.406, "step": 7800 }, { "epoch": 0.21524676050314379, "grad_norm": 0.0032637538388371468, "learning_rate": 0.001, "loss": 0.392, "step": 7801 }, { "epoch": 0.21527435270420817, "grad_norm": 0.0028168826829642057, "learning_rate": 0.001, "loss": 0.3902, "step": 7802 }, { "epoch": 0.21530194490527252, "grad_norm": 0.002768394071608782, "learning_rate": 0.001, "loss": 0.3795, "step": 7803 }, { "epoch": 0.2153295371063369, "grad_norm": 0.004282908979803324, "learning_rate": 0.001, "loss": 0.3893, "step": 7804 }, { "epoch": 0.21535712930740125, "grad_norm": 0.002293472411110997, "learning_rate": 0.001, "loss": 0.4139, "step": 7805 }, { "epoch": 0.21538472150846563, "grad_norm": 0.003117901738733053, "learning_rate": 0.001, "loss": 0.4014, "step": 7806 }, { "epoch": 0.21541231370953, "grad_norm": 0.0030702080111950636, "learning_rate": 0.001, "loss": 0.3675, "step": 7807 }, { "epoch": 0.21543990591059436, "grad_norm": 0.0037037963047623634, "learning_rate": 0.001, "loss": 0.4097, "step": 7808 }, { "epoch": 0.21546749811165875, "grad_norm": 0.003569400403648615, "learning_rate": 0.001, "loss": 0.4125, "step": 7809 }, { "epoch": 0.2154950903127231, "grad_norm": 0.005185401998460293, "learning_rate": 0.001, "loss": 0.4272, "step": 7810 }, { "epoch": 0.21552268251378748, "grad_norm": 0.0026427856646478176, "learning_rate": 0.001, "loss": 0.4301, "step": 7811 }, { "epoch": 0.21555027471485186, "grad_norm": 0.0026996051892638206, "learning_rate": 0.001, "loss": 0.4272, "step": 7812 }, { "epoch": 0.2155778669159162, "grad_norm": 0.0026872565504163504, "learning_rate": 0.001, "loss": 0.3894, "step": 7813 }, { "epoch": 0.2156054591169806, "grad_norm": 0.0033470892813056707, "learning_rate": 0.001, "loss": 0.3964, "step": 7814 }, { "epoch": 0.21563305131804494, "grad_norm": 0.002979893935844302, "learning_rate": 0.001, "loss": 0.4182, "step": 7815 }, { "epoch": 0.21566064351910932, "grad_norm": 0.002338412217795849, "learning_rate": 0.001, "loss": 0.4119, "step": 7816 }, { "epoch": 0.2156882357201737, "grad_norm": 0.0026039378717541695, "learning_rate": 0.001, "loss": 0.4217, "step": 7817 }, { "epoch": 0.21571582792123806, "grad_norm": 0.004928927402943373, "learning_rate": 0.001, "loss": 0.3915, "step": 7818 }, { "epoch": 0.21574342012230244, "grad_norm": 0.003969724290072918, "learning_rate": 0.001, "loss": 0.4052, "step": 7819 }, { "epoch": 0.2157710123233668, "grad_norm": 0.004132222384214401, "learning_rate": 0.001, "loss": 0.4446, "step": 7820 }, { "epoch": 0.21579860452443117, "grad_norm": 0.003034410998225212, "learning_rate": 0.001, "loss": 0.4339, "step": 7821 }, { "epoch": 0.21582619672549555, "grad_norm": 0.0034465952776372433, "learning_rate": 0.001, "loss": 0.4024, "step": 7822 }, { "epoch": 0.2158537889265599, "grad_norm": 0.0028153613675385714, "learning_rate": 0.001, "loss": 0.4302, "step": 7823 }, { "epoch": 0.21588138112762428, "grad_norm": 0.007028549909591675, "learning_rate": 0.001, "loss": 0.3774, "step": 7824 }, { "epoch": 0.21590897332868864, "grad_norm": 0.0035245222970843315, "learning_rate": 0.001, "loss": 0.4092, "step": 7825 }, { "epoch": 0.21593656552975302, "grad_norm": 0.0027619630564004183, "learning_rate": 0.001, "loss": 0.3975, "step": 7826 }, { "epoch": 0.2159641577308174, "grad_norm": 0.0029921175446361303, "learning_rate": 0.001, "loss": 0.3609, "step": 7827 }, { "epoch": 0.21599174993188175, "grad_norm": 0.004075322765856981, "learning_rate": 0.001, "loss": 0.3674, "step": 7828 }, { "epoch": 0.21601934213294613, "grad_norm": 0.004721477162092924, "learning_rate": 0.001, "loss": 0.4119, "step": 7829 }, { "epoch": 0.21604693433401048, "grad_norm": 0.00825839675962925, "learning_rate": 0.001, "loss": 0.3915, "step": 7830 }, { "epoch": 0.21607452653507486, "grad_norm": 0.0029653101228177547, "learning_rate": 0.001, "loss": 0.4062, "step": 7831 }, { "epoch": 0.21610211873613924, "grad_norm": 0.004584647715091705, "learning_rate": 0.001, "loss": 0.3687, "step": 7832 }, { "epoch": 0.2161297109372036, "grad_norm": 0.0019515285966917872, "learning_rate": 0.001, "loss": 0.4817, "step": 7833 }, { "epoch": 0.21615730313826798, "grad_norm": 0.0029263091273605824, "learning_rate": 0.001, "loss": 0.3899, "step": 7834 }, { "epoch": 0.21618489533933233, "grad_norm": 0.002956991782411933, "learning_rate": 0.001, "loss": 0.3953, "step": 7835 }, { "epoch": 0.2162124875403967, "grad_norm": 0.0037960107438266277, "learning_rate": 0.001, "loss": 0.3888, "step": 7836 }, { "epoch": 0.2162400797414611, "grad_norm": 0.0029722515027970076, "learning_rate": 0.001, "loss": 0.3817, "step": 7837 }, { "epoch": 0.21626767194252544, "grad_norm": 0.0032150924671441317, "learning_rate": 0.001, "loss": 0.3977, "step": 7838 }, { "epoch": 0.21629526414358982, "grad_norm": 0.005019112955778837, "learning_rate": 0.001, "loss": 0.4123, "step": 7839 }, { "epoch": 0.21632285634465417, "grad_norm": 0.0028452936094254255, "learning_rate": 0.001, "loss": 0.3936, "step": 7840 }, { "epoch": 0.21635044854571855, "grad_norm": 0.0035461215302348137, "learning_rate": 0.001, "loss": 0.4054, "step": 7841 }, { "epoch": 0.21637804074678293, "grad_norm": 0.0051398370414972305, "learning_rate": 0.001, "loss": 0.3803, "step": 7842 }, { "epoch": 0.2164056329478473, "grad_norm": 0.0029704368207603693, "learning_rate": 0.001, "loss": 0.4225, "step": 7843 }, { "epoch": 0.21643322514891167, "grad_norm": 0.004192579071968794, "learning_rate": 0.001, "loss": 0.3837, "step": 7844 }, { "epoch": 0.21646081734997602, "grad_norm": 0.0036786114796996117, "learning_rate": 0.001, "loss": 0.3991, "step": 7845 }, { "epoch": 0.2164884095510404, "grad_norm": 0.0032894443720579147, "learning_rate": 0.001, "loss": 0.3671, "step": 7846 }, { "epoch": 0.21651600175210478, "grad_norm": 0.00437202537432313, "learning_rate": 0.001, "loss": 0.4354, "step": 7847 }, { "epoch": 0.21654359395316913, "grad_norm": 0.004263371229171753, "learning_rate": 0.001, "loss": 0.3765, "step": 7848 }, { "epoch": 0.2165711861542335, "grad_norm": 0.0023139826953411102, "learning_rate": 0.001, "loss": 0.3926, "step": 7849 }, { "epoch": 0.21659877835529787, "grad_norm": 0.002766036195680499, "learning_rate": 0.001, "loss": 0.3959, "step": 7850 }, { "epoch": 0.21662637055636225, "grad_norm": 0.004952535964548588, "learning_rate": 0.001, "loss": 0.3839, "step": 7851 }, { "epoch": 0.21665396275742663, "grad_norm": 0.008287088945508003, "learning_rate": 0.001, "loss": 0.3569, "step": 7852 }, { "epoch": 0.21668155495849098, "grad_norm": 0.019219983369112015, "learning_rate": 0.001, "loss": 0.4082, "step": 7853 }, { "epoch": 0.21670914715955536, "grad_norm": 0.0029021003283560276, "learning_rate": 0.001, "loss": 0.4005, "step": 7854 }, { "epoch": 0.2167367393606197, "grad_norm": 0.004268075339496136, "learning_rate": 0.001, "loss": 0.3729, "step": 7855 }, { "epoch": 0.2167643315616841, "grad_norm": 0.0038819487672299147, "learning_rate": 0.001, "loss": 0.366, "step": 7856 }, { "epoch": 0.21679192376274847, "grad_norm": 0.0033485370222479105, "learning_rate": 0.001, "loss": 0.3949, "step": 7857 }, { "epoch": 0.21681951596381283, "grad_norm": 0.004031899850815535, "learning_rate": 0.001, "loss": 0.3687, "step": 7858 }, { "epoch": 0.2168471081648772, "grad_norm": 0.0028309531044214964, "learning_rate": 0.001, "loss": 0.3855, "step": 7859 }, { "epoch": 0.21687470036594156, "grad_norm": 0.005679224617779255, "learning_rate": 0.001, "loss": 0.3675, "step": 7860 }, { "epoch": 0.21690229256700594, "grad_norm": 0.0028747431933879852, "learning_rate": 0.001, "loss": 0.4003, "step": 7861 }, { "epoch": 0.2169298847680703, "grad_norm": 0.0022720531560480595, "learning_rate": 0.001, "loss": 0.4386, "step": 7862 }, { "epoch": 0.21695747696913467, "grad_norm": 0.0037069146055728197, "learning_rate": 0.001, "loss": 0.399, "step": 7863 }, { "epoch": 0.21698506917019905, "grad_norm": 0.004593148361891508, "learning_rate": 0.001, "loss": 0.3713, "step": 7864 }, { "epoch": 0.2170126613712634, "grad_norm": 0.004522950388491154, "learning_rate": 0.001, "loss": 0.3679, "step": 7865 }, { "epoch": 0.21704025357232778, "grad_norm": 0.0038752169348299503, "learning_rate": 0.001, "loss": 0.4058, "step": 7866 }, { "epoch": 0.21706784577339214, "grad_norm": 0.002294728998094797, "learning_rate": 0.001, "loss": 0.4457, "step": 7867 }, { "epoch": 0.21709543797445652, "grad_norm": 0.002397543750703335, "learning_rate": 0.001, "loss": 0.3893, "step": 7868 }, { "epoch": 0.2171230301755209, "grad_norm": 0.009867599233984947, "learning_rate": 0.001, "loss": 0.3669, "step": 7869 }, { "epoch": 0.21715062237658525, "grad_norm": 0.003990727476775646, "learning_rate": 0.001, "loss": 0.3495, "step": 7870 }, { "epoch": 0.21717821457764963, "grad_norm": 0.003901657648384571, "learning_rate": 0.001, "loss": 0.3531, "step": 7871 }, { "epoch": 0.21720580677871398, "grad_norm": 0.00325774191878736, "learning_rate": 0.001, "loss": 0.3673, "step": 7872 }, { "epoch": 0.21723339897977836, "grad_norm": 0.003350186161696911, "learning_rate": 0.001, "loss": 0.4241, "step": 7873 }, { "epoch": 0.21726099118084274, "grad_norm": 0.002645864151418209, "learning_rate": 0.001, "loss": 0.3833, "step": 7874 }, { "epoch": 0.2172885833819071, "grad_norm": 0.006232825573533773, "learning_rate": 0.001, "loss": 0.3636, "step": 7875 }, { "epoch": 0.21731617558297148, "grad_norm": 0.0021961445454508066, "learning_rate": 0.001, "loss": 0.3775, "step": 7876 }, { "epoch": 0.21734376778403583, "grad_norm": 0.0030032650101929903, "learning_rate": 0.001, "loss": 0.3906, "step": 7877 }, { "epoch": 0.2173713599851002, "grad_norm": 0.006241418421268463, "learning_rate": 0.001, "loss": 0.3826, "step": 7878 }, { "epoch": 0.2173989521861646, "grad_norm": 0.003943136427551508, "learning_rate": 0.001, "loss": 0.4231, "step": 7879 }, { "epoch": 0.21742654438722894, "grad_norm": 0.0024433995131403208, "learning_rate": 0.001, "loss": 0.4239, "step": 7880 }, { "epoch": 0.21745413658829332, "grad_norm": 0.0043607852421700954, "learning_rate": 0.001, "loss": 0.4227, "step": 7881 }, { "epoch": 0.21748172878935768, "grad_norm": 0.004530392121523619, "learning_rate": 0.001, "loss": 0.4202, "step": 7882 }, { "epoch": 0.21750932099042206, "grad_norm": 0.0024341184180229902, "learning_rate": 0.001, "loss": 0.4491, "step": 7883 }, { "epoch": 0.21753691319148644, "grad_norm": 0.003600149182602763, "learning_rate": 0.001, "loss": 0.3859, "step": 7884 }, { "epoch": 0.2175645053925508, "grad_norm": 0.002762843621894717, "learning_rate": 0.001, "loss": 0.4081, "step": 7885 }, { "epoch": 0.21759209759361517, "grad_norm": 0.0029462978709489107, "learning_rate": 0.001, "loss": 0.375, "step": 7886 }, { "epoch": 0.21761968979467952, "grad_norm": 0.0023480509407818317, "learning_rate": 0.001, "loss": 0.4116, "step": 7887 }, { "epoch": 0.2176472819957439, "grad_norm": 0.002522410824894905, "learning_rate": 0.001, "loss": 0.434, "step": 7888 }, { "epoch": 0.21767487419680828, "grad_norm": 0.0022286747116595507, "learning_rate": 0.001, "loss": 0.4345, "step": 7889 }, { "epoch": 0.21770246639787263, "grad_norm": 0.002925209002569318, "learning_rate": 0.001, "loss": 0.3965, "step": 7890 }, { "epoch": 0.21773005859893702, "grad_norm": 0.003066236851736903, "learning_rate": 0.001, "loss": 0.3512, "step": 7891 }, { "epoch": 0.21775765080000137, "grad_norm": 0.003831464098766446, "learning_rate": 0.001, "loss": 0.4169, "step": 7892 }, { "epoch": 0.21778524300106575, "grad_norm": 0.0033246371895074844, "learning_rate": 0.001, "loss": 0.4158, "step": 7893 }, { "epoch": 0.21781283520213013, "grad_norm": 0.0038893541786819696, "learning_rate": 0.001, "loss": 0.3877, "step": 7894 }, { "epoch": 0.21784042740319448, "grad_norm": 0.003152028191834688, "learning_rate": 0.001, "loss": 0.3895, "step": 7895 }, { "epoch": 0.21786801960425886, "grad_norm": 0.004073096439242363, "learning_rate": 0.001, "loss": 0.4466, "step": 7896 }, { "epoch": 0.2178956118053232, "grad_norm": 0.00838442798703909, "learning_rate": 0.001, "loss": 0.43, "step": 7897 }, { "epoch": 0.2179232040063876, "grad_norm": 0.004715254995971918, "learning_rate": 0.001, "loss": 0.4276, "step": 7898 }, { "epoch": 0.21795079620745197, "grad_norm": 0.005644774530082941, "learning_rate": 0.001, "loss": 0.3796, "step": 7899 }, { "epoch": 0.21797838840851633, "grad_norm": 0.0035621551796793938, "learning_rate": 0.001, "loss": 0.4021, "step": 7900 }, { "epoch": 0.2180059806095807, "grad_norm": 0.003715448547154665, "learning_rate": 0.001, "loss": 0.4418, "step": 7901 }, { "epoch": 0.21803357281064506, "grad_norm": 0.0025230993051081896, "learning_rate": 0.001, "loss": 0.389, "step": 7902 }, { "epoch": 0.21806116501170944, "grad_norm": 0.0039021014235913754, "learning_rate": 0.001, "loss": 0.3779, "step": 7903 }, { "epoch": 0.21808875721277382, "grad_norm": 0.004864577203989029, "learning_rate": 0.001, "loss": 0.4003, "step": 7904 }, { "epoch": 0.21811634941383817, "grad_norm": 0.007110804785043001, "learning_rate": 0.001, "loss": 0.4061, "step": 7905 }, { "epoch": 0.21814394161490255, "grad_norm": 0.004145526327192783, "learning_rate": 0.001, "loss": 0.398, "step": 7906 }, { "epoch": 0.2181715338159669, "grad_norm": 0.003214552765712142, "learning_rate": 0.001, "loss": 0.4061, "step": 7907 }, { "epoch": 0.2181991260170313, "grad_norm": 0.0025902027264237404, "learning_rate": 0.001, "loss": 0.4325, "step": 7908 }, { "epoch": 0.21822671821809567, "grad_norm": 0.005056621506810188, "learning_rate": 0.001, "loss": 0.384, "step": 7909 }, { "epoch": 0.21825431041916002, "grad_norm": 0.0027781168464571238, "learning_rate": 0.001, "loss": 0.3889, "step": 7910 }, { "epoch": 0.2182819026202244, "grad_norm": 0.002909269416704774, "learning_rate": 0.001, "loss": 0.409, "step": 7911 }, { "epoch": 0.21830949482128875, "grad_norm": 0.003969018347561359, "learning_rate": 0.001, "loss": 0.415, "step": 7912 }, { "epoch": 0.21833708702235313, "grad_norm": 0.002928847912698984, "learning_rate": 0.001, "loss": 0.4139, "step": 7913 }, { "epoch": 0.2183646792234175, "grad_norm": 0.003366072429344058, "learning_rate": 0.001, "loss": 0.3787, "step": 7914 }, { "epoch": 0.21839227142448187, "grad_norm": 0.003718828782439232, "learning_rate": 0.001, "loss": 0.4136, "step": 7915 }, { "epoch": 0.21841986362554625, "grad_norm": 0.0029193959198892117, "learning_rate": 0.001, "loss": 0.3749, "step": 7916 }, { "epoch": 0.2184474558266106, "grad_norm": 0.002285629976540804, "learning_rate": 0.001, "loss": 0.4077, "step": 7917 }, { "epoch": 0.21847504802767498, "grad_norm": 0.002450938569381833, "learning_rate": 0.001, "loss": 0.3821, "step": 7918 }, { "epoch": 0.21850264022873936, "grad_norm": 0.002341889077797532, "learning_rate": 0.001, "loss": 0.4225, "step": 7919 }, { "epoch": 0.2185302324298037, "grad_norm": 0.003041157266125083, "learning_rate": 0.001, "loss": 0.4094, "step": 7920 }, { "epoch": 0.2185578246308681, "grad_norm": 0.004077001940459013, "learning_rate": 0.001, "loss": 0.4035, "step": 7921 }, { "epoch": 0.21858541683193244, "grad_norm": 0.002720333868637681, "learning_rate": 0.001, "loss": 0.3965, "step": 7922 }, { "epoch": 0.21861300903299682, "grad_norm": 0.005403697956353426, "learning_rate": 0.001, "loss": 0.3741, "step": 7923 }, { "epoch": 0.2186406012340612, "grad_norm": 0.004800851922482252, "learning_rate": 0.001, "loss": 0.3859, "step": 7924 }, { "epoch": 0.21866819343512556, "grad_norm": 0.003321266733109951, "learning_rate": 0.001, "loss": 0.4121, "step": 7925 }, { "epoch": 0.21869578563618994, "grad_norm": 0.003606748068705201, "learning_rate": 0.001, "loss": 0.3809, "step": 7926 }, { "epoch": 0.2187233778372543, "grad_norm": 0.002816277788951993, "learning_rate": 0.001, "loss": 0.4194, "step": 7927 }, { "epoch": 0.21875097003831867, "grad_norm": 0.0028528219554573298, "learning_rate": 0.001, "loss": 0.423, "step": 7928 }, { "epoch": 0.21877856223938305, "grad_norm": 0.00370188825763762, "learning_rate": 0.001, "loss": 0.3935, "step": 7929 }, { "epoch": 0.2188061544404474, "grad_norm": 0.00391001021489501, "learning_rate": 0.001, "loss": 0.357, "step": 7930 }, { "epoch": 0.21883374664151178, "grad_norm": 0.002469925908371806, "learning_rate": 0.001, "loss": 0.4025, "step": 7931 }, { "epoch": 0.21886133884257614, "grad_norm": 0.004554017446935177, "learning_rate": 0.001, "loss": 0.4178, "step": 7932 }, { "epoch": 0.21888893104364052, "grad_norm": 0.005216664168983698, "learning_rate": 0.001, "loss": 0.404, "step": 7933 }, { "epoch": 0.2189165232447049, "grad_norm": 0.002773087937384844, "learning_rate": 0.001, "loss": 0.3708, "step": 7934 }, { "epoch": 0.21894411544576925, "grad_norm": 0.002156183123588562, "learning_rate": 0.001, "loss": 0.4021, "step": 7935 }, { "epoch": 0.21897170764683363, "grad_norm": 0.0025752519723027945, "learning_rate": 0.001, "loss": 0.4342, "step": 7936 }, { "epoch": 0.21899929984789798, "grad_norm": 0.002592964330688119, "learning_rate": 0.001, "loss": 0.3793, "step": 7937 }, { "epoch": 0.21902689204896236, "grad_norm": 0.0023423507809638977, "learning_rate": 0.001, "loss": 0.4115, "step": 7938 }, { "epoch": 0.21905448425002674, "grad_norm": 0.0025791767984628677, "learning_rate": 0.001, "loss": 0.3848, "step": 7939 }, { "epoch": 0.2190820764510911, "grad_norm": 0.0030768734868615866, "learning_rate": 0.001, "loss": 0.384, "step": 7940 }, { "epoch": 0.21910966865215548, "grad_norm": 0.0035807385575026274, "learning_rate": 0.001, "loss": 0.4136, "step": 7941 }, { "epoch": 0.21913726085321983, "grad_norm": 0.0031572608277201653, "learning_rate": 0.001, "loss": 0.3611, "step": 7942 }, { "epoch": 0.2191648530542842, "grad_norm": 0.0027867392636835575, "learning_rate": 0.001, "loss": 0.3867, "step": 7943 }, { "epoch": 0.2191924452553486, "grad_norm": 0.0025052884593605995, "learning_rate": 0.001, "loss": 0.4148, "step": 7944 }, { "epoch": 0.21922003745641294, "grad_norm": 0.0027730814181268215, "learning_rate": 0.001, "loss": 0.4021, "step": 7945 }, { "epoch": 0.21924762965747732, "grad_norm": 0.0026065954007208347, "learning_rate": 0.001, "loss": 0.4361, "step": 7946 }, { "epoch": 0.21927522185854167, "grad_norm": 0.00736627820879221, "learning_rate": 0.001, "loss": 0.3858, "step": 7947 }, { "epoch": 0.21930281405960605, "grad_norm": 0.003984878305345774, "learning_rate": 0.001, "loss": 0.4282, "step": 7948 }, { "epoch": 0.21933040626067044, "grad_norm": 0.005414186976850033, "learning_rate": 0.001, "loss": 0.4019, "step": 7949 }, { "epoch": 0.2193579984617348, "grad_norm": 0.0026020752266049385, "learning_rate": 0.001, "loss": 0.4018, "step": 7950 }, { "epoch": 0.21938559066279917, "grad_norm": 0.0026629299391061068, "learning_rate": 0.001, "loss": 0.4099, "step": 7951 }, { "epoch": 0.21941318286386352, "grad_norm": 0.0022089278791099787, "learning_rate": 0.001, "loss": 0.4051, "step": 7952 }, { "epoch": 0.2194407750649279, "grad_norm": 0.0028548568952828646, "learning_rate": 0.001, "loss": 0.3881, "step": 7953 }, { "epoch": 0.21946836726599225, "grad_norm": 0.0029203901067376137, "learning_rate": 0.001, "loss": 0.4204, "step": 7954 }, { "epoch": 0.21949595946705663, "grad_norm": 0.0032568967435508966, "learning_rate": 0.001, "loss": 0.4321, "step": 7955 }, { "epoch": 0.21952355166812101, "grad_norm": 0.005700434558093548, "learning_rate": 0.001, "loss": 0.3849, "step": 7956 }, { "epoch": 0.21955114386918537, "grad_norm": 0.0026754315476864576, "learning_rate": 0.001, "loss": 0.4113, "step": 7957 }, { "epoch": 0.21957873607024975, "grad_norm": 0.004874562378972769, "learning_rate": 0.001, "loss": 0.3982, "step": 7958 }, { "epoch": 0.2196063282713141, "grad_norm": 0.0051938071846961975, "learning_rate": 0.001, "loss": 0.412, "step": 7959 }, { "epoch": 0.21963392047237848, "grad_norm": 0.005463124252855778, "learning_rate": 0.001, "loss": 0.392, "step": 7960 }, { "epoch": 0.21966151267344286, "grad_norm": 0.0026376366149634123, "learning_rate": 0.001, "loss": 0.3492, "step": 7961 }, { "epoch": 0.2196891048745072, "grad_norm": 0.0029911526944488287, "learning_rate": 0.001, "loss": 0.4019, "step": 7962 }, { "epoch": 0.2197166970755716, "grad_norm": 0.002725818660110235, "learning_rate": 0.001, "loss": 0.3863, "step": 7963 }, { "epoch": 0.21974428927663595, "grad_norm": 0.0030495068058371544, "learning_rate": 0.001, "loss": 0.4246, "step": 7964 }, { "epoch": 0.21977188147770033, "grad_norm": 0.0032879766076803207, "learning_rate": 0.001, "loss": 0.3787, "step": 7965 }, { "epoch": 0.2197994736787647, "grad_norm": 0.005279941018670797, "learning_rate": 0.001, "loss": 0.3665, "step": 7966 }, { "epoch": 0.21982706587982906, "grad_norm": 0.004807317163795233, "learning_rate": 0.001, "loss": 0.4117, "step": 7967 }, { "epoch": 0.21985465808089344, "grad_norm": 0.004063909407705069, "learning_rate": 0.001, "loss": 0.38, "step": 7968 }, { "epoch": 0.2198822502819578, "grad_norm": 0.002503841184079647, "learning_rate": 0.001, "loss": 0.3815, "step": 7969 }, { "epoch": 0.21990984248302217, "grad_norm": 0.002414180664345622, "learning_rate": 0.001, "loss": 0.3916, "step": 7970 }, { "epoch": 0.21993743468408655, "grad_norm": 0.004260845948010683, "learning_rate": 0.001, "loss": 0.3956, "step": 7971 }, { "epoch": 0.2199650268851509, "grad_norm": 0.003588800085708499, "learning_rate": 0.001, "loss": 0.3539, "step": 7972 }, { "epoch": 0.21999261908621529, "grad_norm": 0.01770518720149994, "learning_rate": 0.001, "loss": 0.3468, "step": 7973 }, { "epoch": 0.22002021128727964, "grad_norm": 0.004121637437492609, "learning_rate": 0.001, "loss": 0.3988, "step": 7974 }, { "epoch": 0.22004780348834402, "grad_norm": 0.0025279296096414328, "learning_rate": 0.001, "loss": 0.4385, "step": 7975 }, { "epoch": 0.2200753956894084, "grad_norm": 0.0031378697603940964, "learning_rate": 0.001, "loss": 0.4171, "step": 7976 }, { "epoch": 0.22010298789047275, "grad_norm": 0.006243572104722261, "learning_rate": 0.001, "loss": 0.3851, "step": 7977 }, { "epoch": 0.22013058009153713, "grad_norm": 0.005349922459572554, "learning_rate": 0.001, "loss": 0.3876, "step": 7978 }, { "epoch": 0.22015817229260148, "grad_norm": 0.004400680772960186, "learning_rate": 0.001, "loss": 0.399, "step": 7979 }, { "epoch": 0.22018576449366586, "grad_norm": 0.005931714083999395, "learning_rate": 0.001, "loss": 0.3864, "step": 7980 }, { "epoch": 0.22021335669473024, "grad_norm": 0.006180520635098219, "learning_rate": 0.001, "loss": 0.3868, "step": 7981 }, { "epoch": 0.2202409488957946, "grad_norm": 0.002432264154776931, "learning_rate": 0.001, "loss": 0.3742, "step": 7982 }, { "epoch": 0.22026854109685898, "grad_norm": 0.002539379522204399, "learning_rate": 0.001, "loss": 0.4761, "step": 7983 }, { "epoch": 0.22029613329792333, "grad_norm": 0.0032303177285939455, "learning_rate": 0.001, "loss": 0.4102, "step": 7984 }, { "epoch": 0.2203237254989877, "grad_norm": 0.004244436044245958, "learning_rate": 0.001, "loss": 0.3844, "step": 7985 }, { "epoch": 0.2203513177000521, "grad_norm": 0.003201343584805727, "learning_rate": 0.001, "loss": 0.4399, "step": 7986 }, { "epoch": 0.22037890990111644, "grad_norm": 0.004487950354814529, "learning_rate": 0.001, "loss": 0.4543, "step": 7987 }, { "epoch": 0.22040650210218082, "grad_norm": 0.002907637506723404, "learning_rate": 0.001, "loss": 0.4088, "step": 7988 }, { "epoch": 0.22043409430324518, "grad_norm": 0.002862214343622327, "learning_rate": 0.001, "loss": 0.4378, "step": 7989 }, { "epoch": 0.22046168650430956, "grad_norm": 0.011252621188759804, "learning_rate": 0.001, "loss": 0.4219, "step": 7990 }, { "epoch": 0.22048927870537394, "grad_norm": 0.002715484006330371, "learning_rate": 0.001, "loss": 0.4007, "step": 7991 }, { "epoch": 0.2205168709064383, "grad_norm": 0.0029305212665349245, "learning_rate": 0.001, "loss": 0.4262, "step": 7992 }, { "epoch": 0.22054446310750267, "grad_norm": 0.0029706000350415707, "learning_rate": 0.001, "loss": 0.348, "step": 7993 }, { "epoch": 0.22057205530856702, "grad_norm": 0.00246282946318388, "learning_rate": 0.001, "loss": 0.4015, "step": 7994 }, { "epoch": 0.2205996475096314, "grad_norm": 0.0036219728644937277, "learning_rate": 0.001, "loss": 0.3819, "step": 7995 }, { "epoch": 0.22062723971069578, "grad_norm": 0.0028112917207181454, "learning_rate": 0.001, "loss": 0.4004, "step": 7996 }, { "epoch": 0.22065483191176014, "grad_norm": 0.005280990153551102, "learning_rate": 0.001, "loss": 0.3977, "step": 7997 }, { "epoch": 0.22068242411282452, "grad_norm": 0.004479492083191872, "learning_rate": 0.001, "loss": 0.3864, "step": 7998 }, { "epoch": 0.22071001631388887, "grad_norm": 0.0030434499494731426, "learning_rate": 0.001, "loss": 0.3753, "step": 7999 }, { "epoch": 0.22073760851495325, "grad_norm": 0.016081584617495537, "learning_rate": 0.001, "loss": 0.3885, "step": 8000 }, { "epoch": 0.22073760851495325, "eval_runtime": 24.424, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.164, "step": 8000 }, { "epoch": 0.22076520071601763, "grad_norm": 0.004563974682241678, "learning_rate": 0.001, "loss": 0.3883, "step": 8001 }, { "epoch": 0.22079279291708198, "grad_norm": 0.002752038650214672, "learning_rate": 0.001, "loss": 0.3746, "step": 8002 }, { "epoch": 0.22082038511814636, "grad_norm": 0.0030077442061156034, "learning_rate": 0.001, "loss": 0.403, "step": 8003 }, { "epoch": 0.22084797731921071, "grad_norm": 0.0024848515167832375, "learning_rate": 0.001, "loss": 0.387, "step": 8004 }, { "epoch": 0.2208755695202751, "grad_norm": 0.0023180947173386812, "learning_rate": 0.001, "loss": 0.4063, "step": 8005 }, { "epoch": 0.22090316172133947, "grad_norm": 0.0037670242600142956, "learning_rate": 0.001, "loss": 0.4131, "step": 8006 }, { "epoch": 0.22093075392240383, "grad_norm": 0.003507862566038966, "learning_rate": 0.001, "loss": 0.3705, "step": 8007 }, { "epoch": 0.2209583461234682, "grad_norm": 0.004687591455876827, "learning_rate": 0.001, "loss": 0.3816, "step": 8008 }, { "epoch": 0.22098593832453256, "grad_norm": 0.002483742544427514, "learning_rate": 0.001, "loss": 0.4235, "step": 8009 }, { "epoch": 0.22101353052559694, "grad_norm": 0.004836369771510363, "learning_rate": 0.001, "loss": 0.4485, "step": 8010 }, { "epoch": 0.22104112272666132, "grad_norm": 0.003374388674274087, "learning_rate": 0.001, "loss": 0.398, "step": 8011 }, { "epoch": 0.22106871492772567, "grad_norm": 0.004700181540101767, "learning_rate": 0.001, "loss": 0.3449, "step": 8012 }, { "epoch": 0.22109630712879005, "grad_norm": 0.011050062254071236, "learning_rate": 0.001, "loss": 0.3755, "step": 8013 }, { "epoch": 0.2211238993298544, "grad_norm": 0.0021428498439490795, "learning_rate": 0.001, "loss": 0.3979, "step": 8014 }, { "epoch": 0.2211514915309188, "grad_norm": 0.00233366503380239, "learning_rate": 0.001, "loss": 0.4079, "step": 8015 }, { "epoch": 0.22117908373198317, "grad_norm": 0.0025947142858058214, "learning_rate": 0.001, "loss": 0.4434, "step": 8016 }, { "epoch": 0.22120667593304752, "grad_norm": 0.00214517954736948, "learning_rate": 0.001, "loss": 0.3971, "step": 8017 }, { "epoch": 0.2212342681341119, "grad_norm": 0.003991144709289074, "learning_rate": 0.001, "loss": 0.4082, "step": 8018 }, { "epoch": 0.22126186033517625, "grad_norm": 0.003229195484891534, "learning_rate": 0.001, "loss": 0.3928, "step": 8019 }, { "epoch": 0.22128945253624063, "grad_norm": 0.002979196375235915, "learning_rate": 0.001, "loss": 0.4029, "step": 8020 }, { "epoch": 0.221317044737305, "grad_norm": 0.004278901033103466, "learning_rate": 0.001, "loss": 0.4478, "step": 8021 }, { "epoch": 0.22134463693836937, "grad_norm": 0.007357322610914707, "learning_rate": 0.001, "loss": 0.4118, "step": 8022 }, { "epoch": 0.22137222913943375, "grad_norm": 0.0035520815290510654, "learning_rate": 0.001, "loss": 0.368, "step": 8023 }, { "epoch": 0.2213998213404981, "grad_norm": 0.004246349446475506, "learning_rate": 0.001, "loss": 0.3497, "step": 8024 }, { "epoch": 0.22142741354156248, "grad_norm": 0.0035226894542574883, "learning_rate": 0.001, "loss": 0.3883, "step": 8025 }, { "epoch": 0.22145500574262686, "grad_norm": 0.0033106855116784573, "learning_rate": 0.001, "loss": 0.4018, "step": 8026 }, { "epoch": 0.2214825979436912, "grad_norm": 0.004675147123634815, "learning_rate": 0.001, "loss": 0.3878, "step": 8027 }, { "epoch": 0.2215101901447556, "grad_norm": 0.004575135186314583, "learning_rate": 0.001, "loss": 0.3664, "step": 8028 }, { "epoch": 0.22153778234581994, "grad_norm": 0.002557947300374508, "learning_rate": 0.001, "loss": 0.4291, "step": 8029 }, { "epoch": 0.22156537454688432, "grad_norm": 0.0031390858348459005, "learning_rate": 0.001, "loss": 0.454, "step": 8030 }, { "epoch": 0.2215929667479487, "grad_norm": 0.004276024177670479, "learning_rate": 0.001, "loss": 0.3828, "step": 8031 }, { "epoch": 0.22162055894901306, "grad_norm": 0.00613689050078392, "learning_rate": 0.001, "loss": 0.3498, "step": 8032 }, { "epoch": 0.22164815115007744, "grad_norm": 0.0052116867154836655, "learning_rate": 0.001, "loss": 0.4038, "step": 8033 }, { "epoch": 0.2216757433511418, "grad_norm": 0.0032780475448817015, "learning_rate": 0.001, "loss": 0.4159, "step": 8034 }, { "epoch": 0.22170333555220617, "grad_norm": 0.003026155522093177, "learning_rate": 0.001, "loss": 0.4079, "step": 8035 }, { "epoch": 0.22173092775327055, "grad_norm": 0.0075735533609986305, "learning_rate": 0.001, "loss": 0.4076, "step": 8036 }, { "epoch": 0.2217585199543349, "grad_norm": 0.010422303341329098, "learning_rate": 0.001, "loss": 0.3969, "step": 8037 }, { "epoch": 0.22178611215539928, "grad_norm": 0.009534427896142006, "learning_rate": 0.001, "loss": 0.3807, "step": 8038 }, { "epoch": 0.22181370435646364, "grad_norm": 0.004428492859005928, "learning_rate": 0.001, "loss": 0.3755, "step": 8039 }, { "epoch": 0.22184129655752802, "grad_norm": 0.006164777558296919, "learning_rate": 0.001, "loss": 0.3625, "step": 8040 }, { "epoch": 0.2218688887585924, "grad_norm": 0.003359799971804023, "learning_rate": 0.001, "loss": 0.3913, "step": 8041 }, { "epoch": 0.22189648095965675, "grad_norm": 0.006137318443506956, "learning_rate": 0.001, "loss": 0.3905, "step": 8042 }, { "epoch": 0.22192407316072113, "grad_norm": 0.002756676636636257, "learning_rate": 0.001, "loss": 0.4066, "step": 8043 }, { "epoch": 0.22195166536178548, "grad_norm": 0.004392385482788086, "learning_rate": 0.001, "loss": 0.3889, "step": 8044 }, { "epoch": 0.22197925756284986, "grad_norm": 0.002814218634739518, "learning_rate": 0.001, "loss": 0.4143, "step": 8045 }, { "epoch": 0.22200684976391422, "grad_norm": 0.002874415833503008, "learning_rate": 0.001, "loss": 0.3878, "step": 8046 }, { "epoch": 0.2220344419649786, "grad_norm": 0.005475836340337992, "learning_rate": 0.001, "loss": 0.3815, "step": 8047 }, { "epoch": 0.22206203416604298, "grad_norm": 0.002302660373970866, "learning_rate": 0.001, "loss": 0.3921, "step": 8048 }, { "epoch": 0.22208962636710733, "grad_norm": 0.003013992914929986, "learning_rate": 0.001, "loss": 0.3822, "step": 8049 }, { "epoch": 0.2221172185681717, "grad_norm": 0.0038887159898877144, "learning_rate": 0.001, "loss": 0.3634, "step": 8050 }, { "epoch": 0.22214481076923606, "grad_norm": 0.004871399141848087, "learning_rate": 0.001, "loss": 0.4017, "step": 8051 }, { "epoch": 0.22217240297030044, "grad_norm": 0.002852171426638961, "learning_rate": 0.001, "loss": 0.4119, "step": 8052 }, { "epoch": 0.22219999517136482, "grad_norm": 0.0025132293812930584, "learning_rate": 0.001, "loss": 0.4383, "step": 8053 }, { "epoch": 0.22222758737242918, "grad_norm": 0.002570350421592593, "learning_rate": 0.001, "loss": 0.4199, "step": 8054 }, { "epoch": 0.22225517957349356, "grad_norm": 0.005364352371543646, "learning_rate": 0.001, "loss": 0.3952, "step": 8055 }, { "epoch": 0.2222827717745579, "grad_norm": 0.0030977013520896435, "learning_rate": 0.001, "loss": 0.3853, "step": 8056 }, { "epoch": 0.2223103639756223, "grad_norm": 0.0027198875322937965, "learning_rate": 0.001, "loss": 0.4575, "step": 8057 }, { "epoch": 0.22233795617668667, "grad_norm": 0.011028733104467392, "learning_rate": 0.001, "loss": 0.3959, "step": 8058 }, { "epoch": 0.22236554837775102, "grad_norm": 0.0037885240744799376, "learning_rate": 0.001, "loss": 0.4189, "step": 8059 }, { "epoch": 0.2223931405788154, "grad_norm": 0.005188632290810347, "learning_rate": 0.001, "loss": 0.3937, "step": 8060 }, { "epoch": 0.22242073277987975, "grad_norm": 0.003573206951841712, "learning_rate": 0.001, "loss": 0.4131, "step": 8061 }, { "epoch": 0.22244832498094413, "grad_norm": 0.0036306334659457207, "learning_rate": 0.001, "loss": 0.4015, "step": 8062 }, { "epoch": 0.22247591718200851, "grad_norm": 0.0033542602322995663, "learning_rate": 0.001, "loss": 0.3792, "step": 8063 }, { "epoch": 0.22250350938307287, "grad_norm": 0.004111922346055508, "learning_rate": 0.001, "loss": 0.424, "step": 8064 }, { "epoch": 0.22253110158413725, "grad_norm": 0.004905781242996454, "learning_rate": 0.001, "loss": 0.4319, "step": 8065 }, { "epoch": 0.2225586937852016, "grad_norm": 0.009073971770703793, "learning_rate": 0.001, "loss": 0.3574, "step": 8066 }, { "epoch": 0.22258628598626598, "grad_norm": 0.0031212870962917805, "learning_rate": 0.001, "loss": 0.4205, "step": 8067 }, { "epoch": 0.22261387818733036, "grad_norm": 0.0061768037267029285, "learning_rate": 0.001, "loss": 0.4067, "step": 8068 }, { "epoch": 0.2226414703883947, "grad_norm": 0.004057316109538078, "learning_rate": 0.001, "loss": 0.4284, "step": 8069 }, { "epoch": 0.2226690625894591, "grad_norm": 0.002062835730612278, "learning_rate": 0.001, "loss": 0.4023, "step": 8070 }, { "epoch": 0.22269665479052345, "grad_norm": 0.010553326457738876, "learning_rate": 0.001, "loss": 0.3838, "step": 8071 }, { "epoch": 0.22272424699158783, "grad_norm": 0.004648920148611069, "learning_rate": 0.001, "loss": 0.3822, "step": 8072 }, { "epoch": 0.2227518391926522, "grad_norm": 0.002884708344936371, "learning_rate": 0.001, "loss": 0.4043, "step": 8073 }, { "epoch": 0.22277943139371656, "grad_norm": 0.002019941108301282, "learning_rate": 0.001, "loss": 0.4259, "step": 8074 }, { "epoch": 0.22280702359478094, "grad_norm": 0.004767908249050379, "learning_rate": 0.001, "loss": 0.423, "step": 8075 }, { "epoch": 0.2228346157958453, "grad_norm": 0.003971953876316547, "learning_rate": 0.001, "loss": 0.3976, "step": 8076 }, { "epoch": 0.22286220799690967, "grad_norm": 0.004503239411860704, "learning_rate": 0.001, "loss": 0.3891, "step": 8077 }, { "epoch": 0.22288980019797405, "grad_norm": 0.002689438173547387, "learning_rate": 0.001, "loss": 0.4082, "step": 8078 }, { "epoch": 0.2229173923990384, "grad_norm": 0.003111765021458268, "learning_rate": 0.001, "loss": 0.3963, "step": 8079 }, { "epoch": 0.22294498460010279, "grad_norm": 0.003488035872578621, "learning_rate": 0.001, "loss": 0.3488, "step": 8080 }, { "epoch": 0.22297257680116714, "grad_norm": 0.004513781983405352, "learning_rate": 0.001, "loss": 0.3627, "step": 8081 }, { "epoch": 0.22300016900223152, "grad_norm": 0.0029607918113470078, "learning_rate": 0.001, "loss": 0.4006, "step": 8082 }, { "epoch": 0.2230277612032959, "grad_norm": 0.0026686247438192368, "learning_rate": 0.001, "loss": 0.4311, "step": 8083 }, { "epoch": 0.22305535340436025, "grad_norm": 0.003172766650095582, "learning_rate": 0.001, "loss": 0.4155, "step": 8084 }, { "epoch": 0.22308294560542463, "grad_norm": 0.002642567502334714, "learning_rate": 0.001, "loss": 0.4093, "step": 8085 }, { "epoch": 0.22311053780648898, "grad_norm": 0.002758550923317671, "learning_rate": 0.001, "loss": 0.3962, "step": 8086 }, { "epoch": 0.22313813000755336, "grad_norm": 0.004465687554329634, "learning_rate": 0.001, "loss": 0.3925, "step": 8087 }, { "epoch": 0.22316572220861775, "grad_norm": 0.003773334203287959, "learning_rate": 0.001, "loss": 0.3736, "step": 8088 }, { "epoch": 0.2231933144096821, "grad_norm": 0.0027832011692225933, "learning_rate": 0.001, "loss": 0.3967, "step": 8089 }, { "epoch": 0.22322090661074648, "grad_norm": 0.00280310888774693, "learning_rate": 0.001, "loss": 0.3727, "step": 8090 }, { "epoch": 0.22324849881181083, "grad_norm": 0.003244071500375867, "learning_rate": 0.001, "loss": 0.3965, "step": 8091 }, { "epoch": 0.2232760910128752, "grad_norm": 0.0024930760264396667, "learning_rate": 0.001, "loss": 0.4322, "step": 8092 }, { "epoch": 0.2233036832139396, "grad_norm": 0.002686891006305814, "learning_rate": 0.001, "loss": 0.3844, "step": 8093 }, { "epoch": 0.22333127541500394, "grad_norm": 0.00237724045291543, "learning_rate": 0.001, "loss": 0.437, "step": 8094 }, { "epoch": 0.22335886761606832, "grad_norm": 0.004243023227900267, "learning_rate": 0.001, "loss": 0.3899, "step": 8095 }, { "epoch": 0.22338645981713268, "grad_norm": 0.004333519376814365, "learning_rate": 0.001, "loss": 0.4245, "step": 8096 }, { "epoch": 0.22341405201819706, "grad_norm": 0.0025725301820784807, "learning_rate": 0.001, "loss": 0.3968, "step": 8097 }, { "epoch": 0.22344164421926144, "grad_norm": 0.004863837733864784, "learning_rate": 0.001, "loss": 0.4261, "step": 8098 }, { "epoch": 0.2234692364203258, "grad_norm": 0.0025464182253926992, "learning_rate": 0.001, "loss": 0.412, "step": 8099 }, { "epoch": 0.22349682862139017, "grad_norm": 0.003454705933108926, "learning_rate": 0.001, "loss": 0.3631, "step": 8100 }, { "epoch": 0.22352442082245452, "grad_norm": 0.004076403100043535, "learning_rate": 0.001, "loss": 0.4161, "step": 8101 }, { "epoch": 0.2235520130235189, "grad_norm": 0.002538996748626232, "learning_rate": 0.001, "loss": 0.4246, "step": 8102 }, { "epoch": 0.22357960522458328, "grad_norm": 0.002853696933016181, "learning_rate": 0.001, "loss": 0.3856, "step": 8103 }, { "epoch": 0.22360719742564764, "grad_norm": 0.005618890281766653, "learning_rate": 0.001, "loss": 0.4101, "step": 8104 }, { "epoch": 0.22363478962671202, "grad_norm": 0.0023889478761702776, "learning_rate": 0.001, "loss": 0.407, "step": 8105 }, { "epoch": 0.22366238182777637, "grad_norm": 0.0049384357407689095, "learning_rate": 0.001, "loss": 0.3995, "step": 8106 }, { "epoch": 0.22368997402884075, "grad_norm": 0.0029198869597166777, "learning_rate": 0.001, "loss": 0.3977, "step": 8107 }, { "epoch": 0.22371756622990513, "grad_norm": 0.00721718929708004, "learning_rate": 0.001, "loss": 0.3965, "step": 8108 }, { "epoch": 0.22374515843096948, "grad_norm": 0.0038066962733864784, "learning_rate": 0.001, "loss": 0.3987, "step": 8109 }, { "epoch": 0.22377275063203386, "grad_norm": 0.0029343971982598305, "learning_rate": 0.001, "loss": 0.3974, "step": 8110 }, { "epoch": 0.22380034283309821, "grad_norm": 0.004783669952303171, "learning_rate": 0.001, "loss": 0.4032, "step": 8111 }, { "epoch": 0.2238279350341626, "grad_norm": 0.005818706471472979, "learning_rate": 0.001, "loss": 0.3737, "step": 8112 }, { "epoch": 0.22385552723522698, "grad_norm": 0.0027874866500496864, "learning_rate": 0.001, "loss": 0.3932, "step": 8113 }, { "epoch": 0.22388311943629133, "grad_norm": 0.006304432172328234, "learning_rate": 0.001, "loss": 0.4075, "step": 8114 }, { "epoch": 0.2239107116373557, "grad_norm": 0.006212018895894289, "learning_rate": 0.001, "loss": 0.3632, "step": 8115 }, { "epoch": 0.22393830383842006, "grad_norm": 0.0021557544823735952, "learning_rate": 0.001, "loss": 0.4262, "step": 8116 }, { "epoch": 0.22396589603948444, "grad_norm": 0.0035563178826123476, "learning_rate": 0.001, "loss": 0.4008, "step": 8117 }, { "epoch": 0.22399348824054882, "grad_norm": 0.002528556389734149, "learning_rate": 0.001, "loss": 0.3833, "step": 8118 }, { "epoch": 0.22402108044161317, "grad_norm": 0.0022855051793158054, "learning_rate": 0.001, "loss": 0.3971, "step": 8119 }, { "epoch": 0.22404867264267755, "grad_norm": 0.0027871252968907356, "learning_rate": 0.001, "loss": 0.3744, "step": 8120 }, { "epoch": 0.2240762648437419, "grad_norm": 0.003169616684317589, "learning_rate": 0.001, "loss": 0.3901, "step": 8121 }, { "epoch": 0.2241038570448063, "grad_norm": 0.003899726551026106, "learning_rate": 0.001, "loss": 0.4177, "step": 8122 }, { "epoch": 0.22413144924587067, "grad_norm": 0.0040968372486531734, "learning_rate": 0.001, "loss": 0.3875, "step": 8123 }, { "epoch": 0.22415904144693502, "grad_norm": 0.002130861859768629, "learning_rate": 0.001, "loss": 0.4066, "step": 8124 }, { "epoch": 0.2241866336479994, "grad_norm": 0.0030744632240384817, "learning_rate": 0.001, "loss": 0.3703, "step": 8125 }, { "epoch": 0.22421422584906375, "grad_norm": 0.0023388941772282124, "learning_rate": 0.001, "loss": 0.428, "step": 8126 }, { "epoch": 0.22424181805012813, "grad_norm": 0.005775371100753546, "learning_rate": 0.001, "loss": 0.3984, "step": 8127 }, { "epoch": 0.2242694102511925, "grad_norm": 0.002374644624069333, "learning_rate": 0.001, "loss": 0.4077, "step": 8128 }, { "epoch": 0.22429700245225687, "grad_norm": 0.0058793784119188786, "learning_rate": 0.001, "loss": 0.3693, "step": 8129 }, { "epoch": 0.22432459465332125, "grad_norm": 0.0027781794779002666, "learning_rate": 0.001, "loss": 0.4153, "step": 8130 }, { "epoch": 0.2243521868543856, "grad_norm": 0.004530200269073248, "learning_rate": 0.001, "loss": 0.3909, "step": 8131 }, { "epoch": 0.22437977905544998, "grad_norm": 0.002897880505770445, "learning_rate": 0.001, "loss": 0.3793, "step": 8132 }, { "epoch": 0.22440737125651436, "grad_norm": 0.0025317175313830376, "learning_rate": 0.001, "loss": 0.398, "step": 8133 }, { "epoch": 0.2244349634575787, "grad_norm": 0.004969809204339981, "learning_rate": 0.001, "loss": 0.3771, "step": 8134 }, { "epoch": 0.2244625556586431, "grad_norm": 0.019586963579058647, "learning_rate": 0.001, "loss": 0.4059, "step": 8135 }, { "epoch": 0.22449014785970745, "grad_norm": 0.003650497179478407, "learning_rate": 0.001, "loss": 0.4429, "step": 8136 }, { "epoch": 0.22451774006077183, "grad_norm": 0.004681742750108242, "learning_rate": 0.001, "loss": 0.3727, "step": 8137 }, { "epoch": 0.2245453322618362, "grad_norm": 0.0019167111022397876, "learning_rate": 0.001, "loss": 0.4284, "step": 8138 }, { "epoch": 0.22457292446290056, "grad_norm": 0.002770308405160904, "learning_rate": 0.001, "loss": 0.393, "step": 8139 }, { "epoch": 0.22460051666396494, "grad_norm": 0.002731415443122387, "learning_rate": 0.001, "loss": 0.373, "step": 8140 }, { "epoch": 0.2246281088650293, "grad_norm": 0.0024662816431373358, "learning_rate": 0.001, "loss": 0.3751, "step": 8141 }, { "epoch": 0.22465570106609367, "grad_norm": 0.00265320367179811, "learning_rate": 0.001, "loss": 0.3967, "step": 8142 }, { "epoch": 0.22468329326715802, "grad_norm": 0.0031822596210986376, "learning_rate": 0.001, "loss": 0.3825, "step": 8143 }, { "epoch": 0.2247108854682224, "grad_norm": 0.005078164394944906, "learning_rate": 0.001, "loss": 0.3827, "step": 8144 }, { "epoch": 0.22473847766928678, "grad_norm": 0.003918538335710764, "learning_rate": 0.001, "loss": 0.3767, "step": 8145 }, { "epoch": 0.22476606987035114, "grad_norm": 0.0030788714066147804, "learning_rate": 0.001, "loss": 0.4253, "step": 8146 }, { "epoch": 0.22479366207141552, "grad_norm": 0.0029016491025686264, "learning_rate": 0.001, "loss": 0.4175, "step": 8147 }, { "epoch": 0.22482125427247987, "grad_norm": 0.0023613544180989265, "learning_rate": 0.001, "loss": 0.4157, "step": 8148 }, { "epoch": 0.22484884647354425, "grad_norm": 0.0021770396269857883, "learning_rate": 0.001, "loss": 0.3961, "step": 8149 }, { "epoch": 0.22487643867460863, "grad_norm": 0.004851729609072208, "learning_rate": 0.001, "loss": 0.3996, "step": 8150 }, { "epoch": 0.22490403087567298, "grad_norm": 0.0023205161560326815, "learning_rate": 0.001, "loss": 0.4151, "step": 8151 }, { "epoch": 0.22493162307673736, "grad_norm": 0.0027498926501721144, "learning_rate": 0.001, "loss": 0.3587, "step": 8152 }, { "epoch": 0.22495921527780172, "grad_norm": 0.004693325143307447, "learning_rate": 0.001, "loss": 0.3944, "step": 8153 }, { "epoch": 0.2249868074788661, "grad_norm": 0.0030215794686228037, "learning_rate": 0.001, "loss": 0.4149, "step": 8154 }, { "epoch": 0.22501439967993048, "grad_norm": 0.0024281737860292196, "learning_rate": 0.001, "loss": 0.4235, "step": 8155 }, { "epoch": 0.22504199188099483, "grad_norm": 0.006052395328879356, "learning_rate": 0.001, "loss": 0.3992, "step": 8156 }, { "epoch": 0.2250695840820592, "grad_norm": 0.005301719065755606, "learning_rate": 0.001, "loss": 0.3867, "step": 8157 }, { "epoch": 0.22509717628312356, "grad_norm": 0.0024900834541767836, "learning_rate": 0.001, "loss": 0.4067, "step": 8158 }, { "epoch": 0.22512476848418794, "grad_norm": 0.00321084912866354, "learning_rate": 0.001, "loss": 0.3843, "step": 8159 }, { "epoch": 0.22515236068525232, "grad_norm": 0.002374766394495964, "learning_rate": 0.001, "loss": 0.3948, "step": 8160 }, { "epoch": 0.22517995288631668, "grad_norm": 0.002608582377433777, "learning_rate": 0.001, "loss": 0.409, "step": 8161 }, { "epoch": 0.22520754508738106, "grad_norm": 0.0029289585072547197, "learning_rate": 0.001, "loss": 0.3982, "step": 8162 }, { "epoch": 0.2252351372884454, "grad_norm": 0.0028822184540331364, "learning_rate": 0.001, "loss": 0.3814, "step": 8163 }, { "epoch": 0.2252627294895098, "grad_norm": 0.004350913688540459, "learning_rate": 0.001, "loss": 0.3946, "step": 8164 }, { "epoch": 0.22529032169057417, "grad_norm": 0.0049639069475233555, "learning_rate": 0.001, "loss": 0.4058, "step": 8165 }, { "epoch": 0.22531791389163852, "grad_norm": 0.005849256180226803, "learning_rate": 0.001, "loss": 0.4043, "step": 8166 }, { "epoch": 0.2253455060927029, "grad_norm": 0.007307564374059439, "learning_rate": 0.001, "loss": 0.4227, "step": 8167 }, { "epoch": 0.22537309829376725, "grad_norm": 0.0058277989737689495, "learning_rate": 0.001, "loss": 0.4153, "step": 8168 }, { "epoch": 0.22540069049483163, "grad_norm": 0.0024729755241423845, "learning_rate": 0.001, "loss": 0.4078, "step": 8169 }, { "epoch": 0.22542828269589602, "grad_norm": 0.0030476911924779415, "learning_rate": 0.001, "loss": 0.4271, "step": 8170 }, { "epoch": 0.22545587489696037, "grad_norm": 0.003712064353749156, "learning_rate": 0.001, "loss": 0.405, "step": 8171 }, { "epoch": 0.22548346709802475, "grad_norm": 0.0031436847057193518, "learning_rate": 0.001, "loss": 0.3763, "step": 8172 }, { "epoch": 0.2255110592990891, "grad_norm": 0.002069181762635708, "learning_rate": 0.001, "loss": 0.4075, "step": 8173 }, { "epoch": 0.22553865150015348, "grad_norm": 0.0023455459158867598, "learning_rate": 0.001, "loss": 0.4161, "step": 8174 }, { "epoch": 0.22556624370121786, "grad_norm": 0.005130277015268803, "learning_rate": 0.001, "loss": 0.3735, "step": 8175 }, { "epoch": 0.2255938359022822, "grad_norm": 0.002300563734024763, "learning_rate": 0.001, "loss": 0.4008, "step": 8176 }, { "epoch": 0.2256214281033466, "grad_norm": 0.002703635022044182, "learning_rate": 0.001, "loss": 0.4045, "step": 8177 }, { "epoch": 0.22564902030441095, "grad_norm": 0.00716767180711031, "learning_rate": 0.001, "loss": 0.4063, "step": 8178 }, { "epoch": 0.22567661250547533, "grad_norm": 0.002465134486556053, "learning_rate": 0.001, "loss": 0.4011, "step": 8179 }, { "epoch": 0.2257042047065397, "grad_norm": 0.004032150376588106, "learning_rate": 0.001, "loss": 0.4256, "step": 8180 }, { "epoch": 0.22573179690760406, "grad_norm": 0.0025379545986652374, "learning_rate": 0.001, "loss": 0.3947, "step": 8181 }, { "epoch": 0.22575938910866844, "grad_norm": 0.0032701275777071714, "learning_rate": 0.001, "loss": 0.4011, "step": 8182 }, { "epoch": 0.2257869813097328, "grad_norm": 0.002877070102840662, "learning_rate": 0.001, "loss": 0.3997, "step": 8183 }, { "epoch": 0.22581457351079717, "grad_norm": 0.003329310566186905, "learning_rate": 0.001, "loss": 0.4441, "step": 8184 }, { "epoch": 0.22584216571186155, "grad_norm": 0.003345692064613104, "learning_rate": 0.001, "loss": 0.3866, "step": 8185 }, { "epoch": 0.2258697579129259, "grad_norm": 0.002473951783031225, "learning_rate": 0.001, "loss": 0.4265, "step": 8186 }, { "epoch": 0.2258973501139903, "grad_norm": 0.0027697773184627295, "learning_rate": 0.001, "loss": 0.4272, "step": 8187 }, { "epoch": 0.22592494231505464, "grad_norm": 0.00608485285192728, "learning_rate": 0.001, "loss": 0.3947, "step": 8188 }, { "epoch": 0.22595253451611902, "grad_norm": 0.0026021813973784447, "learning_rate": 0.001, "loss": 0.3898, "step": 8189 }, { "epoch": 0.2259801267171834, "grad_norm": 0.003049626713618636, "learning_rate": 0.001, "loss": 0.3972, "step": 8190 }, { "epoch": 0.22600771891824775, "grad_norm": 0.0028316755779087543, "learning_rate": 0.001, "loss": 0.402, "step": 8191 }, { "epoch": 0.22603531111931213, "grad_norm": 0.0024335982743650675, "learning_rate": 0.001, "loss": 0.3873, "step": 8192 }, { "epoch": 0.22606290332037648, "grad_norm": 0.0019406350329518318, "learning_rate": 0.001, "loss": 0.3972, "step": 8193 }, { "epoch": 0.22609049552144087, "grad_norm": 0.0031156607437878847, "learning_rate": 0.001, "loss": 0.4109, "step": 8194 }, { "epoch": 0.22611808772250525, "grad_norm": 0.0024029607884585857, "learning_rate": 0.001, "loss": 0.3982, "step": 8195 }, { "epoch": 0.2261456799235696, "grad_norm": 0.002428837353363633, "learning_rate": 0.001, "loss": 0.3735, "step": 8196 }, { "epoch": 0.22617327212463398, "grad_norm": 0.003429250791668892, "learning_rate": 0.001, "loss": 0.4288, "step": 8197 }, { "epoch": 0.22620086432569833, "grad_norm": 0.003104103496298194, "learning_rate": 0.001, "loss": 0.3953, "step": 8198 }, { "epoch": 0.2262284565267627, "grad_norm": 0.002276889281347394, "learning_rate": 0.001, "loss": 0.3932, "step": 8199 }, { "epoch": 0.2262560487278271, "grad_norm": 0.002815461019054055, "learning_rate": 0.001, "loss": 0.378, "step": 8200 }, { "epoch": 0.22628364092889144, "grad_norm": 0.0021125515922904015, "learning_rate": 0.001, "loss": 0.416, "step": 8201 }, { "epoch": 0.22631123312995582, "grad_norm": 0.0026191947981715202, "learning_rate": 0.001, "loss": 0.3934, "step": 8202 }, { "epoch": 0.22633882533102018, "grad_norm": 0.002689339919015765, "learning_rate": 0.001, "loss": 0.3949, "step": 8203 }, { "epoch": 0.22636641753208456, "grad_norm": 0.00354541908018291, "learning_rate": 0.001, "loss": 0.3831, "step": 8204 }, { "epoch": 0.22639400973314894, "grad_norm": 0.0037923678755760193, "learning_rate": 0.001, "loss": 0.4343, "step": 8205 }, { "epoch": 0.2264216019342133, "grad_norm": 0.004423064645379782, "learning_rate": 0.001, "loss": 0.4002, "step": 8206 }, { "epoch": 0.22644919413527767, "grad_norm": 0.004195543006062508, "learning_rate": 0.001, "loss": 0.4197, "step": 8207 }, { "epoch": 0.22647678633634202, "grad_norm": 0.0025238455273211002, "learning_rate": 0.001, "loss": 0.4091, "step": 8208 }, { "epoch": 0.2265043785374064, "grad_norm": 0.0026775554288178682, "learning_rate": 0.001, "loss": 0.4152, "step": 8209 }, { "epoch": 0.22653197073847078, "grad_norm": 0.004028724506497383, "learning_rate": 0.001, "loss": 0.3778, "step": 8210 }, { "epoch": 0.22655956293953514, "grad_norm": 0.002054669661447406, "learning_rate": 0.001, "loss": 0.3787, "step": 8211 }, { "epoch": 0.22658715514059952, "grad_norm": 0.003321393858641386, "learning_rate": 0.001, "loss": 0.3685, "step": 8212 }, { "epoch": 0.22661474734166387, "grad_norm": 0.0035699696745723486, "learning_rate": 0.001, "loss": 0.4127, "step": 8213 }, { "epoch": 0.22664233954272825, "grad_norm": 0.004189506638795137, "learning_rate": 0.001, "loss": 0.383, "step": 8214 }, { "epoch": 0.22666993174379263, "grad_norm": 0.003797245444729924, "learning_rate": 0.001, "loss": 0.3812, "step": 8215 }, { "epoch": 0.22669752394485698, "grad_norm": 0.0045156171545386314, "learning_rate": 0.001, "loss": 0.4078, "step": 8216 }, { "epoch": 0.22672511614592136, "grad_norm": 0.0026707893703132868, "learning_rate": 0.001, "loss": 0.3575, "step": 8217 }, { "epoch": 0.22675270834698572, "grad_norm": 0.0025783004239201546, "learning_rate": 0.001, "loss": 0.4373, "step": 8218 }, { "epoch": 0.2267803005480501, "grad_norm": 0.004405698273330927, "learning_rate": 0.001, "loss": 0.4001, "step": 8219 }, { "epoch": 0.22680789274911448, "grad_norm": 0.00231956597417593, "learning_rate": 0.001, "loss": 0.3762, "step": 8220 }, { "epoch": 0.22683548495017883, "grad_norm": 0.0033627781085669994, "learning_rate": 0.001, "loss": 0.4201, "step": 8221 }, { "epoch": 0.2268630771512432, "grad_norm": 0.003316913964226842, "learning_rate": 0.001, "loss": 0.4167, "step": 8222 }, { "epoch": 0.22689066935230756, "grad_norm": 0.0026452546007931232, "learning_rate": 0.001, "loss": 0.3952, "step": 8223 }, { "epoch": 0.22691826155337194, "grad_norm": 0.004451108165085316, "learning_rate": 0.001, "loss": 0.3784, "step": 8224 }, { "epoch": 0.22694585375443632, "grad_norm": 0.0040152668952941895, "learning_rate": 0.001, "loss": 0.3664, "step": 8225 }, { "epoch": 0.22697344595550067, "grad_norm": 0.003606460290029645, "learning_rate": 0.001, "loss": 0.441, "step": 8226 }, { "epoch": 0.22700103815656505, "grad_norm": 0.003297601593658328, "learning_rate": 0.001, "loss": 0.4248, "step": 8227 }, { "epoch": 0.2270286303576294, "grad_norm": 0.003477993654087186, "learning_rate": 0.001, "loss": 0.3738, "step": 8228 }, { "epoch": 0.2270562225586938, "grad_norm": 0.002349859569221735, "learning_rate": 0.001, "loss": 0.4143, "step": 8229 }, { "epoch": 0.22708381475975817, "grad_norm": 0.003242079634219408, "learning_rate": 0.001, "loss": 0.4205, "step": 8230 }, { "epoch": 0.22711140696082252, "grad_norm": 0.007759904023259878, "learning_rate": 0.001, "loss": 0.3954, "step": 8231 }, { "epoch": 0.2271389991618869, "grad_norm": 0.003948306664824486, "learning_rate": 0.001, "loss": 0.3749, "step": 8232 }, { "epoch": 0.22716659136295125, "grad_norm": 0.002385435625910759, "learning_rate": 0.001, "loss": 0.3925, "step": 8233 }, { "epoch": 0.22719418356401563, "grad_norm": 0.0032237351406365633, "learning_rate": 0.001, "loss": 0.398, "step": 8234 }, { "epoch": 0.22722177576508, "grad_norm": 0.0027539869770407677, "learning_rate": 0.001, "loss": 0.4185, "step": 8235 }, { "epoch": 0.22724936796614437, "grad_norm": 0.002958692843094468, "learning_rate": 0.001, "loss": 0.3964, "step": 8236 }, { "epoch": 0.22727696016720875, "grad_norm": 0.005276213400065899, "learning_rate": 0.001, "loss": 0.3884, "step": 8237 }, { "epoch": 0.2273045523682731, "grad_norm": 0.0028638257645070553, "learning_rate": 0.001, "loss": 0.4019, "step": 8238 }, { "epoch": 0.22733214456933748, "grad_norm": 0.0034719184041023254, "learning_rate": 0.001, "loss": 0.4131, "step": 8239 }, { "epoch": 0.22735973677040183, "grad_norm": 0.002775615779682994, "learning_rate": 0.001, "loss": 0.4081, "step": 8240 }, { "epoch": 0.2273873289714662, "grad_norm": 0.0028845765627920628, "learning_rate": 0.001, "loss": 0.3774, "step": 8241 }, { "epoch": 0.2274149211725306, "grad_norm": 0.002906092908233404, "learning_rate": 0.001, "loss": 0.3827, "step": 8242 }, { "epoch": 0.22744251337359495, "grad_norm": 0.0031754986848682165, "learning_rate": 0.001, "loss": 0.4096, "step": 8243 }, { "epoch": 0.22747010557465933, "grad_norm": 0.00252131768502295, "learning_rate": 0.001, "loss": 0.4347, "step": 8244 }, { "epoch": 0.22749769777572368, "grad_norm": 0.0025290907360613346, "learning_rate": 0.001, "loss": 0.4043, "step": 8245 }, { "epoch": 0.22752528997678806, "grad_norm": 0.0034006794448941946, "learning_rate": 0.001, "loss": 0.3981, "step": 8246 }, { "epoch": 0.22755288217785244, "grad_norm": 0.002173987217247486, "learning_rate": 0.001, "loss": 0.4244, "step": 8247 }, { "epoch": 0.2275804743789168, "grad_norm": 0.002650484209880233, "learning_rate": 0.001, "loss": 0.4232, "step": 8248 }, { "epoch": 0.22760806657998117, "grad_norm": 0.0069848657585680485, "learning_rate": 0.001, "loss": 0.3648, "step": 8249 }, { "epoch": 0.22763565878104552, "grad_norm": 0.002296354155987501, "learning_rate": 0.001, "loss": 0.4071, "step": 8250 }, { "epoch": 0.2276632509821099, "grad_norm": 0.0023510761093348265, "learning_rate": 0.001, "loss": 0.348, "step": 8251 }, { "epoch": 0.22769084318317429, "grad_norm": 0.002062094397842884, "learning_rate": 0.001, "loss": 0.3983, "step": 8252 }, { "epoch": 0.22771843538423864, "grad_norm": 0.004330387804657221, "learning_rate": 0.001, "loss": 0.3546, "step": 8253 }, { "epoch": 0.22774602758530302, "grad_norm": 0.0027810055762529373, "learning_rate": 0.001, "loss": 0.3931, "step": 8254 }, { "epoch": 0.22777361978636737, "grad_norm": 0.0025015720166265965, "learning_rate": 0.001, "loss": 0.3958, "step": 8255 }, { "epoch": 0.22780121198743175, "grad_norm": 0.004701843950897455, "learning_rate": 0.001, "loss": 0.3982, "step": 8256 }, { "epoch": 0.22782880418849613, "grad_norm": 0.0027081798762083054, "learning_rate": 0.001, "loss": 0.3989, "step": 8257 }, { "epoch": 0.22785639638956048, "grad_norm": 0.00656101293861866, "learning_rate": 0.001, "loss": 0.3426, "step": 8258 }, { "epoch": 0.22788398859062486, "grad_norm": 0.004077386576682329, "learning_rate": 0.001, "loss": 0.4016, "step": 8259 }, { "epoch": 0.22791158079168922, "grad_norm": 0.005094864405691624, "learning_rate": 0.001, "loss": 0.3737, "step": 8260 }, { "epoch": 0.2279391729927536, "grad_norm": 0.003425776259973645, "learning_rate": 0.001, "loss": 0.3538, "step": 8261 }, { "epoch": 0.22796676519381798, "grad_norm": 0.0027753396425396204, "learning_rate": 0.001, "loss": 0.4208, "step": 8262 }, { "epoch": 0.22799435739488233, "grad_norm": 0.00263264961540699, "learning_rate": 0.001, "loss": 0.4395, "step": 8263 }, { "epoch": 0.2280219495959467, "grad_norm": 0.002466138917952776, "learning_rate": 0.001, "loss": 0.402, "step": 8264 }, { "epoch": 0.22804954179701106, "grad_norm": 0.0038233925588428974, "learning_rate": 0.001, "loss": 0.3909, "step": 8265 }, { "epoch": 0.22807713399807544, "grad_norm": 0.004745953716337681, "learning_rate": 0.001, "loss": 0.3964, "step": 8266 }, { "epoch": 0.22810472619913982, "grad_norm": 0.0024268226698040962, "learning_rate": 0.001, "loss": 0.4141, "step": 8267 }, { "epoch": 0.22813231840020418, "grad_norm": 0.002283599926158786, "learning_rate": 0.001, "loss": 0.4234, "step": 8268 }, { "epoch": 0.22815991060126856, "grad_norm": 0.003483585547655821, "learning_rate": 0.001, "loss": 0.3517, "step": 8269 }, { "epoch": 0.2281875028023329, "grad_norm": 0.002310832031071186, "learning_rate": 0.001, "loss": 0.4246, "step": 8270 }, { "epoch": 0.2282150950033973, "grad_norm": 0.0025413238909095526, "learning_rate": 0.001, "loss": 0.4106, "step": 8271 }, { "epoch": 0.22824268720446167, "grad_norm": 0.002345605753362179, "learning_rate": 0.001, "loss": 0.4098, "step": 8272 }, { "epoch": 0.22827027940552602, "grad_norm": 0.005003250669687986, "learning_rate": 0.001, "loss": 0.3899, "step": 8273 }, { "epoch": 0.2282978716065904, "grad_norm": 0.002230096375569701, "learning_rate": 0.001, "loss": 0.4071, "step": 8274 }, { "epoch": 0.22832546380765475, "grad_norm": 0.0022804015316069126, "learning_rate": 0.001, "loss": 0.429, "step": 8275 }, { "epoch": 0.22835305600871914, "grad_norm": 0.004967406392097473, "learning_rate": 0.001, "loss": 0.3755, "step": 8276 }, { "epoch": 0.22838064820978352, "grad_norm": 0.003264637663960457, "learning_rate": 0.001, "loss": 0.3904, "step": 8277 }, { "epoch": 0.22840824041084787, "grad_norm": 0.0022652260959148407, "learning_rate": 0.001, "loss": 0.4021, "step": 8278 }, { "epoch": 0.22843583261191225, "grad_norm": 0.002336147939786315, "learning_rate": 0.001, "loss": 0.4132, "step": 8279 }, { "epoch": 0.2284634248129766, "grad_norm": 0.00243670167401433, "learning_rate": 0.001, "loss": 0.3809, "step": 8280 }, { "epoch": 0.22849101701404098, "grad_norm": 0.0036559735890477896, "learning_rate": 0.001, "loss": 0.4095, "step": 8281 }, { "epoch": 0.22851860921510536, "grad_norm": 0.007312767673283815, "learning_rate": 0.001, "loss": 0.4186, "step": 8282 }, { "epoch": 0.22854620141616971, "grad_norm": 0.008099487982690334, "learning_rate": 0.001, "loss": 0.356, "step": 8283 }, { "epoch": 0.2285737936172341, "grad_norm": 0.0025754349771887064, "learning_rate": 0.001, "loss": 0.3802, "step": 8284 }, { "epoch": 0.22860138581829845, "grad_norm": 0.002983193611726165, "learning_rate": 0.001, "loss": 0.3994, "step": 8285 }, { "epoch": 0.22862897801936283, "grad_norm": 0.0024407468736171722, "learning_rate": 0.001, "loss": 0.4239, "step": 8286 }, { "epoch": 0.2286565702204272, "grad_norm": 0.004473252687603235, "learning_rate": 0.001, "loss": 0.3746, "step": 8287 }, { "epoch": 0.22868416242149156, "grad_norm": 0.002675954718142748, "learning_rate": 0.001, "loss": 0.4297, "step": 8288 }, { "epoch": 0.22871175462255594, "grad_norm": 0.0025287093594670296, "learning_rate": 0.001, "loss": 0.395, "step": 8289 }, { "epoch": 0.2287393468236203, "grad_norm": 0.002895546378567815, "learning_rate": 0.001, "loss": 0.3759, "step": 8290 }, { "epoch": 0.22876693902468467, "grad_norm": 0.003705421229824424, "learning_rate": 0.001, "loss": 0.4142, "step": 8291 }, { "epoch": 0.22879453122574905, "grad_norm": 0.003449185285717249, "learning_rate": 0.001, "loss": 0.4024, "step": 8292 }, { "epoch": 0.2288221234268134, "grad_norm": 0.0022859005257487297, "learning_rate": 0.001, "loss": 0.3739, "step": 8293 }, { "epoch": 0.2288497156278778, "grad_norm": 0.0023443542886525393, "learning_rate": 0.001, "loss": 0.391, "step": 8294 }, { "epoch": 0.22887730782894214, "grad_norm": 0.0021697822958230972, "learning_rate": 0.001, "loss": 0.401, "step": 8295 }, { "epoch": 0.22890490003000652, "grad_norm": 0.0023217375855892897, "learning_rate": 0.001, "loss": 0.3897, "step": 8296 }, { "epoch": 0.2289324922310709, "grad_norm": 0.005043079610913992, "learning_rate": 0.001, "loss": 0.3887, "step": 8297 }, { "epoch": 0.22896008443213525, "grad_norm": 0.004579662811011076, "learning_rate": 0.001, "loss": 0.3543, "step": 8298 }, { "epoch": 0.22898767663319963, "grad_norm": 0.0032538543455302715, "learning_rate": 0.001, "loss": 0.3964, "step": 8299 }, { "epoch": 0.22901526883426399, "grad_norm": 0.00212521362118423, "learning_rate": 0.001, "loss": 0.4059, "step": 8300 }, { "epoch": 0.22904286103532837, "grad_norm": 0.003112571081146598, "learning_rate": 0.001, "loss": 0.4247, "step": 8301 }, { "epoch": 0.22907045323639275, "grad_norm": 0.0030887876637279987, "learning_rate": 0.001, "loss": 0.4055, "step": 8302 }, { "epoch": 0.2290980454374571, "grad_norm": 0.002820172579959035, "learning_rate": 0.001, "loss": 0.4015, "step": 8303 }, { "epoch": 0.22912563763852148, "grad_norm": 0.003904817858710885, "learning_rate": 0.001, "loss": 0.4197, "step": 8304 }, { "epoch": 0.22915322983958583, "grad_norm": 0.003779762424528599, "learning_rate": 0.001, "loss": 0.434, "step": 8305 }, { "epoch": 0.2291808220406502, "grad_norm": 0.0025575195904821157, "learning_rate": 0.001, "loss": 0.3972, "step": 8306 }, { "epoch": 0.2292084142417146, "grad_norm": 0.0039753662422299385, "learning_rate": 0.001, "loss": 0.401, "step": 8307 }, { "epoch": 0.22923600644277894, "grad_norm": 0.0023605753667652607, "learning_rate": 0.001, "loss": 0.3817, "step": 8308 }, { "epoch": 0.22926359864384332, "grad_norm": 0.0050887493416666985, "learning_rate": 0.001, "loss": 0.3755, "step": 8309 }, { "epoch": 0.22929119084490768, "grad_norm": 0.0026991376653313637, "learning_rate": 0.001, "loss": 0.3734, "step": 8310 }, { "epoch": 0.22931878304597206, "grad_norm": 0.0030844821594655514, "learning_rate": 0.001, "loss": 0.3769, "step": 8311 }, { "epoch": 0.22934637524703644, "grad_norm": 0.0029710521921515465, "learning_rate": 0.001, "loss": 0.4106, "step": 8312 }, { "epoch": 0.2293739674481008, "grad_norm": 0.0033121525775641203, "learning_rate": 0.001, "loss": 0.3688, "step": 8313 }, { "epoch": 0.22940155964916517, "grad_norm": 0.002841067034751177, "learning_rate": 0.001, "loss": 0.3627, "step": 8314 }, { "epoch": 0.22942915185022952, "grad_norm": 0.003009919775649905, "learning_rate": 0.001, "loss": 0.3875, "step": 8315 }, { "epoch": 0.2294567440512939, "grad_norm": 0.007487253285944462, "learning_rate": 0.001, "loss": 0.3973, "step": 8316 }, { "epoch": 0.22948433625235828, "grad_norm": 0.003841083962470293, "learning_rate": 0.001, "loss": 0.4063, "step": 8317 }, { "epoch": 0.22951192845342264, "grad_norm": 0.003769118804484606, "learning_rate": 0.001, "loss": 0.4195, "step": 8318 }, { "epoch": 0.22953952065448702, "grad_norm": 0.0026735500432550907, "learning_rate": 0.001, "loss": 0.4123, "step": 8319 }, { "epoch": 0.22956711285555137, "grad_norm": 0.0035852077417075634, "learning_rate": 0.001, "loss": 0.376, "step": 8320 }, { "epoch": 0.22959470505661575, "grad_norm": 0.004404496867209673, "learning_rate": 0.001, "loss": 0.3884, "step": 8321 }, { "epoch": 0.22962229725768013, "grad_norm": 0.004810912534594536, "learning_rate": 0.001, "loss": 0.3728, "step": 8322 }, { "epoch": 0.22964988945874448, "grad_norm": 0.002430463209748268, "learning_rate": 0.001, "loss": 0.4139, "step": 8323 }, { "epoch": 0.22967748165980886, "grad_norm": 0.0034299069084227085, "learning_rate": 0.001, "loss": 0.3697, "step": 8324 }, { "epoch": 0.22970507386087322, "grad_norm": 0.002617501188069582, "learning_rate": 0.001, "loss": 0.408, "step": 8325 }, { "epoch": 0.2297326660619376, "grad_norm": 0.002159700496122241, "learning_rate": 0.001, "loss": 0.4001, "step": 8326 }, { "epoch": 0.22976025826300198, "grad_norm": 0.003289289539679885, "learning_rate": 0.001, "loss": 0.4243, "step": 8327 }, { "epoch": 0.22978785046406633, "grad_norm": 0.0034644966945052147, "learning_rate": 0.001, "loss": 0.3866, "step": 8328 }, { "epoch": 0.2298154426651307, "grad_norm": 0.005093908403068781, "learning_rate": 0.001, "loss": 0.3948, "step": 8329 }, { "epoch": 0.22984303486619506, "grad_norm": 0.0033900076523423195, "learning_rate": 0.001, "loss": 0.3983, "step": 8330 }, { "epoch": 0.22987062706725944, "grad_norm": 0.0024000401608645916, "learning_rate": 0.001, "loss": 0.4097, "step": 8331 }, { "epoch": 0.2298982192683238, "grad_norm": 0.0034197689965367317, "learning_rate": 0.001, "loss": 0.3977, "step": 8332 }, { "epoch": 0.22992581146938817, "grad_norm": 0.0020301672630012035, "learning_rate": 0.001, "loss": 0.4249, "step": 8333 }, { "epoch": 0.22995340367045256, "grad_norm": 0.002956526121124625, "learning_rate": 0.001, "loss": 0.4192, "step": 8334 }, { "epoch": 0.2299809958715169, "grad_norm": 0.003122934838756919, "learning_rate": 0.001, "loss": 0.3925, "step": 8335 }, { "epoch": 0.2300085880725813, "grad_norm": 0.005318638868629932, "learning_rate": 0.001, "loss": 0.4263, "step": 8336 }, { "epoch": 0.23003618027364564, "grad_norm": 0.0033183018676936626, "learning_rate": 0.001, "loss": 0.3753, "step": 8337 }, { "epoch": 0.23006377247471002, "grad_norm": 0.0026557277888059616, "learning_rate": 0.001, "loss": 0.4063, "step": 8338 }, { "epoch": 0.2300913646757744, "grad_norm": 0.002340905833989382, "learning_rate": 0.001, "loss": 0.388, "step": 8339 }, { "epoch": 0.23011895687683875, "grad_norm": 0.002298276871442795, "learning_rate": 0.001, "loss": 0.4251, "step": 8340 }, { "epoch": 0.23014654907790313, "grad_norm": 0.004820041824132204, "learning_rate": 0.001, "loss": 0.3819, "step": 8341 }, { "epoch": 0.2301741412789675, "grad_norm": 0.00302408030256629, "learning_rate": 0.001, "loss": 0.3706, "step": 8342 }, { "epoch": 0.23020173348003187, "grad_norm": 0.002940029837191105, "learning_rate": 0.001, "loss": 0.3724, "step": 8343 }, { "epoch": 0.23022932568109625, "grad_norm": 0.007366549223661423, "learning_rate": 0.001, "loss": 0.3765, "step": 8344 }, { "epoch": 0.2302569178821606, "grad_norm": 0.00624980591237545, "learning_rate": 0.001, "loss": 0.4048, "step": 8345 }, { "epoch": 0.23028451008322498, "grad_norm": 0.003244641702622175, "learning_rate": 0.001, "loss": 0.3637, "step": 8346 }, { "epoch": 0.23031210228428933, "grad_norm": 0.003316201502457261, "learning_rate": 0.001, "loss": 0.409, "step": 8347 }, { "epoch": 0.2303396944853537, "grad_norm": 0.004240705166012049, "learning_rate": 0.001, "loss": 0.3763, "step": 8348 }, { "epoch": 0.2303672866864181, "grad_norm": 0.004042000509798527, "learning_rate": 0.001, "loss": 0.392, "step": 8349 }, { "epoch": 0.23039487888748245, "grad_norm": 0.013157385401427746, "learning_rate": 0.001, "loss": 0.4483, "step": 8350 }, { "epoch": 0.23042247108854683, "grad_norm": 0.014354042708873749, "learning_rate": 0.001, "loss": 0.3777, "step": 8351 }, { "epoch": 0.23045006328961118, "grad_norm": 0.003511092159897089, "learning_rate": 0.001, "loss": 0.417, "step": 8352 }, { "epoch": 0.23047765549067556, "grad_norm": 0.0031483755446970463, "learning_rate": 0.001, "loss": 0.3863, "step": 8353 }, { "epoch": 0.23050524769173994, "grad_norm": 0.0037154678720980883, "learning_rate": 0.001, "loss": 0.3836, "step": 8354 }, { "epoch": 0.2305328398928043, "grad_norm": 0.004443106707185507, "learning_rate": 0.001, "loss": 0.4128, "step": 8355 }, { "epoch": 0.23056043209386867, "grad_norm": 0.0030360580421984196, "learning_rate": 0.001, "loss": 0.4006, "step": 8356 }, { "epoch": 0.23058802429493302, "grad_norm": 0.0023466795682907104, "learning_rate": 0.001, "loss": 0.3841, "step": 8357 }, { "epoch": 0.2306156164959974, "grad_norm": 0.005221103318035603, "learning_rate": 0.001, "loss": 0.3873, "step": 8358 }, { "epoch": 0.23064320869706179, "grad_norm": 0.0028962953947484493, "learning_rate": 0.001, "loss": 0.4029, "step": 8359 }, { "epoch": 0.23067080089812614, "grad_norm": 0.0029752026312053204, "learning_rate": 0.001, "loss": 0.3917, "step": 8360 }, { "epoch": 0.23069839309919052, "grad_norm": 0.005493995267897844, "learning_rate": 0.001, "loss": 0.3873, "step": 8361 }, { "epoch": 0.23072598530025487, "grad_norm": 0.0037254132330417633, "learning_rate": 0.001, "loss": 0.3481, "step": 8362 }, { "epoch": 0.23075357750131925, "grad_norm": 0.0025524902157485485, "learning_rate": 0.001, "loss": 0.3797, "step": 8363 }, { "epoch": 0.23078116970238363, "grad_norm": 0.0027644087094813585, "learning_rate": 0.001, "loss": 0.4005, "step": 8364 }, { "epoch": 0.23080876190344798, "grad_norm": 0.0027628885582089424, "learning_rate": 0.001, "loss": 0.4303, "step": 8365 }, { "epoch": 0.23083635410451236, "grad_norm": 0.0039875865913927555, "learning_rate": 0.001, "loss": 0.3986, "step": 8366 }, { "epoch": 0.23086394630557672, "grad_norm": 0.003650275059044361, "learning_rate": 0.001, "loss": 0.386, "step": 8367 }, { "epoch": 0.2308915385066411, "grad_norm": 0.0026859240606427193, "learning_rate": 0.001, "loss": 0.4035, "step": 8368 }, { "epoch": 0.23091913070770548, "grad_norm": 0.004638598766177893, "learning_rate": 0.001, "loss": 0.4052, "step": 8369 }, { "epoch": 0.23094672290876983, "grad_norm": 0.0029127905145287514, "learning_rate": 0.001, "loss": 0.4049, "step": 8370 }, { "epoch": 0.2309743151098342, "grad_norm": 0.01036902703344822, "learning_rate": 0.001, "loss": 0.3835, "step": 8371 }, { "epoch": 0.23100190731089856, "grad_norm": 0.0038089959416538477, "learning_rate": 0.001, "loss": 0.3554, "step": 8372 }, { "epoch": 0.23102949951196294, "grad_norm": 0.0037768797483295202, "learning_rate": 0.001, "loss": 0.3817, "step": 8373 }, { "epoch": 0.23105709171302732, "grad_norm": 0.006050050258636475, "learning_rate": 0.001, "loss": 0.3826, "step": 8374 }, { "epoch": 0.23108468391409168, "grad_norm": 0.0037368466146290302, "learning_rate": 0.001, "loss": 0.4804, "step": 8375 }, { "epoch": 0.23111227611515606, "grad_norm": 0.002388233318924904, "learning_rate": 0.001, "loss": 0.4179, "step": 8376 }, { "epoch": 0.2311398683162204, "grad_norm": 0.002968754153698683, "learning_rate": 0.001, "loss": 0.393, "step": 8377 }, { "epoch": 0.2311674605172848, "grad_norm": 0.005483376793563366, "learning_rate": 0.001, "loss": 0.418, "step": 8378 }, { "epoch": 0.23119505271834917, "grad_norm": 0.0025941620115190744, "learning_rate": 0.001, "loss": 0.392, "step": 8379 }, { "epoch": 0.23122264491941352, "grad_norm": 0.0066278777085244656, "learning_rate": 0.001, "loss": 0.386, "step": 8380 }, { "epoch": 0.2312502371204779, "grad_norm": 0.00500098429620266, "learning_rate": 0.001, "loss": 0.3955, "step": 8381 }, { "epoch": 0.23127782932154226, "grad_norm": 0.002849952783435583, "learning_rate": 0.001, "loss": 0.4072, "step": 8382 }, { "epoch": 0.23130542152260664, "grad_norm": 0.0047418465837836266, "learning_rate": 0.001, "loss": 0.3982, "step": 8383 }, { "epoch": 0.23133301372367102, "grad_norm": 0.005365386605262756, "learning_rate": 0.001, "loss": 0.4049, "step": 8384 }, { "epoch": 0.23136060592473537, "grad_norm": 0.002952918875962496, "learning_rate": 0.001, "loss": 0.4072, "step": 8385 }, { "epoch": 0.23138819812579975, "grad_norm": 0.003814739640802145, "learning_rate": 0.001, "loss": 0.4248, "step": 8386 }, { "epoch": 0.2314157903268641, "grad_norm": 0.0031040378380566835, "learning_rate": 0.001, "loss": 0.4025, "step": 8387 }, { "epoch": 0.23144338252792848, "grad_norm": 0.0033247387036681175, "learning_rate": 0.001, "loss": 0.3893, "step": 8388 }, { "epoch": 0.23147097472899286, "grad_norm": 0.002116522518917918, "learning_rate": 0.001, "loss": 0.408, "step": 8389 }, { "epoch": 0.23149856693005721, "grad_norm": 0.0033864439465105534, "learning_rate": 0.001, "loss": 0.3803, "step": 8390 }, { "epoch": 0.2315261591311216, "grad_norm": 0.0037893191911280155, "learning_rate": 0.001, "loss": 0.4271, "step": 8391 }, { "epoch": 0.23155375133218595, "grad_norm": 0.0026662342716008425, "learning_rate": 0.001, "loss": 0.3892, "step": 8392 }, { "epoch": 0.23158134353325033, "grad_norm": 0.002584991976618767, "learning_rate": 0.001, "loss": 0.3913, "step": 8393 }, { "epoch": 0.2316089357343147, "grad_norm": 0.002258981578052044, "learning_rate": 0.001, "loss": 0.4066, "step": 8394 }, { "epoch": 0.23163652793537906, "grad_norm": 0.004243454430252314, "learning_rate": 0.001, "loss": 0.3886, "step": 8395 }, { "epoch": 0.23166412013644344, "grad_norm": 0.004912800621241331, "learning_rate": 0.001, "loss": 0.3912, "step": 8396 }, { "epoch": 0.2316917123375078, "grad_norm": 0.0038783997297286987, "learning_rate": 0.001, "loss": 0.3873, "step": 8397 }, { "epoch": 0.23171930453857217, "grad_norm": 0.0028120707720518112, "learning_rate": 0.001, "loss": 0.3917, "step": 8398 }, { "epoch": 0.23174689673963655, "grad_norm": 0.003508375259116292, "learning_rate": 0.001, "loss": 0.3738, "step": 8399 }, { "epoch": 0.2317744889407009, "grad_norm": 0.0044393884018063545, "learning_rate": 0.001, "loss": 0.3629, "step": 8400 }, { "epoch": 0.2318020811417653, "grad_norm": 0.006484494544565678, "learning_rate": 0.001, "loss": 0.3978, "step": 8401 }, { "epoch": 0.23182967334282964, "grad_norm": 0.0041907550767064095, "learning_rate": 0.001, "loss": 0.3908, "step": 8402 }, { "epoch": 0.23185726554389402, "grad_norm": 0.002757348818704486, "learning_rate": 0.001, "loss": 0.3968, "step": 8403 }, { "epoch": 0.2318848577449584, "grad_norm": 0.004819660913199186, "learning_rate": 0.001, "loss": 0.3842, "step": 8404 }, { "epoch": 0.23191244994602275, "grad_norm": 0.0022516471799463034, "learning_rate": 0.001, "loss": 0.4485, "step": 8405 }, { "epoch": 0.23194004214708713, "grad_norm": 0.002369027817621827, "learning_rate": 0.001, "loss": 0.3904, "step": 8406 }, { "epoch": 0.23196763434815149, "grad_norm": 0.0035298550501465797, "learning_rate": 0.001, "loss": 0.3857, "step": 8407 }, { "epoch": 0.23199522654921587, "grad_norm": 0.002744413213804364, "learning_rate": 0.001, "loss": 0.3847, "step": 8408 }, { "epoch": 0.23202281875028025, "grad_norm": 0.002334202639758587, "learning_rate": 0.001, "loss": 0.4083, "step": 8409 }, { "epoch": 0.2320504109513446, "grad_norm": 0.0026494518388062716, "learning_rate": 0.001, "loss": 0.4027, "step": 8410 }, { "epoch": 0.23207800315240898, "grad_norm": 0.002644699066877365, "learning_rate": 0.001, "loss": 0.3781, "step": 8411 }, { "epoch": 0.23210559535347333, "grad_norm": 0.0030237159226089716, "learning_rate": 0.001, "loss": 0.4239, "step": 8412 }, { "epoch": 0.2321331875545377, "grad_norm": 0.003086669836193323, "learning_rate": 0.001, "loss": 0.3747, "step": 8413 }, { "epoch": 0.2321607797556021, "grad_norm": 0.002666539279744029, "learning_rate": 0.001, "loss": 0.4265, "step": 8414 }, { "epoch": 0.23218837195666645, "grad_norm": 0.0034365833271294832, "learning_rate": 0.001, "loss": 0.3816, "step": 8415 }, { "epoch": 0.23221596415773083, "grad_norm": 0.0030753256287425756, "learning_rate": 0.001, "loss": 0.3846, "step": 8416 }, { "epoch": 0.23224355635879518, "grad_norm": 0.003509074915200472, "learning_rate": 0.001, "loss": 0.4075, "step": 8417 }, { "epoch": 0.23227114855985956, "grad_norm": 0.005115900654345751, "learning_rate": 0.001, "loss": 0.4373, "step": 8418 }, { "epoch": 0.23229874076092394, "grad_norm": 0.005185318179428577, "learning_rate": 0.001, "loss": 0.4104, "step": 8419 }, { "epoch": 0.2323263329619883, "grad_norm": 0.0024755573831498623, "learning_rate": 0.001, "loss": 0.4156, "step": 8420 }, { "epoch": 0.23235392516305267, "grad_norm": 0.005173765122890472, "learning_rate": 0.001, "loss": 0.3588, "step": 8421 }, { "epoch": 0.23238151736411702, "grad_norm": 0.004211380612105131, "learning_rate": 0.001, "loss": 0.3902, "step": 8422 }, { "epoch": 0.2324091095651814, "grad_norm": 0.002343039261177182, "learning_rate": 0.001, "loss": 0.3848, "step": 8423 }, { "epoch": 0.23243670176624576, "grad_norm": 0.004708436783403158, "learning_rate": 0.001, "loss": 0.3721, "step": 8424 }, { "epoch": 0.23246429396731014, "grad_norm": 0.004656490869820118, "learning_rate": 0.001, "loss": 0.3696, "step": 8425 }, { "epoch": 0.23249188616837452, "grad_norm": 0.002325586276128888, "learning_rate": 0.001, "loss": 0.3799, "step": 8426 }, { "epoch": 0.23251947836943887, "grad_norm": 0.00335716363042593, "learning_rate": 0.001, "loss": 0.3875, "step": 8427 }, { "epoch": 0.23254707057050325, "grad_norm": 0.00340313115157187, "learning_rate": 0.001, "loss": 0.3978, "step": 8428 }, { "epoch": 0.2325746627715676, "grad_norm": 0.0026581473648548126, "learning_rate": 0.001, "loss": 0.399, "step": 8429 }, { "epoch": 0.23260225497263198, "grad_norm": 0.019147882238030434, "learning_rate": 0.001, "loss": 0.4087, "step": 8430 }, { "epoch": 0.23262984717369636, "grad_norm": 0.0027826984878629446, "learning_rate": 0.001, "loss": 0.3854, "step": 8431 }, { "epoch": 0.23265743937476072, "grad_norm": 0.002678795251995325, "learning_rate": 0.001, "loss": 0.3954, "step": 8432 }, { "epoch": 0.2326850315758251, "grad_norm": 0.002674340968951583, "learning_rate": 0.001, "loss": 0.3785, "step": 8433 }, { "epoch": 0.23271262377688945, "grad_norm": 0.01514797005802393, "learning_rate": 0.001, "loss": 0.3665, "step": 8434 }, { "epoch": 0.23274021597795383, "grad_norm": 0.005059736780822277, "learning_rate": 0.001, "loss": 0.4126, "step": 8435 }, { "epoch": 0.2327678081790182, "grad_norm": 0.0021096577402204275, "learning_rate": 0.001, "loss": 0.4087, "step": 8436 }, { "epoch": 0.23279540038008256, "grad_norm": 0.003909014165401459, "learning_rate": 0.001, "loss": 0.4205, "step": 8437 }, { "epoch": 0.23282299258114694, "grad_norm": 0.003196495585143566, "learning_rate": 0.001, "loss": 0.3961, "step": 8438 }, { "epoch": 0.2328505847822113, "grad_norm": 0.009719678200781345, "learning_rate": 0.001, "loss": 0.3553, "step": 8439 }, { "epoch": 0.23287817698327568, "grad_norm": 0.0031904377974569798, "learning_rate": 0.001, "loss": 0.4286, "step": 8440 }, { "epoch": 0.23290576918434006, "grad_norm": 0.0028641929384320974, "learning_rate": 0.001, "loss": 0.4295, "step": 8441 }, { "epoch": 0.2329333613854044, "grad_norm": 0.0037782087456434965, "learning_rate": 0.001, "loss": 0.3953, "step": 8442 }, { "epoch": 0.2329609535864688, "grad_norm": 0.002453066175803542, "learning_rate": 0.001, "loss": 0.3873, "step": 8443 }, { "epoch": 0.23298854578753314, "grad_norm": 0.0037556791212409735, "learning_rate": 0.001, "loss": 0.3777, "step": 8444 }, { "epoch": 0.23301613798859752, "grad_norm": 0.0029747902881354094, "learning_rate": 0.001, "loss": 0.3621, "step": 8445 }, { "epoch": 0.2330437301896619, "grad_norm": 0.004175838083028793, "learning_rate": 0.001, "loss": 0.3412, "step": 8446 }, { "epoch": 0.23307132239072625, "grad_norm": 0.003063888754695654, "learning_rate": 0.001, "loss": 0.3858, "step": 8447 }, { "epoch": 0.23309891459179063, "grad_norm": 0.0027233557775616646, "learning_rate": 0.001, "loss": 0.3872, "step": 8448 }, { "epoch": 0.233126506792855, "grad_norm": 0.002462203847244382, "learning_rate": 0.001, "loss": 0.3878, "step": 8449 }, { "epoch": 0.23315409899391937, "grad_norm": 0.003968900069594383, "learning_rate": 0.001, "loss": 0.3769, "step": 8450 }, { "epoch": 0.23318169119498375, "grad_norm": 0.004251559264957905, "learning_rate": 0.001, "loss": 0.4029, "step": 8451 }, { "epoch": 0.2332092833960481, "grad_norm": 0.0033609773963689804, "learning_rate": 0.001, "loss": 0.3802, "step": 8452 }, { "epoch": 0.23323687559711248, "grad_norm": 0.0026037050411105156, "learning_rate": 0.001, "loss": 0.4176, "step": 8453 }, { "epoch": 0.23326446779817683, "grad_norm": 0.0028592885937541723, "learning_rate": 0.001, "loss": 0.4125, "step": 8454 }, { "epoch": 0.2332920599992412, "grad_norm": 0.0032447976991534233, "learning_rate": 0.001, "loss": 0.3974, "step": 8455 }, { "epoch": 0.2333196522003056, "grad_norm": 0.0034741810522973537, "learning_rate": 0.001, "loss": 0.4033, "step": 8456 }, { "epoch": 0.23334724440136995, "grad_norm": 0.0030226546805351973, "learning_rate": 0.001, "loss": 0.4209, "step": 8457 }, { "epoch": 0.23337483660243433, "grad_norm": 0.002790014259517193, "learning_rate": 0.001, "loss": 0.3957, "step": 8458 }, { "epoch": 0.23340242880349868, "grad_norm": 0.0024738421197980642, "learning_rate": 0.001, "loss": 0.4226, "step": 8459 }, { "epoch": 0.23343002100456306, "grad_norm": 0.002386496402323246, "learning_rate": 0.001, "loss": 0.4101, "step": 8460 }, { "epoch": 0.23345761320562744, "grad_norm": 0.005026910919696093, "learning_rate": 0.001, "loss": 0.4357, "step": 8461 }, { "epoch": 0.2334852054066918, "grad_norm": 0.002900604158639908, "learning_rate": 0.001, "loss": 0.3655, "step": 8462 }, { "epoch": 0.23351279760775617, "grad_norm": 0.002177110407501459, "learning_rate": 0.001, "loss": 0.3966, "step": 8463 }, { "epoch": 0.23354038980882053, "grad_norm": 0.003278181655332446, "learning_rate": 0.001, "loss": 0.3813, "step": 8464 }, { "epoch": 0.2335679820098849, "grad_norm": 0.002810918027535081, "learning_rate": 0.001, "loss": 0.4054, "step": 8465 }, { "epoch": 0.23359557421094929, "grad_norm": 0.002259747590869665, "learning_rate": 0.001, "loss": 0.4319, "step": 8466 }, { "epoch": 0.23362316641201364, "grad_norm": 0.0023482360411435366, "learning_rate": 0.001, "loss": 0.4158, "step": 8467 }, { "epoch": 0.23365075861307802, "grad_norm": 0.004843599628657103, "learning_rate": 0.001, "loss": 0.3671, "step": 8468 }, { "epoch": 0.23367835081414237, "grad_norm": 0.0035332750994712114, "learning_rate": 0.001, "loss": 0.379, "step": 8469 }, { "epoch": 0.23370594301520675, "grad_norm": 0.009748833253979683, "learning_rate": 0.001, "loss": 0.4217, "step": 8470 }, { "epoch": 0.23373353521627113, "grad_norm": 0.002617282560095191, "learning_rate": 0.001, "loss": 0.407, "step": 8471 }, { "epoch": 0.23376112741733548, "grad_norm": 0.0023585897870361805, "learning_rate": 0.001, "loss": 0.4036, "step": 8472 }, { "epoch": 0.23378871961839987, "grad_norm": 0.004000307526439428, "learning_rate": 0.001, "loss": 0.4238, "step": 8473 }, { "epoch": 0.23381631181946422, "grad_norm": 0.003987070173025131, "learning_rate": 0.001, "loss": 0.3792, "step": 8474 }, { "epoch": 0.2338439040205286, "grad_norm": 0.004123962949961424, "learning_rate": 0.001, "loss": 0.3982, "step": 8475 }, { "epoch": 0.23387149622159298, "grad_norm": 0.014491601847112179, "learning_rate": 0.001, "loss": 0.4144, "step": 8476 }, { "epoch": 0.23389908842265733, "grad_norm": 0.0026626239996403456, "learning_rate": 0.001, "loss": 0.3685, "step": 8477 }, { "epoch": 0.2339266806237217, "grad_norm": 0.006109694018959999, "learning_rate": 0.001, "loss": 0.3845, "step": 8478 }, { "epoch": 0.23395427282478606, "grad_norm": 0.00370441609993577, "learning_rate": 0.001, "loss": 0.3962, "step": 8479 }, { "epoch": 0.23398186502585044, "grad_norm": 0.004282459616661072, "learning_rate": 0.001, "loss": 0.4074, "step": 8480 }, { "epoch": 0.23400945722691482, "grad_norm": 0.003556980052962899, "learning_rate": 0.001, "loss": 0.3753, "step": 8481 }, { "epoch": 0.23403704942797918, "grad_norm": 0.00320242578163743, "learning_rate": 0.001, "loss": 0.4397, "step": 8482 }, { "epoch": 0.23406464162904356, "grad_norm": 0.0037783649750053883, "learning_rate": 0.001, "loss": 0.3802, "step": 8483 }, { "epoch": 0.2340922338301079, "grad_norm": 0.003380347043275833, "learning_rate": 0.001, "loss": 0.4028, "step": 8484 }, { "epoch": 0.2341198260311723, "grad_norm": 0.0029710596427321434, "learning_rate": 0.001, "loss": 0.4007, "step": 8485 }, { "epoch": 0.23414741823223667, "grad_norm": 0.003365810727700591, "learning_rate": 0.001, "loss": 0.3871, "step": 8486 }, { "epoch": 0.23417501043330102, "grad_norm": 0.0027016180101782084, "learning_rate": 0.001, "loss": 0.4146, "step": 8487 }, { "epoch": 0.2342026026343654, "grad_norm": 0.002262349473312497, "learning_rate": 0.001, "loss": 0.3878, "step": 8488 }, { "epoch": 0.23423019483542976, "grad_norm": 0.002855840837582946, "learning_rate": 0.001, "loss": 0.3857, "step": 8489 }, { "epoch": 0.23425778703649414, "grad_norm": 0.004416047595441341, "learning_rate": 0.001, "loss": 0.389, "step": 8490 }, { "epoch": 0.23428537923755852, "grad_norm": 0.0035155070945620537, "learning_rate": 0.001, "loss": 0.392, "step": 8491 }, { "epoch": 0.23431297143862287, "grad_norm": 0.003230678616091609, "learning_rate": 0.001, "loss": 0.3429, "step": 8492 }, { "epoch": 0.23434056363968725, "grad_norm": 0.0038198174443095922, "learning_rate": 0.001, "loss": 0.4244, "step": 8493 }, { "epoch": 0.2343681558407516, "grad_norm": 0.003024979494512081, "learning_rate": 0.001, "loss": 0.3822, "step": 8494 }, { "epoch": 0.23439574804181598, "grad_norm": 0.0022407593205571175, "learning_rate": 0.001, "loss": 0.4219, "step": 8495 }, { "epoch": 0.23442334024288036, "grad_norm": 0.0024072916712611914, "learning_rate": 0.001, "loss": 0.4466, "step": 8496 }, { "epoch": 0.23445093244394472, "grad_norm": 0.004507563542574644, "learning_rate": 0.001, "loss": 0.3714, "step": 8497 }, { "epoch": 0.2344785246450091, "grad_norm": 0.0035954902414232492, "learning_rate": 0.001, "loss": 0.3927, "step": 8498 }, { "epoch": 0.23450611684607345, "grad_norm": 0.003051930107176304, "learning_rate": 0.001, "loss": 0.4189, "step": 8499 }, { "epoch": 0.23453370904713783, "grad_norm": 0.0034672280307859182, "learning_rate": 0.001, "loss": 0.4067, "step": 8500 }, { "epoch": 0.23453370904713783, "eval_runtime": 23.4916, "eval_samples_per_second": 1.362, "eval_steps_per_second": 0.17, "step": 8500 }, { "epoch": 0.2345613012482022, "grad_norm": 0.004859286360442638, "learning_rate": 0.001, "loss": 0.3772, "step": 8501 }, { "epoch": 0.23458889344926656, "grad_norm": 0.005819413810968399, "learning_rate": 0.001, "loss": 0.3959, "step": 8502 }, { "epoch": 0.23461648565033094, "grad_norm": 0.0030995369888842106, "learning_rate": 0.001, "loss": 0.3793, "step": 8503 }, { "epoch": 0.2346440778513953, "grad_norm": 0.0032427343539893627, "learning_rate": 0.001, "loss": 0.3817, "step": 8504 }, { "epoch": 0.23467167005245967, "grad_norm": 0.0034287397284060717, "learning_rate": 0.001, "loss": 0.3937, "step": 8505 }, { "epoch": 0.23469926225352405, "grad_norm": 0.003676005406305194, "learning_rate": 0.001, "loss": 0.422, "step": 8506 }, { "epoch": 0.2347268544545884, "grad_norm": 0.0029224164318293333, "learning_rate": 0.001, "loss": 0.3695, "step": 8507 }, { "epoch": 0.2347544466556528, "grad_norm": 0.00621433649212122, "learning_rate": 0.001, "loss": 0.412, "step": 8508 }, { "epoch": 0.23478203885671714, "grad_norm": 0.0034033150877803564, "learning_rate": 0.001, "loss": 0.3924, "step": 8509 }, { "epoch": 0.23480963105778152, "grad_norm": 0.006222172640264034, "learning_rate": 0.001, "loss": 0.381, "step": 8510 }, { "epoch": 0.2348372232588459, "grad_norm": 0.0026931515894830227, "learning_rate": 0.001, "loss": 0.3948, "step": 8511 }, { "epoch": 0.23486481545991025, "grad_norm": 0.004435943905264139, "learning_rate": 0.001, "loss": 0.4159, "step": 8512 }, { "epoch": 0.23489240766097463, "grad_norm": 0.003721091663464904, "learning_rate": 0.001, "loss": 0.384, "step": 8513 }, { "epoch": 0.234919999862039, "grad_norm": 0.002308727940544486, "learning_rate": 0.001, "loss": 0.3725, "step": 8514 }, { "epoch": 0.23494759206310337, "grad_norm": 0.0038359523750841618, "learning_rate": 0.001, "loss": 0.3872, "step": 8515 }, { "epoch": 0.23497518426416775, "grad_norm": 0.002997712232172489, "learning_rate": 0.001, "loss": 0.3917, "step": 8516 }, { "epoch": 0.2350027764652321, "grad_norm": 0.0034595001488924026, "learning_rate": 0.001, "loss": 0.3658, "step": 8517 }, { "epoch": 0.23503036866629648, "grad_norm": 0.0026987416204065084, "learning_rate": 0.001, "loss": 0.3928, "step": 8518 }, { "epoch": 0.23505796086736083, "grad_norm": 0.00263298605568707, "learning_rate": 0.001, "loss": 0.4276, "step": 8519 }, { "epoch": 0.2350855530684252, "grad_norm": 0.0024717003107070923, "learning_rate": 0.001, "loss": 0.438, "step": 8520 }, { "epoch": 0.23511314526948957, "grad_norm": 0.003266090527176857, "learning_rate": 0.001, "loss": 0.4012, "step": 8521 }, { "epoch": 0.23514073747055395, "grad_norm": 0.007142478600144386, "learning_rate": 0.001, "loss": 0.3845, "step": 8522 }, { "epoch": 0.23516832967161833, "grad_norm": 0.003394266590476036, "learning_rate": 0.001, "loss": 0.3885, "step": 8523 }, { "epoch": 0.23519592187268268, "grad_norm": 0.0034649951849132776, "learning_rate": 0.001, "loss": 0.3894, "step": 8524 }, { "epoch": 0.23522351407374706, "grad_norm": 0.0022391905076801777, "learning_rate": 0.001, "loss": 0.4333, "step": 8525 }, { "epoch": 0.2352511062748114, "grad_norm": 0.002735828747972846, "learning_rate": 0.001, "loss": 0.396, "step": 8526 }, { "epoch": 0.2352786984758758, "grad_norm": 0.00296612735837698, "learning_rate": 0.001, "loss": 0.3983, "step": 8527 }, { "epoch": 0.23530629067694017, "grad_norm": 0.0029764720238745213, "learning_rate": 0.001, "loss": 0.3874, "step": 8528 }, { "epoch": 0.23533388287800452, "grad_norm": 0.002517016837373376, "learning_rate": 0.001, "loss": 0.3849, "step": 8529 }, { "epoch": 0.2353614750790689, "grad_norm": 0.004660237114876509, "learning_rate": 0.001, "loss": 0.3877, "step": 8530 }, { "epoch": 0.23538906728013326, "grad_norm": 0.01403157226741314, "learning_rate": 0.001, "loss": 0.3797, "step": 8531 }, { "epoch": 0.23541665948119764, "grad_norm": 0.002752164611592889, "learning_rate": 0.001, "loss": 0.3705, "step": 8532 }, { "epoch": 0.23544425168226202, "grad_norm": 0.004661790560930967, "learning_rate": 0.001, "loss": 0.3852, "step": 8533 }, { "epoch": 0.23547184388332637, "grad_norm": 0.0029863761737942696, "learning_rate": 0.001, "loss": 0.4253, "step": 8534 }, { "epoch": 0.23549943608439075, "grad_norm": 0.002214206848293543, "learning_rate": 0.001, "loss": 0.4207, "step": 8535 }, { "epoch": 0.2355270282854551, "grad_norm": 0.00356200966052711, "learning_rate": 0.001, "loss": 0.3813, "step": 8536 }, { "epoch": 0.23555462048651948, "grad_norm": 0.0029001201037317514, "learning_rate": 0.001, "loss": 0.3874, "step": 8537 }, { "epoch": 0.23558221268758386, "grad_norm": 0.0023703081533312798, "learning_rate": 0.001, "loss": 0.4531, "step": 8538 }, { "epoch": 0.23560980488864822, "grad_norm": 0.0027728870045393705, "learning_rate": 0.001, "loss": 0.4478, "step": 8539 }, { "epoch": 0.2356373970897126, "grad_norm": 0.002979228738695383, "learning_rate": 0.001, "loss": 0.4161, "step": 8540 }, { "epoch": 0.23566498929077695, "grad_norm": 0.0030399069655686617, "learning_rate": 0.001, "loss": 0.4239, "step": 8541 }, { "epoch": 0.23569258149184133, "grad_norm": 0.0033329655416309834, "learning_rate": 0.001, "loss": 0.3837, "step": 8542 }, { "epoch": 0.2357201736929057, "grad_norm": 0.006761785596609116, "learning_rate": 0.001, "loss": 0.4624, "step": 8543 }, { "epoch": 0.23574776589397006, "grad_norm": 0.0030948342755436897, "learning_rate": 0.001, "loss": 0.4237, "step": 8544 }, { "epoch": 0.23577535809503444, "grad_norm": 0.003666854929178953, "learning_rate": 0.001, "loss": 0.4007, "step": 8545 }, { "epoch": 0.2358029502960988, "grad_norm": 0.0034998871851712465, "learning_rate": 0.001, "loss": 0.4306, "step": 8546 }, { "epoch": 0.23583054249716318, "grad_norm": 0.002683976199477911, "learning_rate": 0.001, "loss": 0.4485, "step": 8547 }, { "epoch": 0.23585813469822756, "grad_norm": 0.003037314862012863, "learning_rate": 0.001, "loss": 0.38, "step": 8548 }, { "epoch": 0.2358857268992919, "grad_norm": 0.002266938565298915, "learning_rate": 0.001, "loss": 0.3883, "step": 8549 }, { "epoch": 0.2359133191003563, "grad_norm": 0.004051513969898224, "learning_rate": 0.001, "loss": 0.3992, "step": 8550 }, { "epoch": 0.23594091130142064, "grad_norm": 0.0025013620033860207, "learning_rate": 0.001, "loss": 0.4445, "step": 8551 }, { "epoch": 0.23596850350248502, "grad_norm": 0.0030788013245910406, "learning_rate": 0.001, "loss": 0.3761, "step": 8552 }, { "epoch": 0.2359960957035494, "grad_norm": 0.003969093784689903, "learning_rate": 0.001, "loss": 0.3943, "step": 8553 }, { "epoch": 0.23602368790461375, "grad_norm": 0.003966695163398981, "learning_rate": 0.001, "loss": 0.3912, "step": 8554 }, { "epoch": 0.23605128010567814, "grad_norm": 0.009804654866456985, "learning_rate": 0.001, "loss": 0.425, "step": 8555 }, { "epoch": 0.2360788723067425, "grad_norm": 0.0031127145048230886, "learning_rate": 0.001, "loss": 0.3728, "step": 8556 }, { "epoch": 0.23610646450780687, "grad_norm": 0.0036678691394627094, "learning_rate": 0.001, "loss": 0.3702, "step": 8557 }, { "epoch": 0.23613405670887125, "grad_norm": 0.003199309343472123, "learning_rate": 0.001, "loss": 0.4266, "step": 8558 }, { "epoch": 0.2361616489099356, "grad_norm": 0.0028935642912983894, "learning_rate": 0.001, "loss": 0.3864, "step": 8559 }, { "epoch": 0.23618924111099998, "grad_norm": 0.0033331215381622314, "learning_rate": 0.001, "loss": 0.3761, "step": 8560 }, { "epoch": 0.23621683331206433, "grad_norm": 0.004135193768888712, "learning_rate": 0.001, "loss": 0.4333, "step": 8561 }, { "epoch": 0.23624442551312871, "grad_norm": 0.008535767905414104, "learning_rate": 0.001, "loss": 0.4235, "step": 8562 }, { "epoch": 0.2362720177141931, "grad_norm": 0.005464480258524418, "learning_rate": 0.001, "loss": 0.3991, "step": 8563 }, { "epoch": 0.23629960991525745, "grad_norm": 0.0026870830915868282, "learning_rate": 0.001, "loss": 0.4111, "step": 8564 }, { "epoch": 0.23632720211632183, "grad_norm": 0.002947178203612566, "learning_rate": 0.001, "loss": 0.3947, "step": 8565 }, { "epoch": 0.23635479431738618, "grad_norm": 0.004835926927626133, "learning_rate": 0.001, "loss": 0.44, "step": 8566 }, { "epoch": 0.23638238651845056, "grad_norm": 0.004437744617462158, "learning_rate": 0.001, "loss": 0.4173, "step": 8567 }, { "epoch": 0.23640997871951494, "grad_norm": 0.0033282919321209192, "learning_rate": 0.001, "loss": 0.3913, "step": 8568 }, { "epoch": 0.2364375709205793, "grad_norm": 0.0035205124877393246, "learning_rate": 0.001, "loss": 0.3725, "step": 8569 }, { "epoch": 0.23646516312164367, "grad_norm": 0.004044828005135059, "learning_rate": 0.001, "loss": 0.4158, "step": 8570 }, { "epoch": 0.23649275532270803, "grad_norm": 0.008543507196009159, "learning_rate": 0.001, "loss": 0.4246, "step": 8571 }, { "epoch": 0.2365203475237724, "grad_norm": 0.005565674975514412, "learning_rate": 0.001, "loss": 0.4339, "step": 8572 }, { "epoch": 0.2365479397248368, "grad_norm": 0.0032614252995699644, "learning_rate": 0.001, "loss": 0.4018, "step": 8573 }, { "epoch": 0.23657553192590114, "grad_norm": 0.0036154796835035086, "learning_rate": 0.001, "loss": 0.4157, "step": 8574 }, { "epoch": 0.23660312412696552, "grad_norm": 0.003126727417111397, "learning_rate": 0.001, "loss": 0.375, "step": 8575 }, { "epoch": 0.23663071632802987, "grad_norm": 0.002990932669490576, "learning_rate": 0.001, "loss": 0.4282, "step": 8576 }, { "epoch": 0.23665830852909425, "grad_norm": 0.0033934074454009533, "learning_rate": 0.001, "loss": 0.3771, "step": 8577 }, { "epoch": 0.23668590073015863, "grad_norm": 0.002845100359991193, "learning_rate": 0.001, "loss": 0.4331, "step": 8578 }, { "epoch": 0.23671349293122299, "grad_norm": 0.005931117571890354, "learning_rate": 0.001, "loss": 0.4225, "step": 8579 }, { "epoch": 0.23674108513228737, "grad_norm": 0.0030062024015933275, "learning_rate": 0.001, "loss": 0.4249, "step": 8580 }, { "epoch": 0.23676867733335172, "grad_norm": 0.004353534895926714, "learning_rate": 0.001, "loss": 0.4043, "step": 8581 }, { "epoch": 0.2367962695344161, "grad_norm": 0.007340231444686651, "learning_rate": 0.001, "loss": 0.368, "step": 8582 }, { "epoch": 0.23682386173548048, "grad_norm": 0.0033272774890065193, "learning_rate": 0.001, "loss": 0.3833, "step": 8583 }, { "epoch": 0.23685145393654483, "grad_norm": 0.002648326801136136, "learning_rate": 0.001, "loss": 0.4067, "step": 8584 }, { "epoch": 0.2368790461376092, "grad_norm": 0.0026433998718857765, "learning_rate": 0.001, "loss": 0.3819, "step": 8585 }, { "epoch": 0.23690663833867356, "grad_norm": 0.0039000390097498894, "learning_rate": 0.001, "loss": 0.3706, "step": 8586 }, { "epoch": 0.23693423053973794, "grad_norm": 0.00217796815559268, "learning_rate": 0.001, "loss": 0.4424, "step": 8587 }, { "epoch": 0.23696182274080232, "grad_norm": 0.007604559417814016, "learning_rate": 0.001, "loss": 0.4186, "step": 8588 }, { "epoch": 0.23698941494186668, "grad_norm": 0.003747879760339856, "learning_rate": 0.001, "loss": 0.4283, "step": 8589 }, { "epoch": 0.23701700714293106, "grad_norm": 0.0028189239092171192, "learning_rate": 0.001, "loss": 0.3938, "step": 8590 }, { "epoch": 0.2370445993439954, "grad_norm": 0.0033880542032420635, "learning_rate": 0.001, "loss": 0.3977, "step": 8591 }, { "epoch": 0.2370721915450598, "grad_norm": 0.004370769020169973, "learning_rate": 0.001, "loss": 0.4062, "step": 8592 }, { "epoch": 0.23709978374612417, "grad_norm": 0.0029521717224270105, "learning_rate": 0.001, "loss": 0.3783, "step": 8593 }, { "epoch": 0.23712737594718852, "grad_norm": 0.0032671017106622458, "learning_rate": 0.001, "loss": 0.4022, "step": 8594 }, { "epoch": 0.2371549681482529, "grad_norm": 0.002473097527399659, "learning_rate": 0.001, "loss": 0.4058, "step": 8595 }, { "epoch": 0.23718256034931726, "grad_norm": 0.0034314331132918596, "learning_rate": 0.001, "loss": 0.4229, "step": 8596 }, { "epoch": 0.23721015255038164, "grad_norm": 0.003747452748939395, "learning_rate": 0.001, "loss": 0.3886, "step": 8597 }, { "epoch": 0.23723774475144602, "grad_norm": 0.0053464872762560844, "learning_rate": 0.001, "loss": 0.3828, "step": 8598 }, { "epoch": 0.23726533695251037, "grad_norm": 0.004069317597895861, "learning_rate": 0.001, "loss": 0.4012, "step": 8599 }, { "epoch": 0.23729292915357475, "grad_norm": 0.00245997728779912, "learning_rate": 0.001, "loss": 0.3766, "step": 8600 }, { "epoch": 0.2373205213546391, "grad_norm": 0.0024410157930105925, "learning_rate": 0.001, "loss": 0.4366, "step": 8601 }, { "epoch": 0.23734811355570348, "grad_norm": 0.002567254938185215, "learning_rate": 0.001, "loss": 0.3979, "step": 8602 }, { "epoch": 0.23737570575676786, "grad_norm": 0.003037130692973733, "learning_rate": 0.001, "loss": 0.366, "step": 8603 }, { "epoch": 0.23740329795783222, "grad_norm": 0.0020308520179241896, "learning_rate": 0.001, "loss": 0.3905, "step": 8604 }, { "epoch": 0.2374308901588966, "grad_norm": 0.008115851320326328, "learning_rate": 0.001, "loss": 0.3839, "step": 8605 }, { "epoch": 0.23745848235996095, "grad_norm": 0.002992655150592327, "learning_rate": 0.001, "loss": 0.3865, "step": 8606 }, { "epoch": 0.23748607456102533, "grad_norm": 0.004864429123699665, "learning_rate": 0.001, "loss": 0.4524, "step": 8607 }, { "epoch": 0.2375136667620897, "grad_norm": 0.0024918217677623034, "learning_rate": 0.001, "loss": 0.4263, "step": 8608 }, { "epoch": 0.23754125896315406, "grad_norm": 0.0024185108486562967, "learning_rate": 0.001, "loss": 0.4077, "step": 8609 }, { "epoch": 0.23756885116421844, "grad_norm": 0.0024980721063911915, "learning_rate": 0.001, "loss": 0.4185, "step": 8610 }, { "epoch": 0.2375964433652828, "grad_norm": 0.002587017137557268, "learning_rate": 0.001, "loss": 0.3884, "step": 8611 }, { "epoch": 0.23762403556634717, "grad_norm": 0.002519281581044197, "learning_rate": 0.001, "loss": 0.3612, "step": 8612 }, { "epoch": 0.23765162776741153, "grad_norm": 0.0031468705274164677, "learning_rate": 0.001, "loss": 0.3984, "step": 8613 }, { "epoch": 0.2376792199684759, "grad_norm": 0.0039115361869335175, "learning_rate": 0.001, "loss": 0.3788, "step": 8614 }, { "epoch": 0.2377068121695403, "grad_norm": 0.002601664513349533, "learning_rate": 0.001, "loss": 0.4121, "step": 8615 }, { "epoch": 0.23773440437060464, "grad_norm": 0.0024887637700885534, "learning_rate": 0.001, "loss": 0.3928, "step": 8616 }, { "epoch": 0.23776199657166902, "grad_norm": 0.0022282125428318977, "learning_rate": 0.001, "loss": 0.4265, "step": 8617 }, { "epoch": 0.23778958877273337, "grad_norm": 0.00493327621370554, "learning_rate": 0.001, "loss": 0.3815, "step": 8618 }, { "epoch": 0.23781718097379775, "grad_norm": 0.003642312716692686, "learning_rate": 0.001, "loss": 0.3982, "step": 8619 }, { "epoch": 0.23784477317486213, "grad_norm": 0.002251163125038147, "learning_rate": 0.001, "loss": 0.4593, "step": 8620 }, { "epoch": 0.2378723653759265, "grad_norm": 0.0038927237037569284, "learning_rate": 0.001, "loss": 0.4109, "step": 8621 }, { "epoch": 0.23789995757699087, "grad_norm": 0.002186572877690196, "learning_rate": 0.001, "loss": 0.4184, "step": 8622 }, { "epoch": 0.23792754977805522, "grad_norm": 0.008558029308915138, "learning_rate": 0.001, "loss": 0.4067, "step": 8623 }, { "epoch": 0.2379551419791196, "grad_norm": 0.0047720009461045265, "learning_rate": 0.001, "loss": 0.395, "step": 8624 }, { "epoch": 0.23798273418018398, "grad_norm": 0.007841572165489197, "learning_rate": 0.001, "loss": 0.3685, "step": 8625 }, { "epoch": 0.23801032638124833, "grad_norm": 0.006973532494157553, "learning_rate": 0.001, "loss": 0.3952, "step": 8626 }, { "epoch": 0.2380379185823127, "grad_norm": 0.002645769389346242, "learning_rate": 0.001, "loss": 0.3651, "step": 8627 }, { "epoch": 0.23806551078337707, "grad_norm": 0.013875133357942104, "learning_rate": 0.001, "loss": 0.4192, "step": 8628 }, { "epoch": 0.23809310298444145, "grad_norm": 0.005976676940917969, "learning_rate": 0.001, "loss": 0.4268, "step": 8629 }, { "epoch": 0.23812069518550583, "grad_norm": 0.0029498045332729816, "learning_rate": 0.001, "loss": 0.368, "step": 8630 }, { "epoch": 0.23814828738657018, "grad_norm": 0.00462282495573163, "learning_rate": 0.001, "loss": 0.445, "step": 8631 }, { "epoch": 0.23817587958763456, "grad_norm": 0.003022623248398304, "learning_rate": 0.001, "loss": 0.3804, "step": 8632 }, { "epoch": 0.2382034717886989, "grad_norm": 0.003926178906112909, "learning_rate": 0.001, "loss": 0.3651, "step": 8633 }, { "epoch": 0.2382310639897633, "grad_norm": 0.0035945766139775515, "learning_rate": 0.001, "loss": 0.3681, "step": 8634 }, { "epoch": 0.23825865619082767, "grad_norm": 0.00597176980227232, "learning_rate": 0.001, "loss": 0.3961, "step": 8635 }, { "epoch": 0.23828624839189202, "grad_norm": 0.004440873861312866, "learning_rate": 0.001, "loss": 0.3614, "step": 8636 }, { "epoch": 0.2383138405929564, "grad_norm": 0.002808907302096486, "learning_rate": 0.001, "loss": 0.3908, "step": 8637 }, { "epoch": 0.23834143279402076, "grad_norm": 0.002455639885738492, "learning_rate": 0.001, "loss": 0.3661, "step": 8638 }, { "epoch": 0.23836902499508514, "grad_norm": 0.003995021339505911, "learning_rate": 0.001, "loss": 0.4142, "step": 8639 }, { "epoch": 0.23839661719614952, "grad_norm": 0.0029346742667257786, "learning_rate": 0.001, "loss": 0.4076, "step": 8640 }, { "epoch": 0.23842420939721387, "grad_norm": 0.0027218139730393887, "learning_rate": 0.001, "loss": 0.4117, "step": 8641 }, { "epoch": 0.23845180159827825, "grad_norm": 0.002403157763183117, "learning_rate": 0.001, "loss": 0.3999, "step": 8642 }, { "epoch": 0.2384793937993426, "grad_norm": 0.0038983020931482315, "learning_rate": 0.001, "loss": 0.3968, "step": 8643 }, { "epoch": 0.23850698600040698, "grad_norm": 0.002467036945745349, "learning_rate": 0.001, "loss": 0.4214, "step": 8644 }, { "epoch": 0.23853457820147136, "grad_norm": 0.003569959197193384, "learning_rate": 0.001, "loss": 0.4, "step": 8645 }, { "epoch": 0.23856217040253572, "grad_norm": 0.0020964080467820168, "learning_rate": 0.001, "loss": 0.3975, "step": 8646 }, { "epoch": 0.2385897626036001, "grad_norm": 0.0025515384040772915, "learning_rate": 0.001, "loss": 0.3767, "step": 8647 }, { "epoch": 0.23861735480466445, "grad_norm": 0.0025259265676140785, "learning_rate": 0.001, "loss": 0.3789, "step": 8648 }, { "epoch": 0.23864494700572883, "grad_norm": 0.002351469825953245, "learning_rate": 0.001, "loss": 0.4007, "step": 8649 }, { "epoch": 0.2386725392067932, "grad_norm": 0.002201459836214781, "learning_rate": 0.001, "loss": 0.4112, "step": 8650 }, { "epoch": 0.23870013140785756, "grad_norm": 0.0032044921535998583, "learning_rate": 0.001, "loss": 0.3948, "step": 8651 }, { "epoch": 0.23872772360892194, "grad_norm": 0.0036108682397753, "learning_rate": 0.001, "loss": 0.4082, "step": 8652 }, { "epoch": 0.2387553158099863, "grad_norm": 0.0031174325849860907, "learning_rate": 0.001, "loss": 0.3674, "step": 8653 }, { "epoch": 0.23878290801105068, "grad_norm": 0.0035186016466468573, "learning_rate": 0.001, "loss": 0.3882, "step": 8654 }, { "epoch": 0.23881050021211506, "grad_norm": 0.002634822390973568, "learning_rate": 0.001, "loss": 0.4128, "step": 8655 }, { "epoch": 0.2388380924131794, "grad_norm": 0.0029784373473376036, "learning_rate": 0.001, "loss": 0.3823, "step": 8656 }, { "epoch": 0.2388656846142438, "grad_norm": 0.0027631595730781555, "learning_rate": 0.001, "loss": 0.4058, "step": 8657 }, { "epoch": 0.23889327681530814, "grad_norm": 0.007283089216798544, "learning_rate": 0.001, "loss": 0.3752, "step": 8658 }, { "epoch": 0.23892086901637252, "grad_norm": 0.0024284112732857466, "learning_rate": 0.001, "loss": 0.3813, "step": 8659 }, { "epoch": 0.2389484612174369, "grad_norm": 0.0027097521815449, "learning_rate": 0.001, "loss": 0.3926, "step": 8660 }, { "epoch": 0.23897605341850126, "grad_norm": 0.0025627308059483767, "learning_rate": 0.001, "loss": 0.4046, "step": 8661 }, { "epoch": 0.23900364561956564, "grad_norm": 0.003014184534549713, "learning_rate": 0.001, "loss": 0.3913, "step": 8662 }, { "epoch": 0.23903123782063, "grad_norm": 0.0035087834112346172, "learning_rate": 0.001, "loss": 0.3609, "step": 8663 }, { "epoch": 0.23905883002169437, "grad_norm": 0.004521367605775595, "learning_rate": 0.001, "loss": 0.423, "step": 8664 }, { "epoch": 0.23908642222275875, "grad_norm": 0.003859465243294835, "learning_rate": 0.001, "loss": 0.4388, "step": 8665 }, { "epoch": 0.2391140144238231, "grad_norm": 0.0035421785432845354, "learning_rate": 0.001, "loss": 0.3947, "step": 8666 }, { "epoch": 0.23914160662488748, "grad_norm": 0.0022301869466900826, "learning_rate": 0.001, "loss": 0.4046, "step": 8667 }, { "epoch": 0.23916919882595183, "grad_norm": 0.0030494078528136015, "learning_rate": 0.001, "loss": 0.4232, "step": 8668 }, { "epoch": 0.23919679102701621, "grad_norm": 0.0031116558238863945, "learning_rate": 0.001, "loss": 0.411, "step": 8669 }, { "epoch": 0.2392243832280806, "grad_norm": 0.002859594067558646, "learning_rate": 0.001, "loss": 0.3991, "step": 8670 }, { "epoch": 0.23925197542914495, "grad_norm": 0.002495800144970417, "learning_rate": 0.001, "loss": 0.4545, "step": 8671 }, { "epoch": 0.23927956763020933, "grad_norm": 0.0030453058425337076, "learning_rate": 0.001, "loss": 0.4287, "step": 8672 }, { "epoch": 0.23930715983127368, "grad_norm": 0.0048926519230008125, "learning_rate": 0.001, "loss": 0.4131, "step": 8673 }, { "epoch": 0.23933475203233806, "grad_norm": 0.0029445511754602194, "learning_rate": 0.001, "loss": 0.4025, "step": 8674 }, { "epoch": 0.23936234423340244, "grad_norm": 0.00260615604929626, "learning_rate": 0.001, "loss": 0.4355, "step": 8675 }, { "epoch": 0.2393899364344668, "grad_norm": 0.003435730002820492, "learning_rate": 0.001, "loss": 0.4164, "step": 8676 }, { "epoch": 0.23941752863553117, "grad_norm": 0.0024756945203989744, "learning_rate": 0.001, "loss": 0.4061, "step": 8677 }, { "epoch": 0.23944512083659553, "grad_norm": 0.0030621823389083147, "learning_rate": 0.001, "loss": 0.3652, "step": 8678 }, { "epoch": 0.2394727130376599, "grad_norm": 0.003644311334937811, "learning_rate": 0.001, "loss": 0.3834, "step": 8679 }, { "epoch": 0.2395003052387243, "grad_norm": 0.0026737714651972055, "learning_rate": 0.001, "loss": 0.4288, "step": 8680 }, { "epoch": 0.23952789743978864, "grad_norm": 0.0027650066185742617, "learning_rate": 0.001, "loss": 0.3592, "step": 8681 }, { "epoch": 0.23955548964085302, "grad_norm": 0.010126309469342232, "learning_rate": 0.001, "loss": 0.4196, "step": 8682 }, { "epoch": 0.23958308184191737, "grad_norm": 0.008825338445603848, "learning_rate": 0.001, "loss": 0.41, "step": 8683 }, { "epoch": 0.23961067404298175, "grad_norm": 0.0028963929507881403, "learning_rate": 0.001, "loss": 0.3777, "step": 8684 }, { "epoch": 0.23963826624404613, "grad_norm": 0.006523852702230215, "learning_rate": 0.001, "loss": 0.3625, "step": 8685 }, { "epoch": 0.23966585844511049, "grad_norm": 0.0027353796176612377, "learning_rate": 0.001, "loss": 0.3735, "step": 8686 }, { "epoch": 0.23969345064617487, "grad_norm": 0.0034795296378433704, "learning_rate": 0.001, "loss": 0.3765, "step": 8687 }, { "epoch": 0.23972104284723922, "grad_norm": 0.002904376946389675, "learning_rate": 0.001, "loss": 0.4083, "step": 8688 }, { "epoch": 0.2397486350483036, "grad_norm": 0.00341939739882946, "learning_rate": 0.001, "loss": 0.3886, "step": 8689 }, { "epoch": 0.23977622724936798, "grad_norm": 0.00255574774928391, "learning_rate": 0.001, "loss": 0.4052, "step": 8690 }, { "epoch": 0.23980381945043233, "grad_norm": 0.002928397851064801, "learning_rate": 0.001, "loss": 0.3964, "step": 8691 }, { "epoch": 0.2398314116514967, "grad_norm": 0.003814896335825324, "learning_rate": 0.001, "loss": 0.3993, "step": 8692 }, { "epoch": 0.23985900385256106, "grad_norm": 0.004918345715850592, "learning_rate": 0.001, "loss": 0.3962, "step": 8693 }, { "epoch": 0.23988659605362544, "grad_norm": 0.00214549177326262, "learning_rate": 0.001, "loss": 0.4043, "step": 8694 }, { "epoch": 0.23991418825468983, "grad_norm": 0.00271332124248147, "learning_rate": 0.001, "loss": 0.3752, "step": 8695 }, { "epoch": 0.23994178045575418, "grad_norm": 0.005185315851122141, "learning_rate": 0.001, "loss": 0.4045, "step": 8696 }, { "epoch": 0.23996937265681856, "grad_norm": 0.003353232517838478, "learning_rate": 0.001, "loss": 0.4263, "step": 8697 }, { "epoch": 0.2399969648578829, "grad_norm": 0.0031625647097826004, "learning_rate": 0.001, "loss": 0.3659, "step": 8698 }, { "epoch": 0.2400245570589473, "grad_norm": 0.0053766947239637375, "learning_rate": 0.001, "loss": 0.4522, "step": 8699 }, { "epoch": 0.24005214926001167, "grad_norm": 0.005576374474912882, "learning_rate": 0.001, "loss": 0.431, "step": 8700 }, { "epoch": 0.24007974146107602, "grad_norm": 0.003967816010117531, "learning_rate": 0.001, "loss": 0.3623, "step": 8701 }, { "epoch": 0.2401073336621404, "grad_norm": 0.0026094361674040556, "learning_rate": 0.001, "loss": 0.3997, "step": 8702 }, { "epoch": 0.24013492586320476, "grad_norm": 0.049603283405303955, "learning_rate": 0.001, "loss": 0.4175, "step": 8703 }, { "epoch": 0.24016251806426914, "grad_norm": 0.0027699440252035856, "learning_rate": 0.001, "loss": 0.388, "step": 8704 }, { "epoch": 0.2401901102653335, "grad_norm": 0.0032966039143502712, "learning_rate": 0.001, "loss": 0.3743, "step": 8705 }, { "epoch": 0.24021770246639787, "grad_norm": 0.0029616530518978834, "learning_rate": 0.001, "loss": 0.4218, "step": 8706 }, { "epoch": 0.24024529466746225, "grad_norm": 0.0031759492121636868, "learning_rate": 0.001, "loss": 0.3906, "step": 8707 }, { "epoch": 0.2402728868685266, "grad_norm": 0.002272370969876647, "learning_rate": 0.001, "loss": 0.4132, "step": 8708 }, { "epoch": 0.24030047906959098, "grad_norm": 0.004187437240034342, "learning_rate": 0.001, "loss": 0.4007, "step": 8709 }, { "epoch": 0.24032807127065534, "grad_norm": 0.0026218758430331945, "learning_rate": 0.001, "loss": 0.3878, "step": 8710 }, { "epoch": 0.24035566347171972, "grad_norm": 0.0024012320209294558, "learning_rate": 0.001, "loss": 0.4175, "step": 8711 }, { "epoch": 0.2403832556727841, "grad_norm": 0.004809997510164976, "learning_rate": 0.001, "loss": 0.4036, "step": 8712 }, { "epoch": 0.24041084787384845, "grad_norm": 0.0030046291649341583, "learning_rate": 0.001, "loss": 0.4037, "step": 8713 }, { "epoch": 0.24043844007491283, "grad_norm": 0.0032057911157608032, "learning_rate": 0.001, "loss": 0.4252, "step": 8714 }, { "epoch": 0.24046603227597718, "grad_norm": 0.0034800011198967695, "learning_rate": 0.001, "loss": 0.404, "step": 8715 }, { "epoch": 0.24049362447704156, "grad_norm": 0.0029743260238319635, "learning_rate": 0.001, "loss": 0.3756, "step": 8716 }, { "epoch": 0.24052121667810594, "grad_norm": 0.0031264862045645714, "learning_rate": 0.001, "loss": 0.3836, "step": 8717 }, { "epoch": 0.2405488088791703, "grad_norm": 0.0027079239953309298, "learning_rate": 0.001, "loss": 0.3933, "step": 8718 }, { "epoch": 0.24057640108023468, "grad_norm": 0.0026093865744769573, "learning_rate": 0.001, "loss": 0.3818, "step": 8719 }, { "epoch": 0.24060399328129903, "grad_norm": 0.004578443244099617, "learning_rate": 0.001, "loss": 0.3829, "step": 8720 }, { "epoch": 0.2406315854823634, "grad_norm": 0.0024112521205097437, "learning_rate": 0.001, "loss": 0.4194, "step": 8721 }, { "epoch": 0.2406591776834278, "grad_norm": 0.0029260313604027033, "learning_rate": 0.001, "loss": 0.3582, "step": 8722 }, { "epoch": 0.24068676988449214, "grad_norm": 0.003243402810767293, "learning_rate": 0.001, "loss": 0.3539, "step": 8723 }, { "epoch": 0.24071436208555652, "grad_norm": 0.03149477019906044, "learning_rate": 0.001, "loss": 0.4075, "step": 8724 }, { "epoch": 0.24074195428662087, "grad_norm": 0.002621316583827138, "learning_rate": 0.001, "loss": 0.3835, "step": 8725 }, { "epoch": 0.24076954648768525, "grad_norm": 0.007181955501437187, "learning_rate": 0.001, "loss": 0.374, "step": 8726 }, { "epoch": 0.24079713868874963, "grad_norm": 0.006243562325835228, "learning_rate": 0.001, "loss": 0.4195, "step": 8727 }, { "epoch": 0.240824730889814, "grad_norm": 0.005759544670581818, "learning_rate": 0.001, "loss": 0.3862, "step": 8728 }, { "epoch": 0.24085232309087837, "grad_norm": 0.002845682902261615, "learning_rate": 0.001, "loss": 0.4156, "step": 8729 }, { "epoch": 0.24087991529194272, "grad_norm": 0.00525195337831974, "learning_rate": 0.001, "loss": 0.4125, "step": 8730 }, { "epoch": 0.2409075074930071, "grad_norm": 0.004564746282994747, "learning_rate": 0.001, "loss": 0.3639, "step": 8731 }, { "epoch": 0.24093509969407148, "grad_norm": 0.006655642297118902, "learning_rate": 0.001, "loss": 0.4284, "step": 8732 }, { "epoch": 0.24096269189513583, "grad_norm": 0.003846411593258381, "learning_rate": 0.001, "loss": 0.4351, "step": 8733 }, { "epoch": 0.2409902840962002, "grad_norm": 0.003326327772811055, "learning_rate": 0.001, "loss": 0.3892, "step": 8734 }, { "epoch": 0.24101787629726457, "grad_norm": 0.013138518668711185, "learning_rate": 0.001, "loss": 0.3856, "step": 8735 }, { "epoch": 0.24104546849832895, "grad_norm": 0.007260841317474842, "learning_rate": 0.001, "loss": 0.3965, "step": 8736 }, { "epoch": 0.24107306069939333, "grad_norm": 0.0034342585131525993, "learning_rate": 0.001, "loss": 0.3759, "step": 8737 }, { "epoch": 0.24110065290045768, "grad_norm": 0.0038822691421955824, "learning_rate": 0.001, "loss": 0.4026, "step": 8738 }, { "epoch": 0.24112824510152206, "grad_norm": 0.0024222838692367077, "learning_rate": 0.001, "loss": 0.4189, "step": 8739 }, { "epoch": 0.2411558373025864, "grad_norm": 0.0026801645290106535, "learning_rate": 0.001, "loss": 0.3915, "step": 8740 }, { "epoch": 0.2411834295036508, "grad_norm": 0.007557088974863291, "learning_rate": 0.001, "loss": 0.4343, "step": 8741 }, { "epoch": 0.24121102170471517, "grad_norm": 0.02677319385111332, "learning_rate": 0.001, "loss": 0.4009, "step": 8742 }, { "epoch": 0.24123861390577953, "grad_norm": 0.0026736543513834476, "learning_rate": 0.001, "loss": 0.3915, "step": 8743 }, { "epoch": 0.2412662061068439, "grad_norm": 0.0023438511416316032, "learning_rate": 0.001, "loss": 0.4387, "step": 8744 }, { "epoch": 0.24129379830790826, "grad_norm": 0.004221671260893345, "learning_rate": 0.001, "loss": 0.3227, "step": 8745 }, { "epoch": 0.24132139050897264, "grad_norm": 0.004964245017617941, "learning_rate": 0.001, "loss": 0.3824, "step": 8746 }, { "epoch": 0.24134898271003702, "grad_norm": 0.004307127092033625, "learning_rate": 0.001, "loss": 0.3757, "step": 8747 }, { "epoch": 0.24137657491110137, "grad_norm": 0.004791958257555962, "learning_rate": 0.001, "loss": 0.38, "step": 8748 }, { "epoch": 0.24140416711216575, "grad_norm": 0.004529708996415138, "learning_rate": 0.001, "loss": 0.3639, "step": 8749 }, { "epoch": 0.2414317593132301, "grad_norm": 0.004783578682690859, "learning_rate": 0.001, "loss": 0.3648, "step": 8750 }, { "epoch": 0.24145935151429448, "grad_norm": 0.003613299923017621, "learning_rate": 0.001, "loss": 0.4064, "step": 8751 }, { "epoch": 0.24148694371535886, "grad_norm": 0.003846370615065098, "learning_rate": 0.001, "loss": 0.4148, "step": 8752 }, { "epoch": 0.24151453591642322, "grad_norm": 0.002877620980143547, "learning_rate": 0.001, "loss": 0.3835, "step": 8753 }, { "epoch": 0.2415421281174876, "grad_norm": 0.004218699410557747, "learning_rate": 0.001, "loss": 0.4264, "step": 8754 }, { "epoch": 0.24156972031855195, "grad_norm": 0.003392399987205863, "learning_rate": 0.001, "loss": 0.4003, "step": 8755 }, { "epoch": 0.24159731251961633, "grad_norm": 0.0036800485104322433, "learning_rate": 0.001, "loss": 0.4078, "step": 8756 }, { "epoch": 0.2416249047206807, "grad_norm": 0.0023208935745060444, "learning_rate": 0.001, "loss": 0.4371, "step": 8757 }, { "epoch": 0.24165249692174506, "grad_norm": 0.0029652283992618322, "learning_rate": 0.001, "loss": 0.3543, "step": 8758 }, { "epoch": 0.24168008912280944, "grad_norm": 0.00395188620314002, "learning_rate": 0.001, "loss": 0.398, "step": 8759 }, { "epoch": 0.2417076813238738, "grad_norm": 0.0028020909521728754, "learning_rate": 0.001, "loss": 0.423, "step": 8760 }, { "epoch": 0.24173527352493818, "grad_norm": 0.002905269619077444, "learning_rate": 0.001, "loss": 0.3582, "step": 8761 }, { "epoch": 0.24176286572600256, "grad_norm": 0.0026353024877607822, "learning_rate": 0.001, "loss": 0.3987, "step": 8762 }, { "epoch": 0.2417904579270669, "grad_norm": 0.009191271848976612, "learning_rate": 0.001, "loss": 0.3785, "step": 8763 }, { "epoch": 0.2418180501281313, "grad_norm": 0.002872015815228224, "learning_rate": 0.001, "loss": 0.4074, "step": 8764 }, { "epoch": 0.24184564232919564, "grad_norm": 0.0035799501929432154, "learning_rate": 0.001, "loss": 0.3845, "step": 8765 }, { "epoch": 0.24187323453026002, "grad_norm": 0.0039528412744402885, "learning_rate": 0.001, "loss": 0.3875, "step": 8766 }, { "epoch": 0.2419008267313244, "grad_norm": 0.004701048135757446, "learning_rate": 0.001, "loss": 0.3817, "step": 8767 }, { "epoch": 0.24192841893238876, "grad_norm": 0.0029811752028763294, "learning_rate": 0.001, "loss": 0.3966, "step": 8768 }, { "epoch": 0.24195601113345314, "grad_norm": 0.00273152650333941, "learning_rate": 0.001, "loss": 0.3978, "step": 8769 }, { "epoch": 0.2419836033345175, "grad_norm": 0.0036260110791772604, "learning_rate": 0.001, "loss": 0.4006, "step": 8770 }, { "epoch": 0.24201119553558187, "grad_norm": 0.0030035688541829586, "learning_rate": 0.001, "loss": 0.4333, "step": 8771 }, { "epoch": 0.24203878773664625, "grad_norm": 0.003084450261667371, "learning_rate": 0.001, "loss": 0.4114, "step": 8772 }, { "epoch": 0.2420663799377106, "grad_norm": 0.0035567975137382746, "learning_rate": 0.001, "loss": 0.3657, "step": 8773 }, { "epoch": 0.24209397213877498, "grad_norm": 0.0024042977020144463, "learning_rate": 0.001, "loss": 0.4216, "step": 8774 }, { "epoch": 0.24212156433983933, "grad_norm": 0.005611390806734562, "learning_rate": 0.001, "loss": 0.4223, "step": 8775 }, { "epoch": 0.24214915654090371, "grad_norm": 0.0022727535106241703, "learning_rate": 0.001, "loss": 0.403, "step": 8776 }, { "epoch": 0.2421767487419681, "grad_norm": 0.004948400892317295, "learning_rate": 0.001, "loss": 0.3891, "step": 8777 }, { "epoch": 0.24220434094303245, "grad_norm": 0.005593698471784592, "learning_rate": 0.001, "loss": 0.4204, "step": 8778 }, { "epoch": 0.24223193314409683, "grad_norm": 0.002849163953214884, "learning_rate": 0.001, "loss": 0.375, "step": 8779 }, { "epoch": 0.24225952534516118, "grad_norm": 0.0032104626297950745, "learning_rate": 0.001, "loss": 0.4159, "step": 8780 }, { "epoch": 0.24228711754622556, "grad_norm": 0.0051482305862009525, "learning_rate": 0.001, "loss": 0.3794, "step": 8781 }, { "epoch": 0.24231470974728994, "grad_norm": 0.0035567518789321184, "learning_rate": 0.001, "loss": 0.3672, "step": 8782 }, { "epoch": 0.2423423019483543, "grad_norm": 0.0023606910835951567, "learning_rate": 0.001, "loss": 0.4256, "step": 8783 }, { "epoch": 0.24236989414941867, "grad_norm": 0.0028458137530833483, "learning_rate": 0.001, "loss": 0.3894, "step": 8784 }, { "epoch": 0.24239748635048303, "grad_norm": 0.0029930644668638706, "learning_rate": 0.001, "loss": 0.4275, "step": 8785 }, { "epoch": 0.2424250785515474, "grad_norm": 0.0022928270045667887, "learning_rate": 0.001, "loss": 0.399, "step": 8786 }, { "epoch": 0.2424526707526118, "grad_norm": 0.003093563485890627, "learning_rate": 0.001, "loss": 0.3978, "step": 8787 }, { "epoch": 0.24248026295367614, "grad_norm": 0.002772730076685548, "learning_rate": 0.001, "loss": 0.3936, "step": 8788 }, { "epoch": 0.24250785515474052, "grad_norm": 0.00274568609893322, "learning_rate": 0.001, "loss": 0.4049, "step": 8789 }, { "epoch": 0.24253544735580487, "grad_norm": 0.0022818988654762506, "learning_rate": 0.001, "loss": 0.3854, "step": 8790 }, { "epoch": 0.24256303955686925, "grad_norm": 0.004985783249139786, "learning_rate": 0.001, "loss": 0.3894, "step": 8791 }, { "epoch": 0.24259063175793363, "grad_norm": 0.00211884337477386, "learning_rate": 0.001, "loss": 0.4173, "step": 8792 }, { "epoch": 0.24261822395899799, "grad_norm": 0.00450787041336298, "learning_rate": 0.001, "loss": 0.3957, "step": 8793 }, { "epoch": 0.24264581616006237, "grad_norm": 0.003899297444149852, "learning_rate": 0.001, "loss": 0.3815, "step": 8794 }, { "epoch": 0.24267340836112672, "grad_norm": 0.002133857225999236, "learning_rate": 0.001, "loss": 0.3983, "step": 8795 }, { "epoch": 0.2427010005621911, "grad_norm": 0.002347117057070136, "learning_rate": 0.001, "loss": 0.4273, "step": 8796 }, { "epoch": 0.24272859276325548, "grad_norm": 0.0024913426022976637, "learning_rate": 0.001, "loss": 0.4102, "step": 8797 }, { "epoch": 0.24275618496431983, "grad_norm": 0.0028214750345796347, "learning_rate": 0.001, "loss": 0.4053, "step": 8798 }, { "epoch": 0.2427837771653842, "grad_norm": 0.002642587758600712, "learning_rate": 0.001, "loss": 0.3987, "step": 8799 }, { "epoch": 0.24281136936644857, "grad_norm": 0.002706260420382023, "learning_rate": 0.001, "loss": 0.42, "step": 8800 }, { "epoch": 0.24283896156751295, "grad_norm": 0.0023238682188093662, "learning_rate": 0.001, "loss": 0.3988, "step": 8801 }, { "epoch": 0.2428665537685773, "grad_norm": 0.003259580582380295, "learning_rate": 0.001, "loss": 0.3964, "step": 8802 }, { "epoch": 0.24289414596964168, "grad_norm": 0.007316852454096079, "learning_rate": 0.001, "loss": 0.4152, "step": 8803 }, { "epoch": 0.24292173817070606, "grad_norm": 0.003550121560692787, "learning_rate": 0.001, "loss": 0.4007, "step": 8804 }, { "epoch": 0.2429493303717704, "grad_norm": 0.002780807903036475, "learning_rate": 0.001, "loss": 0.3873, "step": 8805 }, { "epoch": 0.2429769225728348, "grad_norm": 0.0035296916030347347, "learning_rate": 0.001, "loss": 0.3655, "step": 8806 }, { "epoch": 0.24300451477389914, "grad_norm": 0.005781420040875673, "learning_rate": 0.001, "loss": 0.4002, "step": 8807 }, { "epoch": 0.24303210697496352, "grad_norm": 0.0032384470105171204, "learning_rate": 0.001, "loss": 0.3871, "step": 8808 }, { "epoch": 0.2430596991760279, "grad_norm": 0.007727981545031071, "learning_rate": 0.001, "loss": 0.4077, "step": 8809 }, { "epoch": 0.24308729137709226, "grad_norm": 0.024563631042838097, "learning_rate": 0.001, "loss": 0.4065, "step": 8810 }, { "epoch": 0.24311488357815664, "grad_norm": 0.00322868674993515, "learning_rate": 0.001, "loss": 0.401, "step": 8811 }, { "epoch": 0.243142475779221, "grad_norm": 0.004947735462337732, "learning_rate": 0.001, "loss": 0.3812, "step": 8812 }, { "epoch": 0.24317006798028537, "grad_norm": 0.004064179491251707, "learning_rate": 0.001, "loss": 0.3854, "step": 8813 }, { "epoch": 0.24319766018134975, "grad_norm": 0.005366888828575611, "learning_rate": 0.001, "loss": 0.4295, "step": 8814 }, { "epoch": 0.2432252523824141, "grad_norm": 0.012629834935069084, "learning_rate": 0.001, "loss": 0.3826, "step": 8815 }, { "epoch": 0.24325284458347848, "grad_norm": 0.004325262736529112, "learning_rate": 0.001, "loss": 0.3958, "step": 8816 }, { "epoch": 0.24328043678454284, "grad_norm": 0.002770826453343034, "learning_rate": 0.001, "loss": 0.4106, "step": 8817 }, { "epoch": 0.24330802898560722, "grad_norm": 0.005562702193856239, "learning_rate": 0.001, "loss": 0.3986, "step": 8818 }, { "epoch": 0.2433356211866716, "grad_norm": 0.002646266482770443, "learning_rate": 0.001, "loss": 0.4243, "step": 8819 }, { "epoch": 0.24336321338773595, "grad_norm": 0.003909732680767775, "learning_rate": 0.001, "loss": 0.3725, "step": 8820 }, { "epoch": 0.24339080558880033, "grad_norm": 0.0035097142681479454, "learning_rate": 0.001, "loss": 0.3911, "step": 8821 }, { "epoch": 0.24341839778986468, "grad_norm": 0.002032148651778698, "learning_rate": 0.001, "loss": 0.4234, "step": 8822 }, { "epoch": 0.24344598999092906, "grad_norm": 0.00408203387632966, "learning_rate": 0.001, "loss": 0.4452, "step": 8823 }, { "epoch": 0.24347358219199344, "grad_norm": 0.0028013025876134634, "learning_rate": 0.001, "loss": 0.3754, "step": 8824 }, { "epoch": 0.2435011743930578, "grad_norm": 0.0023070168681442738, "learning_rate": 0.001, "loss": 0.3962, "step": 8825 }, { "epoch": 0.24352876659412218, "grad_norm": 0.006049746181815863, "learning_rate": 0.001, "loss": 0.3658, "step": 8826 }, { "epoch": 0.24355635879518653, "grad_norm": 0.004005006048828363, "learning_rate": 0.001, "loss": 0.4135, "step": 8827 }, { "epoch": 0.2435839509962509, "grad_norm": 0.003078661160543561, "learning_rate": 0.001, "loss": 0.3725, "step": 8828 }, { "epoch": 0.2436115431973153, "grad_norm": 0.0025324684102088213, "learning_rate": 0.001, "loss": 0.3884, "step": 8829 }, { "epoch": 0.24363913539837964, "grad_norm": 0.0028970125131309032, "learning_rate": 0.001, "loss": 0.3961, "step": 8830 }, { "epoch": 0.24366672759944402, "grad_norm": 0.0022717502433806658, "learning_rate": 0.001, "loss": 0.3677, "step": 8831 }, { "epoch": 0.24369431980050837, "grad_norm": 0.0029859100468456745, "learning_rate": 0.001, "loss": 0.3481, "step": 8832 }, { "epoch": 0.24372191200157275, "grad_norm": 0.002873897785320878, "learning_rate": 0.001, "loss": 0.3884, "step": 8833 }, { "epoch": 0.24374950420263714, "grad_norm": 0.00388680980540812, "learning_rate": 0.001, "loss": 0.3748, "step": 8834 }, { "epoch": 0.2437770964037015, "grad_norm": 0.0036059573758393526, "learning_rate": 0.001, "loss": 0.4325, "step": 8835 }, { "epoch": 0.24380468860476587, "grad_norm": 0.004464290104806423, "learning_rate": 0.001, "loss": 0.3621, "step": 8836 }, { "epoch": 0.24383228080583022, "grad_norm": 0.002686945255845785, "learning_rate": 0.001, "loss": 0.4247, "step": 8837 }, { "epoch": 0.2438598730068946, "grad_norm": 0.0031961945351213217, "learning_rate": 0.001, "loss": 0.3662, "step": 8838 }, { "epoch": 0.24388746520795898, "grad_norm": 0.002320102881640196, "learning_rate": 0.001, "loss": 0.3876, "step": 8839 }, { "epoch": 0.24391505740902333, "grad_norm": 0.003384110052138567, "learning_rate": 0.001, "loss": 0.3677, "step": 8840 }, { "epoch": 0.24394264961008771, "grad_norm": 0.002216142136603594, "learning_rate": 0.001, "loss": 0.399, "step": 8841 }, { "epoch": 0.24397024181115207, "grad_norm": 0.0023850633297115564, "learning_rate": 0.001, "loss": 0.4719, "step": 8842 }, { "epoch": 0.24399783401221645, "grad_norm": 0.0023910165764391422, "learning_rate": 0.001, "loss": 0.3848, "step": 8843 }, { "epoch": 0.24402542621328083, "grad_norm": 0.002412325469776988, "learning_rate": 0.001, "loss": 0.3571, "step": 8844 }, { "epoch": 0.24405301841434518, "grad_norm": 0.003925090655684471, "learning_rate": 0.001, "loss": 0.358, "step": 8845 }, { "epoch": 0.24408061061540956, "grad_norm": 0.002139107324182987, "learning_rate": 0.001, "loss": 0.4108, "step": 8846 }, { "epoch": 0.2441082028164739, "grad_norm": 0.003215159522369504, "learning_rate": 0.001, "loss": 0.3964, "step": 8847 }, { "epoch": 0.2441357950175383, "grad_norm": 0.0029652882367372513, "learning_rate": 0.001, "loss": 0.3926, "step": 8848 }, { "epoch": 0.24416338721860267, "grad_norm": 0.0026338063180446625, "learning_rate": 0.001, "loss": 0.392, "step": 8849 }, { "epoch": 0.24419097941966703, "grad_norm": 0.00244903308339417, "learning_rate": 0.001, "loss": 0.4068, "step": 8850 }, { "epoch": 0.2442185716207314, "grad_norm": 0.002739574061706662, "learning_rate": 0.001, "loss": 0.384, "step": 8851 }, { "epoch": 0.24424616382179576, "grad_norm": 0.003445478854700923, "learning_rate": 0.001, "loss": 0.409, "step": 8852 }, { "epoch": 0.24427375602286014, "grad_norm": 0.0026570416521281004, "learning_rate": 0.001, "loss": 0.4162, "step": 8853 }, { "epoch": 0.24430134822392452, "grad_norm": 0.0024555225390940905, "learning_rate": 0.001, "loss": 0.4071, "step": 8854 }, { "epoch": 0.24432894042498887, "grad_norm": 0.004189019091427326, "learning_rate": 0.001, "loss": 0.3589, "step": 8855 }, { "epoch": 0.24435653262605325, "grad_norm": 0.0035908452700823545, "learning_rate": 0.001, "loss": 0.3956, "step": 8856 }, { "epoch": 0.2443841248271176, "grad_norm": 0.002694958820939064, "learning_rate": 0.001, "loss": 0.3764, "step": 8857 }, { "epoch": 0.24441171702818199, "grad_norm": 0.002945966785773635, "learning_rate": 0.001, "loss": 0.4166, "step": 8858 }, { "epoch": 0.24443930922924637, "grad_norm": 0.002466941252350807, "learning_rate": 0.001, "loss": 0.4007, "step": 8859 }, { "epoch": 0.24446690143031072, "grad_norm": 0.003148929215967655, "learning_rate": 0.001, "loss": 0.4356, "step": 8860 }, { "epoch": 0.2444944936313751, "grad_norm": 0.0029636970721185207, "learning_rate": 0.001, "loss": 0.3543, "step": 8861 }, { "epoch": 0.24452208583243945, "grad_norm": 0.002561190165579319, "learning_rate": 0.001, "loss": 0.3965, "step": 8862 }, { "epoch": 0.24454967803350383, "grad_norm": 0.0031816980335861444, "learning_rate": 0.001, "loss": 0.3796, "step": 8863 }, { "epoch": 0.2445772702345682, "grad_norm": 0.0030445698648691177, "learning_rate": 0.001, "loss": 0.4252, "step": 8864 }, { "epoch": 0.24460486243563256, "grad_norm": 0.003067183308303356, "learning_rate": 0.001, "loss": 0.3822, "step": 8865 }, { "epoch": 0.24463245463669694, "grad_norm": 0.002353479852899909, "learning_rate": 0.001, "loss": 0.3911, "step": 8866 }, { "epoch": 0.2446600468377613, "grad_norm": 0.0058910418301820755, "learning_rate": 0.001, "loss": 0.4077, "step": 8867 }, { "epoch": 0.24468763903882568, "grad_norm": 0.003310910891741514, "learning_rate": 0.001, "loss": 0.38, "step": 8868 }, { "epoch": 0.24471523123989006, "grad_norm": 0.003525955369696021, "learning_rate": 0.001, "loss": 0.4132, "step": 8869 }, { "epoch": 0.2447428234409544, "grad_norm": 0.00252323760651052, "learning_rate": 0.001, "loss": 0.3562, "step": 8870 }, { "epoch": 0.2447704156420188, "grad_norm": 0.0025553128216415644, "learning_rate": 0.001, "loss": 0.41, "step": 8871 }, { "epoch": 0.24479800784308314, "grad_norm": 0.003628962906077504, "learning_rate": 0.001, "loss": 0.4232, "step": 8872 }, { "epoch": 0.24482560004414752, "grad_norm": 0.00299929385073483, "learning_rate": 0.001, "loss": 0.384, "step": 8873 }, { "epoch": 0.2448531922452119, "grad_norm": 0.004137910902500153, "learning_rate": 0.001, "loss": 0.3851, "step": 8874 }, { "epoch": 0.24488078444627626, "grad_norm": 0.0027973924297839403, "learning_rate": 0.001, "loss": 0.3781, "step": 8875 }, { "epoch": 0.24490837664734064, "grad_norm": 0.0029198199044913054, "learning_rate": 0.001, "loss": 0.3936, "step": 8876 }, { "epoch": 0.244935968848405, "grad_norm": 0.0027383132837712765, "learning_rate": 0.001, "loss": 0.4067, "step": 8877 }, { "epoch": 0.24496356104946937, "grad_norm": 0.00903650838881731, "learning_rate": 0.001, "loss": 0.3843, "step": 8878 }, { "epoch": 0.24499115325053375, "grad_norm": 0.01571609638631344, "learning_rate": 0.001, "loss": 0.3934, "step": 8879 }, { "epoch": 0.2450187454515981, "grad_norm": 0.0059790583327412605, "learning_rate": 0.001, "loss": 0.372, "step": 8880 }, { "epoch": 0.24504633765266248, "grad_norm": 0.0021693052258342505, "learning_rate": 0.001, "loss": 0.4197, "step": 8881 }, { "epoch": 0.24507392985372684, "grad_norm": 0.002195873064920306, "learning_rate": 0.001, "loss": 0.4027, "step": 8882 }, { "epoch": 0.24510152205479122, "grad_norm": 0.0032636993564665318, "learning_rate": 0.001, "loss": 0.4107, "step": 8883 }, { "epoch": 0.2451291142558556, "grad_norm": 0.005068526603281498, "learning_rate": 0.001, "loss": 0.4195, "step": 8884 }, { "epoch": 0.24515670645691995, "grad_norm": 0.0020894519984722137, "learning_rate": 0.001, "loss": 0.4464, "step": 8885 }, { "epoch": 0.24518429865798433, "grad_norm": 0.0024781054817140102, "learning_rate": 0.001, "loss": 0.3872, "step": 8886 }, { "epoch": 0.24521189085904868, "grad_norm": 0.002742476761341095, "learning_rate": 0.001, "loss": 0.3852, "step": 8887 }, { "epoch": 0.24523948306011306, "grad_norm": 0.0031709850300103426, "learning_rate": 0.001, "loss": 0.3621, "step": 8888 }, { "epoch": 0.24526707526117744, "grad_norm": 0.00277856457978487, "learning_rate": 0.001, "loss": 0.4364, "step": 8889 }, { "epoch": 0.2452946674622418, "grad_norm": 0.004140378907322884, "learning_rate": 0.001, "loss": 0.4148, "step": 8890 }, { "epoch": 0.24532225966330617, "grad_norm": 0.0283675380051136, "learning_rate": 0.001, "loss": 0.3886, "step": 8891 }, { "epoch": 0.24534985186437053, "grad_norm": 0.010619306936860085, "learning_rate": 0.001, "loss": 0.4202, "step": 8892 }, { "epoch": 0.2453774440654349, "grad_norm": 0.0036299237981438637, "learning_rate": 0.001, "loss": 0.4419, "step": 8893 }, { "epoch": 0.24540503626649926, "grad_norm": 0.00281274551525712, "learning_rate": 0.001, "loss": 0.4136, "step": 8894 }, { "epoch": 0.24543262846756364, "grad_norm": 0.0032774037681519985, "learning_rate": 0.001, "loss": 0.4124, "step": 8895 }, { "epoch": 0.24546022066862802, "grad_norm": 0.004956527147442102, "learning_rate": 0.001, "loss": 0.3636, "step": 8896 }, { "epoch": 0.24548781286969237, "grad_norm": 0.003114642109721899, "learning_rate": 0.001, "loss": 0.3618, "step": 8897 }, { "epoch": 0.24551540507075675, "grad_norm": 0.012209619395434856, "learning_rate": 0.001, "loss": 0.4098, "step": 8898 }, { "epoch": 0.2455429972718211, "grad_norm": 0.005186257418245077, "learning_rate": 0.001, "loss": 0.3989, "step": 8899 }, { "epoch": 0.2455705894728855, "grad_norm": 0.003014913760125637, "learning_rate": 0.001, "loss": 0.3578, "step": 8900 }, { "epoch": 0.24559818167394987, "grad_norm": 0.0029307794757187366, "learning_rate": 0.001, "loss": 0.3941, "step": 8901 }, { "epoch": 0.24562577387501422, "grad_norm": 0.0025628958828747272, "learning_rate": 0.001, "loss": 0.3884, "step": 8902 }, { "epoch": 0.2456533660760786, "grad_norm": 0.0026851852890104055, "learning_rate": 0.001, "loss": 0.4178, "step": 8903 }, { "epoch": 0.24568095827714295, "grad_norm": 0.0029029424767941236, "learning_rate": 0.001, "loss": 0.4017, "step": 8904 }, { "epoch": 0.24570855047820733, "grad_norm": 0.0035970478784292936, "learning_rate": 0.001, "loss": 0.3955, "step": 8905 }, { "epoch": 0.2457361426792717, "grad_norm": 0.006312237121164799, "learning_rate": 0.001, "loss": 0.4197, "step": 8906 }, { "epoch": 0.24576373488033607, "grad_norm": 0.005121266935020685, "learning_rate": 0.001, "loss": 0.4264, "step": 8907 }, { "epoch": 0.24579132708140045, "grad_norm": 0.003577583469450474, "learning_rate": 0.001, "loss": 0.3669, "step": 8908 }, { "epoch": 0.2458189192824648, "grad_norm": 0.0027252668514847755, "learning_rate": 0.001, "loss": 0.3777, "step": 8909 }, { "epoch": 0.24584651148352918, "grad_norm": 0.0026001029182225466, "learning_rate": 0.001, "loss": 0.3571, "step": 8910 }, { "epoch": 0.24587410368459356, "grad_norm": 0.0025208101142197847, "learning_rate": 0.001, "loss": 0.4442, "step": 8911 }, { "epoch": 0.2459016958856579, "grad_norm": 0.004707024432718754, "learning_rate": 0.001, "loss": 0.404, "step": 8912 }, { "epoch": 0.2459292880867223, "grad_norm": 0.0031555844470858574, "learning_rate": 0.001, "loss": 0.3893, "step": 8913 }, { "epoch": 0.24595688028778664, "grad_norm": 0.002120368182659149, "learning_rate": 0.001, "loss": 0.3965, "step": 8914 }, { "epoch": 0.24598447248885102, "grad_norm": 0.002101072110235691, "learning_rate": 0.001, "loss": 0.4142, "step": 8915 }, { "epoch": 0.2460120646899154, "grad_norm": 0.0030265292152762413, "learning_rate": 0.001, "loss": 0.4068, "step": 8916 }, { "epoch": 0.24603965689097976, "grad_norm": 0.0027274913154542446, "learning_rate": 0.001, "loss": 0.3491, "step": 8917 }, { "epoch": 0.24606724909204414, "grad_norm": 0.0028532680589705706, "learning_rate": 0.001, "loss": 0.3599, "step": 8918 }, { "epoch": 0.2460948412931085, "grad_norm": 0.003202751511707902, "learning_rate": 0.001, "loss": 0.4211, "step": 8919 }, { "epoch": 0.24612243349417287, "grad_norm": 0.00257576210424304, "learning_rate": 0.001, "loss": 0.4007, "step": 8920 }, { "epoch": 0.24615002569523725, "grad_norm": 0.002339770086109638, "learning_rate": 0.001, "loss": 0.4208, "step": 8921 }, { "epoch": 0.2461776178963016, "grad_norm": 0.0026211580261588097, "learning_rate": 0.001, "loss": 0.4038, "step": 8922 }, { "epoch": 0.24620521009736598, "grad_norm": 0.002310456009581685, "learning_rate": 0.001, "loss": 0.4027, "step": 8923 }, { "epoch": 0.24623280229843034, "grad_norm": 0.0026841405779123306, "learning_rate": 0.001, "loss": 0.3782, "step": 8924 }, { "epoch": 0.24626039449949472, "grad_norm": 0.0026302135083824396, "learning_rate": 0.001, "loss": 0.3994, "step": 8925 }, { "epoch": 0.2462879867005591, "grad_norm": 0.0022160590160638094, "learning_rate": 0.001, "loss": 0.4267, "step": 8926 }, { "epoch": 0.24631557890162345, "grad_norm": 0.0031570009887218475, "learning_rate": 0.001, "loss": 0.3834, "step": 8927 }, { "epoch": 0.24634317110268783, "grad_norm": 0.0042785764671862125, "learning_rate": 0.001, "loss": 0.3567, "step": 8928 }, { "epoch": 0.24637076330375218, "grad_norm": 0.00619478989392519, "learning_rate": 0.001, "loss": 0.3987, "step": 8929 }, { "epoch": 0.24639835550481656, "grad_norm": 0.0054093278013169765, "learning_rate": 0.001, "loss": 0.4192, "step": 8930 }, { "epoch": 0.24642594770588094, "grad_norm": 0.004795154556632042, "learning_rate": 0.001, "loss": 0.3968, "step": 8931 }, { "epoch": 0.2464535399069453, "grad_norm": 0.003340116934850812, "learning_rate": 0.001, "loss": 0.4441, "step": 8932 }, { "epoch": 0.24648113210800968, "grad_norm": 0.0024299235083162785, "learning_rate": 0.001, "loss": 0.4043, "step": 8933 }, { "epoch": 0.24650872430907403, "grad_norm": 0.0033976933918893337, "learning_rate": 0.001, "loss": 0.4082, "step": 8934 }, { "epoch": 0.2465363165101384, "grad_norm": 0.004061064217239618, "learning_rate": 0.001, "loss": 0.4087, "step": 8935 }, { "epoch": 0.2465639087112028, "grad_norm": 0.002939441241323948, "learning_rate": 0.001, "loss": 0.3877, "step": 8936 }, { "epoch": 0.24659150091226714, "grad_norm": 0.003068729070946574, "learning_rate": 0.001, "loss": 0.4232, "step": 8937 }, { "epoch": 0.24661909311333152, "grad_norm": 0.003941199276596308, "learning_rate": 0.001, "loss": 0.4244, "step": 8938 }, { "epoch": 0.24664668531439587, "grad_norm": 0.0029029848519712687, "learning_rate": 0.001, "loss": 0.4147, "step": 8939 }, { "epoch": 0.24667427751546026, "grad_norm": 0.0041121323592960835, "learning_rate": 0.001, "loss": 0.4205, "step": 8940 }, { "epoch": 0.24670186971652464, "grad_norm": 0.002195686800405383, "learning_rate": 0.001, "loss": 0.4237, "step": 8941 }, { "epoch": 0.246729461917589, "grad_norm": 0.00511584896594286, "learning_rate": 0.001, "loss": 0.4068, "step": 8942 }, { "epoch": 0.24675705411865337, "grad_norm": 0.0033936495892703533, "learning_rate": 0.001, "loss": 0.4118, "step": 8943 }, { "epoch": 0.24678464631971772, "grad_norm": 0.002642909763380885, "learning_rate": 0.001, "loss": 0.3948, "step": 8944 }, { "epoch": 0.2468122385207821, "grad_norm": 0.007020313758403063, "learning_rate": 0.001, "loss": 0.3746, "step": 8945 }, { "epoch": 0.24683983072184648, "grad_norm": 0.0026212832890450954, "learning_rate": 0.001, "loss": 0.4017, "step": 8946 }, { "epoch": 0.24686742292291083, "grad_norm": 0.00335301854647696, "learning_rate": 0.001, "loss": 0.3873, "step": 8947 }, { "epoch": 0.24689501512397521, "grad_norm": 0.004139469005167484, "learning_rate": 0.001, "loss": 0.4164, "step": 8948 }, { "epoch": 0.24692260732503957, "grad_norm": 0.004907173104584217, "learning_rate": 0.001, "loss": 0.4066, "step": 8949 }, { "epoch": 0.24695019952610395, "grad_norm": 0.0036863782443106174, "learning_rate": 0.001, "loss": 0.4069, "step": 8950 }, { "epoch": 0.24697779172716833, "grad_norm": 0.004586333874613047, "learning_rate": 0.001, "loss": 0.3842, "step": 8951 }, { "epoch": 0.24700538392823268, "grad_norm": 0.002670851768925786, "learning_rate": 0.001, "loss": 0.3911, "step": 8952 }, { "epoch": 0.24703297612929706, "grad_norm": 0.003045836230739951, "learning_rate": 0.001, "loss": 0.3962, "step": 8953 }, { "epoch": 0.2470605683303614, "grad_norm": 0.004562459886074066, "learning_rate": 0.001, "loss": 0.3694, "step": 8954 }, { "epoch": 0.2470881605314258, "grad_norm": 0.003939764108508825, "learning_rate": 0.001, "loss": 0.3917, "step": 8955 }, { "epoch": 0.24711575273249017, "grad_norm": 0.00309895072132349, "learning_rate": 0.001, "loss": 0.3878, "step": 8956 }, { "epoch": 0.24714334493355453, "grad_norm": 0.00386620219796896, "learning_rate": 0.001, "loss": 0.3703, "step": 8957 }, { "epoch": 0.2471709371346189, "grad_norm": 0.01034584641456604, "learning_rate": 0.001, "loss": 0.403, "step": 8958 }, { "epoch": 0.24719852933568326, "grad_norm": 0.002349577145650983, "learning_rate": 0.001, "loss": 0.3831, "step": 8959 }, { "epoch": 0.24722612153674764, "grad_norm": 0.003580132033675909, "learning_rate": 0.001, "loss": 0.3947, "step": 8960 }, { "epoch": 0.24725371373781202, "grad_norm": 0.0027131785172969103, "learning_rate": 0.001, "loss": 0.3968, "step": 8961 }, { "epoch": 0.24728130593887637, "grad_norm": 0.0030202209018170834, "learning_rate": 0.001, "loss": 0.3688, "step": 8962 }, { "epoch": 0.24730889813994075, "grad_norm": 0.002842100802809, "learning_rate": 0.001, "loss": 0.3903, "step": 8963 }, { "epoch": 0.2473364903410051, "grad_norm": 0.002618222264572978, "learning_rate": 0.001, "loss": 0.4299, "step": 8964 }, { "epoch": 0.24736408254206949, "grad_norm": 0.0024017065297812223, "learning_rate": 0.001, "loss": 0.37, "step": 8965 }, { "epoch": 0.24739167474313387, "grad_norm": 0.0025306292809545994, "learning_rate": 0.001, "loss": 0.4096, "step": 8966 }, { "epoch": 0.24741926694419822, "grad_norm": 0.004213306587189436, "learning_rate": 0.001, "loss": 0.4261, "step": 8967 }, { "epoch": 0.2474468591452626, "grad_norm": 0.0023831671569496393, "learning_rate": 0.001, "loss": 0.4304, "step": 8968 }, { "epoch": 0.24747445134632695, "grad_norm": 0.002815242623910308, "learning_rate": 0.001, "loss": 0.4499, "step": 8969 }, { "epoch": 0.24750204354739133, "grad_norm": 0.0026342514902353287, "learning_rate": 0.001, "loss": 0.3721, "step": 8970 }, { "epoch": 0.2475296357484557, "grad_norm": 0.004273373633623123, "learning_rate": 0.001, "loss": 0.3637, "step": 8971 }, { "epoch": 0.24755722794952006, "grad_norm": 0.003125197486951947, "learning_rate": 0.001, "loss": 0.3989, "step": 8972 }, { "epoch": 0.24758482015058444, "grad_norm": 0.004526606295257807, "learning_rate": 0.001, "loss": 0.3571, "step": 8973 }, { "epoch": 0.2476124123516488, "grad_norm": 0.0031581746879965067, "learning_rate": 0.001, "loss": 0.3923, "step": 8974 }, { "epoch": 0.24764000455271318, "grad_norm": 0.0028788463678210974, "learning_rate": 0.001, "loss": 0.364, "step": 8975 }, { "epoch": 0.24766759675377756, "grad_norm": 0.004946670029312372, "learning_rate": 0.001, "loss": 0.4213, "step": 8976 }, { "epoch": 0.2476951889548419, "grad_norm": 0.004649583250284195, "learning_rate": 0.001, "loss": 0.419, "step": 8977 }, { "epoch": 0.2477227811559063, "grad_norm": 0.0023594009689986706, "learning_rate": 0.001, "loss": 0.407, "step": 8978 }, { "epoch": 0.24775037335697064, "grad_norm": 0.0028385408222675323, "learning_rate": 0.001, "loss": 0.3887, "step": 8979 }, { "epoch": 0.24777796555803502, "grad_norm": 0.0029238059651106596, "learning_rate": 0.001, "loss": 0.3924, "step": 8980 }, { "epoch": 0.2478055577590994, "grad_norm": 0.002410429297015071, "learning_rate": 0.001, "loss": 0.4243, "step": 8981 }, { "epoch": 0.24783314996016376, "grad_norm": 0.0036725676618516445, "learning_rate": 0.001, "loss": 0.4202, "step": 8982 }, { "epoch": 0.24786074216122814, "grad_norm": 0.0026173696387559175, "learning_rate": 0.001, "loss": 0.4119, "step": 8983 }, { "epoch": 0.2478883343622925, "grad_norm": 0.0019191295141354203, "learning_rate": 0.001, "loss": 0.411, "step": 8984 }, { "epoch": 0.24791592656335687, "grad_norm": 0.003160592168569565, "learning_rate": 0.001, "loss": 0.3958, "step": 8985 }, { "epoch": 0.24794351876442125, "grad_norm": 0.002665070816874504, "learning_rate": 0.001, "loss": 0.3889, "step": 8986 }, { "epoch": 0.2479711109654856, "grad_norm": 0.0035125231370329857, "learning_rate": 0.001, "loss": 0.3749, "step": 8987 }, { "epoch": 0.24799870316654998, "grad_norm": 0.0034389530774205923, "learning_rate": 0.001, "loss": 0.3914, "step": 8988 }, { "epoch": 0.24802629536761434, "grad_norm": 0.00577655341476202, "learning_rate": 0.001, "loss": 0.4068, "step": 8989 }, { "epoch": 0.24805388756867872, "grad_norm": 0.002529504243284464, "learning_rate": 0.001, "loss": 0.4043, "step": 8990 }, { "epoch": 0.24808147976974307, "grad_norm": 0.003672944149002433, "learning_rate": 0.001, "loss": 0.3858, "step": 8991 }, { "epoch": 0.24810907197080745, "grad_norm": 0.004433492664247751, "learning_rate": 0.001, "loss": 0.3826, "step": 8992 }, { "epoch": 0.24813666417187183, "grad_norm": 0.0036997883580625057, "learning_rate": 0.001, "loss": 0.4014, "step": 8993 }, { "epoch": 0.24816425637293618, "grad_norm": 0.0071926964446902275, "learning_rate": 0.001, "loss": 0.4054, "step": 8994 }, { "epoch": 0.24819184857400056, "grad_norm": 0.002511198166757822, "learning_rate": 0.001, "loss": 0.413, "step": 8995 }, { "epoch": 0.24821944077506491, "grad_norm": 0.0027406965382397175, "learning_rate": 0.001, "loss": 0.4401, "step": 8996 }, { "epoch": 0.2482470329761293, "grad_norm": 0.0022513847798109055, "learning_rate": 0.001, "loss": 0.4076, "step": 8997 }, { "epoch": 0.24827462517719368, "grad_norm": 0.011038105934858322, "learning_rate": 0.001, "loss": 0.3784, "step": 8998 }, { "epoch": 0.24830221737825803, "grad_norm": 0.002501958515495062, "learning_rate": 0.001, "loss": 0.3689, "step": 8999 }, { "epoch": 0.2483298095793224, "grad_norm": 0.0029480638913810253, "learning_rate": 0.001, "loss": 0.406, "step": 9000 }, { "epoch": 0.2483298095793224, "eval_runtime": 24.7639, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.162, "step": 9000 }, { "epoch": 0.24835740178038676, "grad_norm": 0.0028752442449331284, "learning_rate": 0.001, "loss": 0.4275, "step": 9001 }, { "epoch": 0.24838499398145114, "grad_norm": 0.002456225221976638, "learning_rate": 0.001, "loss": 0.4138, "step": 9002 }, { "epoch": 0.24841258618251552, "grad_norm": 0.0035426050890237093, "learning_rate": 0.001, "loss": 0.385, "step": 9003 }, { "epoch": 0.24844017838357987, "grad_norm": 0.0030952002853155136, "learning_rate": 0.001, "loss": 0.3833, "step": 9004 }, { "epoch": 0.24846777058464425, "grad_norm": 0.004819031804800034, "learning_rate": 0.001, "loss": 0.4016, "step": 9005 }, { "epoch": 0.2484953627857086, "grad_norm": 0.007780123967677355, "learning_rate": 0.001, "loss": 0.3896, "step": 9006 }, { "epoch": 0.248522954986773, "grad_norm": 0.005544982384890318, "learning_rate": 0.001, "loss": 0.3812, "step": 9007 }, { "epoch": 0.24855054718783737, "grad_norm": 0.00238407077267766, "learning_rate": 0.001, "loss": 0.4002, "step": 9008 }, { "epoch": 0.24857813938890172, "grad_norm": 0.0024830265901982784, "learning_rate": 0.001, "loss": 0.3894, "step": 9009 }, { "epoch": 0.2486057315899661, "grad_norm": 0.001999202184379101, "learning_rate": 0.001, "loss": 0.4219, "step": 9010 }, { "epoch": 0.24863332379103045, "grad_norm": 0.0038518370129168034, "learning_rate": 0.001, "loss": 0.39, "step": 9011 }, { "epoch": 0.24866091599209483, "grad_norm": 0.0025100288912653923, "learning_rate": 0.001, "loss": 0.4523, "step": 9012 }, { "epoch": 0.2486885081931592, "grad_norm": 0.0034927844535559416, "learning_rate": 0.001, "loss": 0.3875, "step": 9013 }, { "epoch": 0.24871610039422357, "grad_norm": 0.0020984727889299393, "learning_rate": 0.001, "loss": 0.4255, "step": 9014 }, { "epoch": 0.24874369259528795, "grad_norm": 0.003706206800416112, "learning_rate": 0.001, "loss": 0.3502, "step": 9015 }, { "epoch": 0.2487712847963523, "grad_norm": 0.002861752174794674, "learning_rate": 0.001, "loss": 0.3768, "step": 9016 }, { "epoch": 0.24879887699741668, "grad_norm": 0.0027587637305259705, "learning_rate": 0.001, "loss": 0.4082, "step": 9017 }, { "epoch": 0.24882646919848106, "grad_norm": 0.002627007896080613, "learning_rate": 0.001, "loss": 0.4274, "step": 9018 }, { "epoch": 0.2488540613995454, "grad_norm": 0.0047552213072776794, "learning_rate": 0.001, "loss": 0.3754, "step": 9019 }, { "epoch": 0.2488816536006098, "grad_norm": 0.002344539389014244, "learning_rate": 0.001, "loss": 0.4069, "step": 9020 }, { "epoch": 0.24890924580167414, "grad_norm": 0.0031450914684683084, "learning_rate": 0.001, "loss": 0.3758, "step": 9021 }, { "epoch": 0.24893683800273853, "grad_norm": 0.0026123332791030407, "learning_rate": 0.001, "loss": 0.4125, "step": 9022 }, { "epoch": 0.2489644302038029, "grad_norm": 0.005729448515921831, "learning_rate": 0.001, "loss": 0.405, "step": 9023 }, { "epoch": 0.24899202240486726, "grad_norm": 0.003826259169727564, "learning_rate": 0.001, "loss": 0.4105, "step": 9024 }, { "epoch": 0.24901961460593164, "grad_norm": 0.005589526146650314, "learning_rate": 0.001, "loss": 0.3767, "step": 9025 }, { "epoch": 0.249047206806996, "grad_norm": 0.003237026045098901, "learning_rate": 0.001, "loss": 0.3738, "step": 9026 }, { "epoch": 0.24907479900806037, "grad_norm": 0.0024905288591980934, "learning_rate": 0.001, "loss": 0.418, "step": 9027 }, { "epoch": 0.24910239120912475, "grad_norm": 0.0029746349900960922, "learning_rate": 0.001, "loss": 0.437, "step": 9028 }, { "epoch": 0.2491299834101891, "grad_norm": 0.003939430229365826, "learning_rate": 0.001, "loss": 0.3604, "step": 9029 }, { "epoch": 0.24915757561125348, "grad_norm": 0.0025192126631736755, "learning_rate": 0.001, "loss": 0.4166, "step": 9030 }, { "epoch": 0.24918516781231784, "grad_norm": 0.0019102268852293491, "learning_rate": 0.001, "loss": 0.4205, "step": 9031 }, { "epoch": 0.24921276001338222, "grad_norm": 0.008570538833737373, "learning_rate": 0.001, "loss": 0.3891, "step": 9032 }, { "epoch": 0.2492403522144466, "grad_norm": 0.0023212565574795008, "learning_rate": 0.001, "loss": 0.4246, "step": 9033 }, { "epoch": 0.24926794441551095, "grad_norm": 0.0023950086906552315, "learning_rate": 0.001, "loss": 0.3955, "step": 9034 }, { "epoch": 0.24929553661657533, "grad_norm": 0.002595582278445363, "learning_rate": 0.001, "loss": 0.3921, "step": 9035 }, { "epoch": 0.24932312881763968, "grad_norm": 0.012406972236931324, "learning_rate": 0.001, "loss": 0.3759, "step": 9036 }, { "epoch": 0.24935072101870406, "grad_norm": 0.0029333438724279404, "learning_rate": 0.001, "loss": 0.3703, "step": 9037 }, { "epoch": 0.24937831321976844, "grad_norm": 0.004842236638069153, "learning_rate": 0.001, "loss": 0.3641, "step": 9038 }, { "epoch": 0.2494059054208328, "grad_norm": 0.0062524196691811085, "learning_rate": 0.001, "loss": 0.397, "step": 9039 }, { "epoch": 0.24943349762189718, "grad_norm": 0.004324330948293209, "learning_rate": 0.001, "loss": 0.4325, "step": 9040 }, { "epoch": 0.24946108982296153, "grad_norm": 0.0026493435725569725, "learning_rate": 0.001, "loss": 0.4121, "step": 9041 }, { "epoch": 0.2494886820240259, "grad_norm": 0.003301947610452771, "learning_rate": 0.001, "loss": 0.3975, "step": 9042 }, { "epoch": 0.2495162742250903, "grad_norm": 0.0035235814284533262, "learning_rate": 0.001, "loss": 0.3928, "step": 9043 }, { "epoch": 0.24954386642615464, "grad_norm": 0.002071363152936101, "learning_rate": 0.001, "loss": 0.3916, "step": 9044 }, { "epoch": 0.24957145862721902, "grad_norm": 0.0021638255566358566, "learning_rate": 0.001, "loss": 0.3837, "step": 9045 }, { "epoch": 0.24959905082828338, "grad_norm": 0.0027805992867797613, "learning_rate": 0.001, "loss": 0.3732, "step": 9046 }, { "epoch": 0.24962664302934776, "grad_norm": 0.004270479548722506, "learning_rate": 0.001, "loss": 0.3954, "step": 9047 }, { "epoch": 0.24965423523041214, "grad_norm": 0.002844201633706689, "learning_rate": 0.001, "loss": 0.4189, "step": 9048 }, { "epoch": 0.2496818274314765, "grad_norm": 0.0030719267670065165, "learning_rate": 0.001, "loss": 0.3851, "step": 9049 }, { "epoch": 0.24970941963254087, "grad_norm": 0.008080038242042065, "learning_rate": 0.001, "loss": 0.3994, "step": 9050 }, { "epoch": 0.24973701183360522, "grad_norm": 0.0022514360025525093, "learning_rate": 0.001, "loss": 0.4391, "step": 9051 }, { "epoch": 0.2497646040346696, "grad_norm": 0.0026551985647529364, "learning_rate": 0.001, "loss": 0.4058, "step": 9052 }, { "epoch": 0.24979219623573398, "grad_norm": 0.002240521600469947, "learning_rate": 0.001, "loss": 0.3958, "step": 9053 }, { "epoch": 0.24981978843679833, "grad_norm": 0.0024243989028036594, "learning_rate": 0.001, "loss": 0.3804, "step": 9054 }, { "epoch": 0.24984738063786271, "grad_norm": 0.004180778283625841, "learning_rate": 0.001, "loss": 0.4338, "step": 9055 }, { "epoch": 0.24987497283892707, "grad_norm": 0.003084244905039668, "learning_rate": 0.001, "loss": 0.4469, "step": 9056 }, { "epoch": 0.24990256503999145, "grad_norm": 0.0029490680899471045, "learning_rate": 0.001, "loss": 0.4049, "step": 9057 }, { "epoch": 0.24993015724105583, "grad_norm": 0.002250351244583726, "learning_rate": 0.001, "loss": 0.4503, "step": 9058 }, { "epoch": 0.24995774944212018, "grad_norm": 0.0033541766460984945, "learning_rate": 0.001, "loss": 0.3971, "step": 9059 }, { "epoch": 0.24998534164318456, "grad_norm": 0.005710989236831665, "learning_rate": 0.001, "loss": 0.3707, "step": 9060 }, { "epoch": 0.25001293384424894, "grad_norm": 0.002218688605353236, "learning_rate": 0.001, "loss": 0.4303, "step": 9061 }, { "epoch": 0.2500405260453133, "grad_norm": 0.014134782366454601, "learning_rate": 0.001, "loss": 0.3534, "step": 9062 }, { "epoch": 0.25006811824637765, "grad_norm": 0.003705043811351061, "learning_rate": 0.001, "loss": 0.4308, "step": 9063 }, { "epoch": 0.25009571044744205, "grad_norm": 0.0030994920525699854, "learning_rate": 0.001, "loss": 0.4176, "step": 9064 }, { "epoch": 0.2501233026485064, "grad_norm": 0.002610408468171954, "learning_rate": 0.001, "loss": 0.3696, "step": 9065 }, { "epoch": 0.25015089484957076, "grad_norm": 0.0018913348903879523, "learning_rate": 0.001, "loss": 0.4023, "step": 9066 }, { "epoch": 0.2501784870506351, "grad_norm": 0.002292625606060028, "learning_rate": 0.001, "loss": 0.4287, "step": 9067 }, { "epoch": 0.2502060792516995, "grad_norm": 0.0036623626947402954, "learning_rate": 0.001, "loss": 0.3768, "step": 9068 }, { "epoch": 0.2502336714527639, "grad_norm": 0.002552927238866687, "learning_rate": 0.001, "loss": 0.3931, "step": 9069 }, { "epoch": 0.2502612636538282, "grad_norm": 0.005650006700307131, "learning_rate": 0.001, "loss": 0.4034, "step": 9070 }, { "epoch": 0.25028885585489263, "grad_norm": 0.0022689776960760355, "learning_rate": 0.001, "loss": 0.4568, "step": 9071 }, { "epoch": 0.250316448055957, "grad_norm": 0.002632437041029334, "learning_rate": 0.001, "loss": 0.4326, "step": 9072 }, { "epoch": 0.25034404025702134, "grad_norm": 0.004504368640482426, "learning_rate": 0.001, "loss": 0.3648, "step": 9073 }, { "epoch": 0.25037163245808575, "grad_norm": 0.0025573919992893934, "learning_rate": 0.001, "loss": 0.4094, "step": 9074 }, { "epoch": 0.2503992246591501, "grad_norm": 0.00638620974496007, "learning_rate": 0.001, "loss": 0.3792, "step": 9075 }, { "epoch": 0.25042681686021445, "grad_norm": 0.002866227412596345, "learning_rate": 0.001, "loss": 0.4103, "step": 9076 }, { "epoch": 0.2504544090612788, "grad_norm": 0.002649690955877304, "learning_rate": 0.001, "loss": 0.3756, "step": 9077 }, { "epoch": 0.2504820012623432, "grad_norm": 0.002525368705391884, "learning_rate": 0.001, "loss": 0.444, "step": 9078 }, { "epoch": 0.25050959346340756, "grad_norm": 0.008922641165554523, "learning_rate": 0.001, "loss": 0.4392, "step": 9079 }, { "epoch": 0.2505371856644719, "grad_norm": 0.0028822512831538916, "learning_rate": 0.001, "loss": 0.4405, "step": 9080 }, { "epoch": 0.2505647778655363, "grad_norm": 0.0031333775259554386, "learning_rate": 0.001, "loss": 0.3556, "step": 9081 }, { "epoch": 0.2505923700666007, "grad_norm": 0.0029146878514438868, "learning_rate": 0.001, "loss": 0.3944, "step": 9082 }, { "epoch": 0.25061996226766503, "grad_norm": 0.005818530917167664, "learning_rate": 0.001, "loss": 0.4303, "step": 9083 }, { "epoch": 0.25064755446872944, "grad_norm": 0.0026978689711540937, "learning_rate": 0.001, "loss": 0.4441, "step": 9084 }, { "epoch": 0.2506751466697938, "grad_norm": 0.005949284881353378, "learning_rate": 0.001, "loss": 0.3697, "step": 9085 }, { "epoch": 0.25070273887085814, "grad_norm": 0.003694251412525773, "learning_rate": 0.001, "loss": 0.3965, "step": 9086 }, { "epoch": 0.2507303310719225, "grad_norm": 0.005075919907540083, "learning_rate": 0.001, "loss": 0.4207, "step": 9087 }, { "epoch": 0.2507579232729869, "grad_norm": 0.002228903118520975, "learning_rate": 0.001, "loss": 0.4303, "step": 9088 }, { "epoch": 0.25078551547405126, "grad_norm": 0.002552257152274251, "learning_rate": 0.001, "loss": 0.4026, "step": 9089 }, { "epoch": 0.2508131076751156, "grad_norm": 0.004208039958029985, "learning_rate": 0.001, "loss": 0.4442, "step": 9090 }, { "epoch": 0.25084069987618, "grad_norm": 0.004520753864198923, "learning_rate": 0.001, "loss": 0.3442, "step": 9091 }, { "epoch": 0.25086829207724437, "grad_norm": 0.002299990737810731, "learning_rate": 0.001, "loss": 0.4183, "step": 9092 }, { "epoch": 0.2508958842783087, "grad_norm": 0.003526929300278425, "learning_rate": 0.001, "loss": 0.3555, "step": 9093 }, { "epoch": 0.25092347647937313, "grad_norm": 0.0026393721345812082, "learning_rate": 0.001, "loss": 0.4189, "step": 9094 }, { "epoch": 0.2509510686804375, "grad_norm": 0.0042528389021754265, "learning_rate": 0.001, "loss": 0.4108, "step": 9095 }, { "epoch": 0.25097866088150184, "grad_norm": 0.004804633557796478, "learning_rate": 0.001, "loss": 0.3721, "step": 9096 }, { "epoch": 0.2510062530825662, "grad_norm": 0.002310951007530093, "learning_rate": 0.001, "loss": 0.3838, "step": 9097 }, { "epoch": 0.2510338452836306, "grad_norm": 0.0024983736220747232, "learning_rate": 0.001, "loss": 0.4178, "step": 9098 }, { "epoch": 0.25106143748469495, "grad_norm": 0.0018971741665154696, "learning_rate": 0.001, "loss": 0.4081, "step": 9099 }, { "epoch": 0.2510890296857593, "grad_norm": 0.002223706105723977, "learning_rate": 0.001, "loss": 0.4055, "step": 9100 }, { "epoch": 0.2511166218868237, "grad_norm": 0.0028201346285641193, "learning_rate": 0.001, "loss": 0.4068, "step": 9101 }, { "epoch": 0.25114421408788806, "grad_norm": 0.0036441651172935963, "learning_rate": 0.001, "loss": 0.3565, "step": 9102 }, { "epoch": 0.2511718062889524, "grad_norm": 0.002532258862629533, "learning_rate": 0.001, "loss": 0.4385, "step": 9103 }, { "epoch": 0.2511993984900168, "grad_norm": 0.0029783290810883045, "learning_rate": 0.001, "loss": 0.3741, "step": 9104 }, { "epoch": 0.2512269906910812, "grad_norm": 0.003864760510623455, "learning_rate": 0.001, "loss": 0.401, "step": 9105 }, { "epoch": 0.25125458289214553, "grad_norm": 0.004010128788650036, "learning_rate": 0.001, "loss": 0.3796, "step": 9106 }, { "epoch": 0.2512821750932099, "grad_norm": 0.004032640252262354, "learning_rate": 0.001, "loss": 0.373, "step": 9107 }, { "epoch": 0.2513097672942743, "grad_norm": 0.003456498496234417, "learning_rate": 0.001, "loss": 0.3834, "step": 9108 }, { "epoch": 0.25133735949533864, "grad_norm": 0.0041527096182107925, "learning_rate": 0.001, "loss": 0.4172, "step": 9109 }, { "epoch": 0.251364951696403, "grad_norm": 0.0033256958704441786, "learning_rate": 0.001, "loss": 0.4157, "step": 9110 }, { "epoch": 0.2513925438974674, "grad_norm": 0.009706409648060799, "learning_rate": 0.001, "loss": 0.3826, "step": 9111 }, { "epoch": 0.25142013609853175, "grad_norm": 0.005892944522202015, "learning_rate": 0.001, "loss": 0.4361, "step": 9112 }, { "epoch": 0.2514477282995961, "grad_norm": 0.0027680047787725925, "learning_rate": 0.001, "loss": 0.4273, "step": 9113 }, { "epoch": 0.2514753205006605, "grad_norm": 0.0046938760206103325, "learning_rate": 0.001, "loss": 0.3873, "step": 9114 }, { "epoch": 0.25150291270172487, "grad_norm": 0.00436344463378191, "learning_rate": 0.001, "loss": 0.4194, "step": 9115 }, { "epoch": 0.2515305049027892, "grad_norm": 0.002917677629739046, "learning_rate": 0.001, "loss": 0.4181, "step": 9116 }, { "epoch": 0.2515580971038536, "grad_norm": 0.0041982559487223625, "learning_rate": 0.001, "loss": 0.3528, "step": 9117 }, { "epoch": 0.251585689304918, "grad_norm": 0.003178415121510625, "learning_rate": 0.001, "loss": 0.3877, "step": 9118 }, { "epoch": 0.25161328150598233, "grad_norm": 0.00351191614754498, "learning_rate": 0.001, "loss": 0.3981, "step": 9119 }, { "epoch": 0.2516408737070467, "grad_norm": 0.002716810442507267, "learning_rate": 0.001, "loss": 0.4092, "step": 9120 }, { "epoch": 0.2516684659081111, "grad_norm": 0.00262506608851254, "learning_rate": 0.001, "loss": 0.4212, "step": 9121 }, { "epoch": 0.25169605810917545, "grad_norm": 0.005152091383934021, "learning_rate": 0.001, "loss": 0.3949, "step": 9122 }, { "epoch": 0.2517236503102398, "grad_norm": 0.003531603142619133, "learning_rate": 0.001, "loss": 0.379, "step": 9123 }, { "epoch": 0.2517512425113042, "grad_norm": 0.0037356463726609945, "learning_rate": 0.001, "loss": 0.4145, "step": 9124 }, { "epoch": 0.25177883471236856, "grad_norm": 0.02328849956393242, "learning_rate": 0.001, "loss": 0.4216, "step": 9125 }, { "epoch": 0.2518064269134329, "grad_norm": 0.0030895329546183348, "learning_rate": 0.001, "loss": 0.3952, "step": 9126 }, { "epoch": 0.25183401911449727, "grad_norm": 0.002636708552017808, "learning_rate": 0.001, "loss": 0.3837, "step": 9127 }, { "epoch": 0.2518616113155617, "grad_norm": 0.0028558552730828524, "learning_rate": 0.001, "loss": 0.3888, "step": 9128 }, { "epoch": 0.251889203516626, "grad_norm": 0.003687650430947542, "learning_rate": 0.001, "loss": 0.4049, "step": 9129 }, { "epoch": 0.2519167957176904, "grad_norm": 0.002363695530220866, "learning_rate": 0.001, "loss": 0.4072, "step": 9130 }, { "epoch": 0.2519443879187548, "grad_norm": 0.0024190808180719614, "learning_rate": 0.001, "loss": 0.4124, "step": 9131 }, { "epoch": 0.25197198011981914, "grad_norm": 0.002729938132688403, "learning_rate": 0.001, "loss": 0.3879, "step": 9132 }, { "epoch": 0.2519995723208835, "grad_norm": 0.003316995920613408, "learning_rate": 0.001, "loss": 0.3628, "step": 9133 }, { "epoch": 0.25202716452194784, "grad_norm": 0.002829955890774727, "learning_rate": 0.001, "loss": 0.3945, "step": 9134 }, { "epoch": 0.25205475672301225, "grad_norm": 0.0023533031344413757, "learning_rate": 0.001, "loss": 0.4032, "step": 9135 }, { "epoch": 0.2520823489240766, "grad_norm": 0.002993488684296608, "learning_rate": 0.001, "loss": 0.4005, "step": 9136 }, { "epoch": 0.25210994112514096, "grad_norm": 0.002113811671733856, "learning_rate": 0.001, "loss": 0.4032, "step": 9137 }, { "epoch": 0.25213753332620537, "grad_norm": 0.00470153009518981, "learning_rate": 0.001, "loss": 0.3624, "step": 9138 }, { "epoch": 0.2521651255272697, "grad_norm": 0.0033217284362763166, "learning_rate": 0.001, "loss": 0.4062, "step": 9139 }, { "epoch": 0.25219271772833407, "grad_norm": 0.0025939196348190308, "learning_rate": 0.001, "loss": 0.4082, "step": 9140 }, { "epoch": 0.2522203099293985, "grad_norm": 0.0032115280628204346, "learning_rate": 0.001, "loss": 0.3932, "step": 9141 }, { "epoch": 0.25224790213046283, "grad_norm": 0.0023531445767730474, "learning_rate": 0.001, "loss": 0.4231, "step": 9142 }, { "epoch": 0.2522754943315272, "grad_norm": 0.0035788915120065212, "learning_rate": 0.001, "loss": 0.386, "step": 9143 }, { "epoch": 0.25230308653259154, "grad_norm": 0.002639509504660964, "learning_rate": 0.001, "loss": 0.4204, "step": 9144 }, { "epoch": 0.25233067873365594, "grad_norm": 0.0037151076830923557, "learning_rate": 0.001, "loss": 0.4048, "step": 9145 }, { "epoch": 0.2523582709347203, "grad_norm": 0.002693182323127985, "learning_rate": 0.001, "loss": 0.4174, "step": 9146 }, { "epoch": 0.25238586313578465, "grad_norm": 0.00298108346760273, "learning_rate": 0.001, "loss": 0.4452, "step": 9147 }, { "epoch": 0.25241345533684906, "grad_norm": 0.005775284022092819, "learning_rate": 0.001, "loss": 0.381, "step": 9148 }, { "epoch": 0.2524410475379134, "grad_norm": 0.004025799687951803, "learning_rate": 0.001, "loss": 0.3968, "step": 9149 }, { "epoch": 0.25246863973897776, "grad_norm": 0.0029659196734428406, "learning_rate": 0.001, "loss": 0.4274, "step": 9150 }, { "epoch": 0.25249623194004217, "grad_norm": 0.006109724286943674, "learning_rate": 0.001, "loss": 0.4322, "step": 9151 }, { "epoch": 0.2525238241411065, "grad_norm": 0.004189697094261646, "learning_rate": 0.001, "loss": 0.3635, "step": 9152 }, { "epoch": 0.2525514163421709, "grad_norm": 0.004084098618477583, "learning_rate": 0.001, "loss": 0.4044, "step": 9153 }, { "epoch": 0.25257900854323523, "grad_norm": 0.0035397496540099382, "learning_rate": 0.001, "loss": 0.4127, "step": 9154 }, { "epoch": 0.25260660074429964, "grad_norm": 0.002793220803141594, "learning_rate": 0.001, "loss": 0.4159, "step": 9155 }, { "epoch": 0.252634192945364, "grad_norm": 0.0030049553606659174, "learning_rate": 0.001, "loss": 0.4367, "step": 9156 }, { "epoch": 0.25266178514642834, "grad_norm": 0.003896713722497225, "learning_rate": 0.001, "loss": 0.3657, "step": 9157 }, { "epoch": 0.25268937734749275, "grad_norm": 0.0028666965663433075, "learning_rate": 0.001, "loss": 0.4524, "step": 9158 }, { "epoch": 0.2527169695485571, "grad_norm": 0.0026138313114643097, "learning_rate": 0.001, "loss": 0.4174, "step": 9159 }, { "epoch": 0.25274456174962145, "grad_norm": 0.0023572929203510284, "learning_rate": 0.001, "loss": 0.4086, "step": 9160 }, { "epoch": 0.25277215395068586, "grad_norm": 0.003126518102362752, "learning_rate": 0.001, "loss": 0.3762, "step": 9161 }, { "epoch": 0.2527997461517502, "grad_norm": 0.0024071959778666496, "learning_rate": 0.001, "loss": 0.4011, "step": 9162 }, { "epoch": 0.25282733835281457, "grad_norm": 0.0023764509242028, "learning_rate": 0.001, "loss": 0.4642, "step": 9163 }, { "epoch": 0.2528549305538789, "grad_norm": 0.002558658830821514, "learning_rate": 0.001, "loss": 0.387, "step": 9164 }, { "epoch": 0.25288252275494333, "grad_norm": 0.004064169712364674, "learning_rate": 0.001, "loss": 0.3882, "step": 9165 }, { "epoch": 0.2529101149560077, "grad_norm": 0.0038082520477473736, "learning_rate": 0.001, "loss": 0.3963, "step": 9166 }, { "epoch": 0.25293770715707203, "grad_norm": 0.002633225405588746, "learning_rate": 0.001, "loss": 0.3788, "step": 9167 }, { "epoch": 0.25296529935813644, "grad_norm": 0.002508282894268632, "learning_rate": 0.001, "loss": 0.3958, "step": 9168 }, { "epoch": 0.2529928915592008, "grad_norm": 0.0038721607998013496, "learning_rate": 0.001, "loss": 0.3782, "step": 9169 }, { "epoch": 0.25302048376026515, "grad_norm": 0.0033644256182014942, "learning_rate": 0.001, "loss": 0.3851, "step": 9170 }, { "epoch": 0.25304807596132955, "grad_norm": 0.002658225130289793, "learning_rate": 0.001, "loss": 0.3945, "step": 9171 }, { "epoch": 0.2530756681623939, "grad_norm": 0.0030783566180616617, "learning_rate": 0.001, "loss": 0.3867, "step": 9172 }, { "epoch": 0.25310326036345826, "grad_norm": 0.0024686295073479414, "learning_rate": 0.001, "loss": 0.4195, "step": 9173 }, { "epoch": 0.2531308525645226, "grad_norm": 0.0030403723940253258, "learning_rate": 0.001, "loss": 0.3803, "step": 9174 }, { "epoch": 0.253158444765587, "grad_norm": 0.0033355674240738153, "learning_rate": 0.001, "loss": 0.4257, "step": 9175 }, { "epoch": 0.2531860369666514, "grad_norm": 0.0023434923496097326, "learning_rate": 0.001, "loss": 0.4126, "step": 9176 }, { "epoch": 0.2532136291677157, "grad_norm": 0.0037734145298600197, "learning_rate": 0.001, "loss": 0.3675, "step": 9177 }, { "epoch": 0.25324122136878013, "grad_norm": 0.0024306117556989193, "learning_rate": 0.001, "loss": 0.3971, "step": 9178 }, { "epoch": 0.2532688135698445, "grad_norm": 0.002472655149176717, "learning_rate": 0.001, "loss": 0.4281, "step": 9179 }, { "epoch": 0.25329640577090884, "grad_norm": 0.0033675595186650753, "learning_rate": 0.001, "loss": 0.3934, "step": 9180 }, { "epoch": 0.25332399797197325, "grad_norm": 0.00425712438300252, "learning_rate": 0.001, "loss": 0.396, "step": 9181 }, { "epoch": 0.2533515901730376, "grad_norm": 0.002620971528813243, "learning_rate": 0.001, "loss": 0.4149, "step": 9182 }, { "epoch": 0.25337918237410195, "grad_norm": 0.002496513072401285, "learning_rate": 0.001, "loss": 0.4068, "step": 9183 }, { "epoch": 0.2534067745751663, "grad_norm": 0.0037882968317717314, "learning_rate": 0.001, "loss": 0.3728, "step": 9184 }, { "epoch": 0.2534343667762307, "grad_norm": 0.0028284622821956873, "learning_rate": 0.001, "loss": 0.3959, "step": 9185 }, { "epoch": 0.25346195897729507, "grad_norm": 0.0031743019353598356, "learning_rate": 0.001, "loss": 0.4024, "step": 9186 }, { "epoch": 0.2534895511783594, "grad_norm": 0.003066749544814229, "learning_rate": 0.001, "loss": 0.371, "step": 9187 }, { "epoch": 0.2535171433794238, "grad_norm": 0.002977808238938451, "learning_rate": 0.001, "loss": 0.4166, "step": 9188 }, { "epoch": 0.2535447355804882, "grad_norm": 0.0039002352859824896, "learning_rate": 0.001, "loss": 0.3788, "step": 9189 }, { "epoch": 0.25357232778155253, "grad_norm": 0.003922617062926292, "learning_rate": 0.001, "loss": 0.4162, "step": 9190 }, { "epoch": 0.25359991998261694, "grad_norm": 0.004719828721135855, "learning_rate": 0.001, "loss": 0.3872, "step": 9191 }, { "epoch": 0.2536275121836813, "grad_norm": 0.0065970043651759624, "learning_rate": 0.001, "loss": 0.4595, "step": 9192 }, { "epoch": 0.25365510438474564, "grad_norm": 0.002771706786006689, "learning_rate": 0.001, "loss": 0.4079, "step": 9193 }, { "epoch": 0.25368269658581, "grad_norm": 0.002968127839267254, "learning_rate": 0.001, "loss": 0.4369, "step": 9194 }, { "epoch": 0.2537102887868744, "grad_norm": 0.002558299573138356, "learning_rate": 0.001, "loss": 0.4117, "step": 9195 }, { "epoch": 0.25373788098793876, "grad_norm": 0.003306181402876973, "learning_rate": 0.001, "loss": 0.3655, "step": 9196 }, { "epoch": 0.2537654731890031, "grad_norm": 0.005295965354889631, "learning_rate": 0.001, "loss": 0.4163, "step": 9197 }, { "epoch": 0.2537930653900675, "grad_norm": 0.00258263130672276, "learning_rate": 0.001, "loss": 0.4246, "step": 9198 }, { "epoch": 0.25382065759113187, "grad_norm": 0.003539256053045392, "learning_rate": 0.001, "loss": 0.427, "step": 9199 }, { "epoch": 0.2538482497921962, "grad_norm": 0.004078635014593601, "learning_rate": 0.001, "loss": 0.417, "step": 9200 }, { "epoch": 0.25387584199326063, "grad_norm": 0.003579481039196253, "learning_rate": 0.001, "loss": 0.3628, "step": 9201 }, { "epoch": 0.253903434194325, "grad_norm": 0.011261108331382275, "learning_rate": 0.001, "loss": 0.3793, "step": 9202 }, { "epoch": 0.25393102639538934, "grad_norm": 0.005823037121444941, "learning_rate": 0.001, "loss": 0.3973, "step": 9203 }, { "epoch": 0.2539586185964537, "grad_norm": 0.0032197630498558283, "learning_rate": 0.001, "loss": 0.4038, "step": 9204 }, { "epoch": 0.2539862107975181, "grad_norm": 0.007648573722690344, "learning_rate": 0.001, "loss": 0.4349, "step": 9205 }, { "epoch": 0.25401380299858245, "grad_norm": 0.0039948225021362305, "learning_rate": 0.001, "loss": 0.3903, "step": 9206 }, { "epoch": 0.2540413951996468, "grad_norm": 0.008907606825232506, "learning_rate": 0.001, "loss": 0.3874, "step": 9207 }, { "epoch": 0.2540689874007112, "grad_norm": 0.0032174009829759598, "learning_rate": 0.001, "loss": 0.3872, "step": 9208 }, { "epoch": 0.25409657960177556, "grad_norm": 0.0036075576208531857, "learning_rate": 0.001, "loss": 0.4109, "step": 9209 }, { "epoch": 0.2541241718028399, "grad_norm": 0.002537550637498498, "learning_rate": 0.001, "loss": 0.3815, "step": 9210 }, { "epoch": 0.2541517640039043, "grad_norm": 0.004352409392595291, "learning_rate": 0.001, "loss": 0.394, "step": 9211 }, { "epoch": 0.2541793562049687, "grad_norm": 0.004635980818420649, "learning_rate": 0.001, "loss": 0.3785, "step": 9212 }, { "epoch": 0.25420694840603303, "grad_norm": 0.0028103957884013653, "learning_rate": 0.001, "loss": 0.3936, "step": 9213 }, { "epoch": 0.2542345406070974, "grad_norm": 0.0023787037935107946, "learning_rate": 0.001, "loss": 0.4018, "step": 9214 }, { "epoch": 0.2542621328081618, "grad_norm": 0.002404349623247981, "learning_rate": 0.001, "loss": 0.4314, "step": 9215 }, { "epoch": 0.25428972500922614, "grad_norm": 0.0025460943579673767, "learning_rate": 0.001, "loss": 0.4137, "step": 9216 }, { "epoch": 0.2543173172102905, "grad_norm": 0.003394089639186859, "learning_rate": 0.001, "loss": 0.3982, "step": 9217 }, { "epoch": 0.2543449094113549, "grad_norm": 0.003400506917387247, "learning_rate": 0.001, "loss": 0.386, "step": 9218 }, { "epoch": 0.25437250161241926, "grad_norm": 0.007282086182385683, "learning_rate": 0.001, "loss": 0.394, "step": 9219 }, { "epoch": 0.2544000938134836, "grad_norm": 0.0025983904488384724, "learning_rate": 0.001, "loss": 0.3971, "step": 9220 }, { "epoch": 0.254427686014548, "grad_norm": 0.0024682951625436544, "learning_rate": 0.001, "loss": 0.4116, "step": 9221 }, { "epoch": 0.25445527821561237, "grad_norm": 0.002498073736205697, "learning_rate": 0.001, "loss": 0.381, "step": 9222 }, { "epoch": 0.2544828704166767, "grad_norm": 0.004814106039702892, "learning_rate": 0.001, "loss": 0.4072, "step": 9223 }, { "epoch": 0.2545104626177411, "grad_norm": 0.004485031124204397, "learning_rate": 0.001, "loss": 0.4057, "step": 9224 }, { "epoch": 0.2545380548188055, "grad_norm": 0.002533281221985817, "learning_rate": 0.001, "loss": 0.4129, "step": 9225 }, { "epoch": 0.25456564701986983, "grad_norm": 0.0037844269536435604, "learning_rate": 0.001, "loss": 0.3937, "step": 9226 }, { "epoch": 0.2545932392209342, "grad_norm": 0.0033858432434499264, "learning_rate": 0.001, "loss": 0.3821, "step": 9227 }, { "epoch": 0.2546208314219986, "grad_norm": 0.006454948335886002, "learning_rate": 0.001, "loss": 0.3779, "step": 9228 }, { "epoch": 0.25464842362306295, "grad_norm": 0.0032489451114088297, "learning_rate": 0.001, "loss": 0.4399, "step": 9229 }, { "epoch": 0.2546760158241273, "grad_norm": 0.0027783990371972322, "learning_rate": 0.001, "loss": 0.4115, "step": 9230 }, { "epoch": 0.25470360802519165, "grad_norm": 0.0032559032551944256, "learning_rate": 0.001, "loss": 0.4134, "step": 9231 }, { "epoch": 0.25473120022625606, "grad_norm": 0.006878309417515993, "learning_rate": 0.001, "loss": 0.4187, "step": 9232 }, { "epoch": 0.2547587924273204, "grad_norm": 0.003954887855798006, "learning_rate": 0.001, "loss": 0.4045, "step": 9233 }, { "epoch": 0.25478638462838477, "grad_norm": 0.0032106926664710045, "learning_rate": 0.001, "loss": 0.4152, "step": 9234 }, { "epoch": 0.2548139768294492, "grad_norm": 0.002547104610130191, "learning_rate": 0.001, "loss": 0.4171, "step": 9235 }, { "epoch": 0.2548415690305135, "grad_norm": 0.00274961581453681, "learning_rate": 0.001, "loss": 0.3988, "step": 9236 }, { "epoch": 0.2548691612315779, "grad_norm": 0.002504730597138405, "learning_rate": 0.001, "loss": 0.3737, "step": 9237 }, { "epoch": 0.2548967534326423, "grad_norm": 0.00348614901304245, "learning_rate": 0.001, "loss": 0.3953, "step": 9238 }, { "epoch": 0.25492434563370664, "grad_norm": 0.0030160502064973116, "learning_rate": 0.001, "loss": 0.4211, "step": 9239 }, { "epoch": 0.254951937834771, "grad_norm": 0.0032628930639475584, "learning_rate": 0.001, "loss": 0.4242, "step": 9240 }, { "epoch": 0.25497953003583534, "grad_norm": 0.0022108100820332766, "learning_rate": 0.001, "loss": 0.4129, "step": 9241 }, { "epoch": 0.25500712223689975, "grad_norm": 0.003723006695508957, "learning_rate": 0.001, "loss": 0.4371, "step": 9242 }, { "epoch": 0.2550347144379641, "grad_norm": 0.00470739183947444, "learning_rate": 0.001, "loss": 0.3936, "step": 9243 }, { "epoch": 0.25506230663902846, "grad_norm": 0.00285819242708385, "learning_rate": 0.001, "loss": 0.4128, "step": 9244 }, { "epoch": 0.25508989884009287, "grad_norm": 0.0027255092281848192, "learning_rate": 0.001, "loss": 0.4063, "step": 9245 }, { "epoch": 0.2551174910411572, "grad_norm": 0.0023286372888833284, "learning_rate": 0.001, "loss": 0.3999, "step": 9246 }, { "epoch": 0.25514508324222157, "grad_norm": 0.006774048320949078, "learning_rate": 0.001, "loss": 0.3874, "step": 9247 }, { "epoch": 0.255172675443286, "grad_norm": 0.003444313770160079, "learning_rate": 0.001, "loss": 0.3923, "step": 9248 }, { "epoch": 0.25520026764435033, "grad_norm": 0.003187762573361397, "learning_rate": 0.001, "loss": 0.4253, "step": 9249 }, { "epoch": 0.2552278598454147, "grad_norm": 0.00499644735828042, "learning_rate": 0.001, "loss": 0.4378, "step": 9250 }, { "epoch": 0.25525545204647904, "grad_norm": 0.00488440552726388, "learning_rate": 0.001, "loss": 0.4275, "step": 9251 }, { "epoch": 0.25528304424754344, "grad_norm": 0.0026155610103160143, "learning_rate": 0.001, "loss": 0.3975, "step": 9252 }, { "epoch": 0.2553106364486078, "grad_norm": 0.0035269884392619133, "learning_rate": 0.001, "loss": 0.406, "step": 9253 }, { "epoch": 0.25533822864967215, "grad_norm": 0.005496086087077856, "learning_rate": 0.001, "loss": 0.3742, "step": 9254 }, { "epoch": 0.25536582085073656, "grad_norm": 0.003939601127058268, "learning_rate": 0.001, "loss": 0.4224, "step": 9255 }, { "epoch": 0.2553934130518009, "grad_norm": 0.0052981991320848465, "learning_rate": 0.001, "loss": 0.3673, "step": 9256 }, { "epoch": 0.25542100525286526, "grad_norm": 0.0025523051153868437, "learning_rate": 0.001, "loss": 0.4095, "step": 9257 }, { "epoch": 0.25544859745392967, "grad_norm": 0.004392458591610193, "learning_rate": 0.001, "loss": 0.4057, "step": 9258 }, { "epoch": 0.255476189654994, "grad_norm": 0.005645066034048796, "learning_rate": 0.001, "loss": 0.3755, "step": 9259 }, { "epoch": 0.2555037818560584, "grad_norm": 0.00333485659211874, "learning_rate": 0.001, "loss": 0.3992, "step": 9260 }, { "epoch": 0.25553137405712273, "grad_norm": 0.003476017387583852, "learning_rate": 0.001, "loss": 0.3854, "step": 9261 }, { "epoch": 0.25555896625818714, "grad_norm": 0.007261955179274082, "learning_rate": 0.001, "loss": 0.4203, "step": 9262 }, { "epoch": 0.2555865584592515, "grad_norm": 0.004596728831529617, "learning_rate": 0.001, "loss": 0.3866, "step": 9263 }, { "epoch": 0.25561415066031584, "grad_norm": 0.0033560562878847122, "learning_rate": 0.001, "loss": 0.3764, "step": 9264 }, { "epoch": 0.25564174286138025, "grad_norm": 0.003836827352643013, "learning_rate": 0.001, "loss": 0.3753, "step": 9265 }, { "epoch": 0.2556693350624446, "grad_norm": 0.0031401992309838533, "learning_rate": 0.001, "loss": 0.4131, "step": 9266 }, { "epoch": 0.25569692726350896, "grad_norm": 0.003163114422932267, "learning_rate": 0.001, "loss": 0.3708, "step": 9267 }, { "epoch": 0.25572451946457336, "grad_norm": 0.002614476252347231, "learning_rate": 0.001, "loss": 0.4264, "step": 9268 }, { "epoch": 0.2557521116656377, "grad_norm": 0.0029321182519197464, "learning_rate": 0.001, "loss": 0.4019, "step": 9269 }, { "epoch": 0.25577970386670207, "grad_norm": 0.0028678993694484234, "learning_rate": 0.001, "loss": 0.3902, "step": 9270 }, { "epoch": 0.2558072960677664, "grad_norm": 0.0034806549083441496, "learning_rate": 0.001, "loss": 0.4039, "step": 9271 }, { "epoch": 0.25583488826883083, "grad_norm": 0.0030649881809949875, "learning_rate": 0.001, "loss": 0.3957, "step": 9272 }, { "epoch": 0.2558624804698952, "grad_norm": 0.004192110616713762, "learning_rate": 0.001, "loss": 0.3838, "step": 9273 }, { "epoch": 0.25589007267095953, "grad_norm": 0.0024996623396873474, "learning_rate": 0.001, "loss": 0.3587, "step": 9274 }, { "epoch": 0.25591766487202394, "grad_norm": 0.002356313867494464, "learning_rate": 0.001, "loss": 0.4042, "step": 9275 }, { "epoch": 0.2559452570730883, "grad_norm": 0.0032441243529319763, "learning_rate": 0.001, "loss": 0.4102, "step": 9276 }, { "epoch": 0.25597284927415265, "grad_norm": 0.003050130559131503, "learning_rate": 0.001, "loss": 0.3998, "step": 9277 }, { "epoch": 0.25600044147521706, "grad_norm": 0.0028984618838876486, "learning_rate": 0.001, "loss": 0.4252, "step": 9278 }, { "epoch": 0.2560280336762814, "grad_norm": 0.0024825683794915676, "learning_rate": 0.001, "loss": 0.4067, "step": 9279 }, { "epoch": 0.25605562587734576, "grad_norm": 0.007617509458214045, "learning_rate": 0.001, "loss": 0.3855, "step": 9280 }, { "epoch": 0.2560832180784101, "grad_norm": 0.002734134206548333, "learning_rate": 0.001, "loss": 0.411, "step": 9281 }, { "epoch": 0.2561108102794745, "grad_norm": 0.0032497511710971594, "learning_rate": 0.001, "loss": 0.3858, "step": 9282 }, { "epoch": 0.2561384024805389, "grad_norm": 0.0038412590511143208, "learning_rate": 0.001, "loss": 0.4383, "step": 9283 }, { "epoch": 0.2561659946816032, "grad_norm": 0.002717244438827038, "learning_rate": 0.001, "loss": 0.3815, "step": 9284 }, { "epoch": 0.25619358688266763, "grad_norm": 0.002895118435844779, "learning_rate": 0.001, "loss": 0.4379, "step": 9285 }, { "epoch": 0.256221179083732, "grad_norm": 0.0027081493753939867, "learning_rate": 0.001, "loss": 0.3759, "step": 9286 }, { "epoch": 0.25624877128479634, "grad_norm": 0.002953689079731703, "learning_rate": 0.001, "loss": 0.4152, "step": 9287 }, { "epoch": 0.25627636348586075, "grad_norm": 0.0018930652877315879, "learning_rate": 0.001, "loss": 0.4461, "step": 9288 }, { "epoch": 0.2563039556869251, "grad_norm": 0.0027619446627795696, "learning_rate": 0.001, "loss": 0.393, "step": 9289 }, { "epoch": 0.25633154788798945, "grad_norm": 0.0028445671778172255, "learning_rate": 0.001, "loss": 0.3958, "step": 9290 }, { "epoch": 0.2563591400890538, "grad_norm": 0.004813406616449356, "learning_rate": 0.001, "loss": 0.3977, "step": 9291 }, { "epoch": 0.2563867322901182, "grad_norm": 0.0027178998570889235, "learning_rate": 0.001, "loss": 0.3842, "step": 9292 }, { "epoch": 0.25641432449118257, "grad_norm": 0.0027016105595976114, "learning_rate": 0.001, "loss": 0.409, "step": 9293 }, { "epoch": 0.2564419166922469, "grad_norm": 0.0038917434867471457, "learning_rate": 0.001, "loss": 0.391, "step": 9294 }, { "epoch": 0.2564695088933113, "grad_norm": 0.004160382319241762, "learning_rate": 0.001, "loss": 0.3978, "step": 9295 }, { "epoch": 0.2564971010943757, "grad_norm": 0.004111155401915312, "learning_rate": 0.001, "loss": 0.3929, "step": 9296 }, { "epoch": 0.25652469329544003, "grad_norm": 0.00411924347281456, "learning_rate": 0.001, "loss": 0.3913, "step": 9297 }, { "epoch": 0.25655228549650444, "grad_norm": 0.0026549468748271465, "learning_rate": 0.001, "loss": 0.4281, "step": 9298 }, { "epoch": 0.2565798776975688, "grad_norm": 0.004998568445444107, "learning_rate": 0.001, "loss": 0.3905, "step": 9299 }, { "epoch": 0.25660746989863314, "grad_norm": 0.002522986149415374, "learning_rate": 0.001, "loss": 0.4141, "step": 9300 }, { "epoch": 0.2566350620996975, "grad_norm": 0.0028745641466230154, "learning_rate": 0.001, "loss": 0.3847, "step": 9301 }, { "epoch": 0.2566626543007619, "grad_norm": 0.0056059998460114, "learning_rate": 0.001, "loss": 0.4198, "step": 9302 }, { "epoch": 0.25669024650182626, "grad_norm": 0.002385946689173579, "learning_rate": 0.001, "loss": 0.4059, "step": 9303 }, { "epoch": 0.2567178387028906, "grad_norm": 0.0057910652831196785, "learning_rate": 0.001, "loss": 0.4043, "step": 9304 }, { "epoch": 0.256745430903955, "grad_norm": 0.003377356566488743, "learning_rate": 0.001, "loss": 0.4097, "step": 9305 }, { "epoch": 0.25677302310501937, "grad_norm": 0.002434053458273411, "learning_rate": 0.001, "loss": 0.4205, "step": 9306 }, { "epoch": 0.2568006153060837, "grad_norm": 0.0029004281386733055, "learning_rate": 0.001, "loss": 0.3941, "step": 9307 }, { "epoch": 0.25682820750714813, "grad_norm": 0.0032230631913989782, "learning_rate": 0.001, "loss": 0.3965, "step": 9308 }, { "epoch": 0.2568557997082125, "grad_norm": 0.00245184195227921, "learning_rate": 0.001, "loss": 0.4297, "step": 9309 }, { "epoch": 0.25688339190927684, "grad_norm": 0.0030043041333556175, "learning_rate": 0.001, "loss": 0.3851, "step": 9310 }, { "epoch": 0.2569109841103412, "grad_norm": 0.007739334367215633, "learning_rate": 0.001, "loss": 0.3723, "step": 9311 }, { "epoch": 0.2569385763114056, "grad_norm": 0.002734127687290311, "learning_rate": 0.001, "loss": 0.4155, "step": 9312 }, { "epoch": 0.25696616851246995, "grad_norm": 0.004879895132035017, "learning_rate": 0.001, "loss": 0.3962, "step": 9313 }, { "epoch": 0.2569937607135343, "grad_norm": 0.002906453562900424, "learning_rate": 0.001, "loss": 0.4332, "step": 9314 }, { "epoch": 0.2570213529145987, "grad_norm": 0.004463196266442537, "learning_rate": 0.001, "loss": 0.3455, "step": 9315 }, { "epoch": 0.25704894511566306, "grad_norm": 0.004677020013332367, "learning_rate": 0.001, "loss": 0.3984, "step": 9316 }, { "epoch": 0.2570765373167274, "grad_norm": 0.006812858395278454, "learning_rate": 0.001, "loss": 0.415, "step": 9317 }, { "epoch": 0.25710412951779177, "grad_norm": 0.0026047630235552788, "learning_rate": 0.001, "loss": 0.4258, "step": 9318 }, { "epoch": 0.2571317217188562, "grad_norm": 0.004368063993752003, "learning_rate": 0.001, "loss": 0.3617, "step": 9319 }, { "epoch": 0.25715931391992053, "grad_norm": 0.0024550901725888252, "learning_rate": 0.001, "loss": 0.4326, "step": 9320 }, { "epoch": 0.2571869061209849, "grad_norm": 0.003168846946209669, "learning_rate": 0.001, "loss": 0.4061, "step": 9321 }, { "epoch": 0.2572144983220493, "grad_norm": 0.004993709735572338, "learning_rate": 0.001, "loss": 0.3897, "step": 9322 }, { "epoch": 0.25724209052311364, "grad_norm": 0.0027966087218374014, "learning_rate": 0.001, "loss": 0.3959, "step": 9323 }, { "epoch": 0.257269682724178, "grad_norm": 0.0036490256898105145, "learning_rate": 0.001, "loss": 0.3662, "step": 9324 }, { "epoch": 0.2572972749252424, "grad_norm": 0.006444889586418867, "learning_rate": 0.001, "loss": 0.3677, "step": 9325 }, { "epoch": 0.25732486712630676, "grad_norm": 0.005108532030135393, "learning_rate": 0.001, "loss": 0.4314, "step": 9326 }, { "epoch": 0.2573524593273711, "grad_norm": 0.004126682877540588, "learning_rate": 0.001, "loss": 0.3793, "step": 9327 }, { "epoch": 0.25738005152843546, "grad_norm": 0.002818217733874917, "learning_rate": 0.001, "loss": 0.4286, "step": 9328 }, { "epoch": 0.25740764372949987, "grad_norm": 0.0042424676939845085, "learning_rate": 0.001, "loss": 0.4339, "step": 9329 }, { "epoch": 0.2574352359305642, "grad_norm": 0.003638759721070528, "learning_rate": 0.001, "loss": 0.4323, "step": 9330 }, { "epoch": 0.2574628281316286, "grad_norm": 0.003619089489802718, "learning_rate": 0.001, "loss": 0.366, "step": 9331 }, { "epoch": 0.257490420332693, "grad_norm": 0.0029553293716162443, "learning_rate": 0.001, "loss": 0.3955, "step": 9332 }, { "epoch": 0.25751801253375733, "grad_norm": 0.003728683805093169, "learning_rate": 0.001, "loss": 0.4057, "step": 9333 }, { "epoch": 0.2575456047348217, "grad_norm": 0.006002207286655903, "learning_rate": 0.001, "loss": 0.3933, "step": 9334 }, { "epoch": 0.2575731969358861, "grad_norm": 0.005744462367147207, "learning_rate": 0.001, "loss": 0.3832, "step": 9335 }, { "epoch": 0.25760078913695045, "grad_norm": 0.006803566124290228, "learning_rate": 0.001, "loss": 0.4151, "step": 9336 }, { "epoch": 0.2576283813380148, "grad_norm": 0.0028065780643373728, "learning_rate": 0.001, "loss": 0.4086, "step": 9337 }, { "epoch": 0.25765597353907915, "grad_norm": 0.003158635227009654, "learning_rate": 0.001, "loss": 0.4321, "step": 9338 }, { "epoch": 0.25768356574014356, "grad_norm": 0.0043748971074819565, "learning_rate": 0.001, "loss": 0.3857, "step": 9339 }, { "epoch": 0.2577111579412079, "grad_norm": 0.0038016666658222675, "learning_rate": 0.001, "loss": 0.4322, "step": 9340 }, { "epoch": 0.25773875014227227, "grad_norm": 0.005203484557569027, "learning_rate": 0.001, "loss": 0.3926, "step": 9341 }, { "epoch": 0.2577663423433367, "grad_norm": 0.0031318182591348886, "learning_rate": 0.001, "loss": 0.3828, "step": 9342 }, { "epoch": 0.257793934544401, "grad_norm": 0.0021927165798842907, "learning_rate": 0.001, "loss": 0.3997, "step": 9343 }, { "epoch": 0.2578215267454654, "grad_norm": 0.008451412431895733, "learning_rate": 0.001, "loss": 0.3882, "step": 9344 }, { "epoch": 0.2578491189465298, "grad_norm": 0.012602372094988823, "learning_rate": 0.001, "loss": 0.3981, "step": 9345 }, { "epoch": 0.25787671114759414, "grad_norm": 0.004658313933759928, "learning_rate": 0.001, "loss": 0.3969, "step": 9346 }, { "epoch": 0.2579043033486585, "grad_norm": 0.029415573924779892, "learning_rate": 0.001, "loss": 0.3801, "step": 9347 }, { "epoch": 0.25793189554972284, "grad_norm": 0.005347141530364752, "learning_rate": 0.001, "loss": 0.395, "step": 9348 }, { "epoch": 0.25795948775078725, "grad_norm": 0.008220274932682514, "learning_rate": 0.001, "loss": 0.3721, "step": 9349 }, { "epoch": 0.2579870799518516, "grad_norm": 0.0034864528570324183, "learning_rate": 0.001, "loss": 0.396, "step": 9350 }, { "epoch": 0.25801467215291596, "grad_norm": 0.0023793810978531837, "learning_rate": 0.001, "loss": 0.4201, "step": 9351 }, { "epoch": 0.25804226435398037, "grad_norm": 0.00437847338616848, "learning_rate": 0.001, "loss": 0.4082, "step": 9352 }, { "epoch": 0.2580698565550447, "grad_norm": 0.011434728279709816, "learning_rate": 0.001, "loss": 0.4019, "step": 9353 }, { "epoch": 0.25809744875610907, "grad_norm": 0.002974115777760744, "learning_rate": 0.001, "loss": 0.3866, "step": 9354 }, { "epoch": 0.2581250409571735, "grad_norm": 0.0029822078067809343, "learning_rate": 0.001, "loss": 0.3945, "step": 9355 }, { "epoch": 0.25815263315823783, "grad_norm": 0.004817003384232521, "learning_rate": 0.001, "loss": 0.413, "step": 9356 }, { "epoch": 0.2581802253593022, "grad_norm": 0.005547203589230776, "learning_rate": 0.001, "loss": 0.3835, "step": 9357 }, { "epoch": 0.25820781756036654, "grad_norm": 0.002377490047365427, "learning_rate": 0.001, "loss": 0.3958, "step": 9358 }, { "epoch": 0.25823540976143095, "grad_norm": 0.0028770265635102987, "learning_rate": 0.001, "loss": 0.376, "step": 9359 }, { "epoch": 0.2582630019624953, "grad_norm": 0.003406745847314596, "learning_rate": 0.001, "loss": 0.3971, "step": 9360 }, { "epoch": 0.25829059416355965, "grad_norm": 0.002956562442705035, "learning_rate": 0.001, "loss": 0.3923, "step": 9361 }, { "epoch": 0.25831818636462406, "grad_norm": 0.0038173554930835962, "learning_rate": 0.001, "loss": 0.4468, "step": 9362 }, { "epoch": 0.2583457785656884, "grad_norm": 0.003059924114495516, "learning_rate": 0.001, "loss": 0.3992, "step": 9363 }, { "epoch": 0.25837337076675276, "grad_norm": 0.002305653179064393, "learning_rate": 0.001, "loss": 0.4215, "step": 9364 }, { "epoch": 0.25840096296781717, "grad_norm": 0.00254283519461751, "learning_rate": 0.001, "loss": 0.4171, "step": 9365 }, { "epoch": 0.2584285551688815, "grad_norm": 0.00530956732109189, "learning_rate": 0.001, "loss": 0.3919, "step": 9366 }, { "epoch": 0.2584561473699459, "grad_norm": 0.0036723476368933916, "learning_rate": 0.001, "loss": 0.3638, "step": 9367 }, { "epoch": 0.25848373957101023, "grad_norm": 0.008182425051927567, "learning_rate": 0.001, "loss": 0.4096, "step": 9368 }, { "epoch": 0.25851133177207464, "grad_norm": 0.01312658004462719, "learning_rate": 0.001, "loss": 0.3924, "step": 9369 }, { "epoch": 0.258538923973139, "grad_norm": 0.0029170040506869555, "learning_rate": 0.001, "loss": 0.3817, "step": 9370 }, { "epoch": 0.25856651617420334, "grad_norm": 0.00417022779583931, "learning_rate": 0.001, "loss": 0.3712, "step": 9371 }, { "epoch": 0.25859410837526775, "grad_norm": 0.007455759681761265, "learning_rate": 0.001, "loss": 0.377, "step": 9372 }, { "epoch": 0.2586217005763321, "grad_norm": 0.0027683484368026257, "learning_rate": 0.001, "loss": 0.4119, "step": 9373 }, { "epoch": 0.25864929277739646, "grad_norm": 0.005918456707149744, "learning_rate": 0.001, "loss": 0.3668, "step": 9374 }, { "epoch": 0.25867688497846086, "grad_norm": 0.002821930916979909, "learning_rate": 0.001, "loss": 0.4186, "step": 9375 }, { "epoch": 0.2587044771795252, "grad_norm": 0.003820334793999791, "learning_rate": 0.001, "loss": 0.4133, "step": 9376 }, { "epoch": 0.25873206938058957, "grad_norm": 0.003256513038650155, "learning_rate": 0.001, "loss": 0.3759, "step": 9377 }, { "epoch": 0.2587596615816539, "grad_norm": 0.010328397154808044, "learning_rate": 0.001, "loss": 0.3832, "step": 9378 }, { "epoch": 0.25878725378271833, "grad_norm": 0.002478259615600109, "learning_rate": 0.001, "loss": 0.4065, "step": 9379 }, { "epoch": 0.2588148459837827, "grad_norm": 0.0034857571590691805, "learning_rate": 0.001, "loss": 0.3946, "step": 9380 }, { "epoch": 0.25884243818484703, "grad_norm": 0.00216446490958333, "learning_rate": 0.001, "loss": 0.4009, "step": 9381 }, { "epoch": 0.25887003038591144, "grad_norm": 0.0028666919097304344, "learning_rate": 0.001, "loss": 0.3897, "step": 9382 }, { "epoch": 0.2588976225869758, "grad_norm": 0.002316189929842949, "learning_rate": 0.001, "loss": 0.3896, "step": 9383 }, { "epoch": 0.25892521478804015, "grad_norm": 0.0033454406075179577, "learning_rate": 0.001, "loss": 0.3701, "step": 9384 }, { "epoch": 0.25895280698910456, "grad_norm": 0.0029785428196191788, "learning_rate": 0.001, "loss": 0.379, "step": 9385 }, { "epoch": 0.2589803991901689, "grad_norm": 0.0043611531145870686, "learning_rate": 0.001, "loss": 0.4064, "step": 9386 }, { "epoch": 0.25900799139123326, "grad_norm": 0.004450157284736633, "learning_rate": 0.001, "loss": 0.4026, "step": 9387 }, { "epoch": 0.2590355835922976, "grad_norm": 0.003939083311706781, "learning_rate": 0.001, "loss": 0.3898, "step": 9388 }, { "epoch": 0.259063175793362, "grad_norm": 0.0035476302728056908, "learning_rate": 0.001, "loss": 0.3751, "step": 9389 }, { "epoch": 0.2590907679944264, "grad_norm": 0.005521733313798904, "learning_rate": 0.001, "loss": 0.4249, "step": 9390 }, { "epoch": 0.2591183601954907, "grad_norm": 0.0055731479078531265, "learning_rate": 0.001, "loss": 0.3996, "step": 9391 }, { "epoch": 0.25914595239655513, "grad_norm": 0.003587324172258377, "learning_rate": 0.001, "loss": 0.374, "step": 9392 }, { "epoch": 0.2591735445976195, "grad_norm": 0.018497616052627563, "learning_rate": 0.001, "loss": 0.3912, "step": 9393 }, { "epoch": 0.25920113679868384, "grad_norm": 0.005223001353442669, "learning_rate": 0.001, "loss": 0.4013, "step": 9394 }, { "epoch": 0.25922872899974825, "grad_norm": 0.0024736267514526844, "learning_rate": 0.001, "loss": 0.3715, "step": 9395 }, { "epoch": 0.2592563212008126, "grad_norm": 0.0023903397377580404, "learning_rate": 0.001, "loss": 0.4097, "step": 9396 }, { "epoch": 0.25928391340187695, "grad_norm": 0.0038790139369666576, "learning_rate": 0.001, "loss": 0.3862, "step": 9397 }, { "epoch": 0.2593115056029413, "grad_norm": 0.0027185981161892414, "learning_rate": 0.001, "loss": 0.3727, "step": 9398 }, { "epoch": 0.2593390978040057, "grad_norm": 0.007733003702014685, "learning_rate": 0.001, "loss": 0.368, "step": 9399 }, { "epoch": 0.25936669000507007, "grad_norm": 0.003809612710028887, "learning_rate": 0.001, "loss": 0.3936, "step": 9400 }, { "epoch": 0.2593942822061344, "grad_norm": 0.004529821220785379, "learning_rate": 0.001, "loss": 0.4428, "step": 9401 }, { "epoch": 0.2594218744071988, "grad_norm": 0.0025704330764710903, "learning_rate": 0.001, "loss": 0.4154, "step": 9402 }, { "epoch": 0.2594494666082632, "grad_norm": 0.0027423694264143705, "learning_rate": 0.001, "loss": 0.4307, "step": 9403 }, { "epoch": 0.25947705880932753, "grad_norm": 0.0036277722101658583, "learning_rate": 0.001, "loss": 0.3904, "step": 9404 }, { "epoch": 0.25950465101039194, "grad_norm": 0.003103514201939106, "learning_rate": 0.001, "loss": 0.4137, "step": 9405 }, { "epoch": 0.2595322432114563, "grad_norm": 0.005228511989116669, "learning_rate": 0.001, "loss": 0.3989, "step": 9406 }, { "epoch": 0.25955983541252065, "grad_norm": 0.0027565350756049156, "learning_rate": 0.001, "loss": 0.4142, "step": 9407 }, { "epoch": 0.259587427613585, "grad_norm": 0.005467361304908991, "learning_rate": 0.001, "loss": 0.38, "step": 9408 }, { "epoch": 0.2596150198146494, "grad_norm": 0.002642360283061862, "learning_rate": 0.001, "loss": 0.3789, "step": 9409 }, { "epoch": 0.25964261201571376, "grad_norm": 0.00389938335865736, "learning_rate": 0.001, "loss": 0.416, "step": 9410 }, { "epoch": 0.2596702042167781, "grad_norm": 0.0047807167284190655, "learning_rate": 0.001, "loss": 0.41, "step": 9411 }, { "epoch": 0.2596977964178425, "grad_norm": 0.017033278942108154, "learning_rate": 0.001, "loss": 0.4064, "step": 9412 }, { "epoch": 0.25972538861890687, "grad_norm": 0.014154274947941303, "learning_rate": 0.001, "loss": 0.4051, "step": 9413 }, { "epoch": 0.2597529808199712, "grad_norm": 0.009238173253834248, "learning_rate": 0.001, "loss": 0.3483, "step": 9414 }, { "epoch": 0.2597805730210356, "grad_norm": 0.008399531245231628, "learning_rate": 0.001, "loss": 0.4039, "step": 9415 }, { "epoch": 0.2598081652221, "grad_norm": 0.002651994815096259, "learning_rate": 0.001, "loss": 0.389, "step": 9416 }, { "epoch": 0.25983575742316434, "grad_norm": 0.003266576211899519, "learning_rate": 0.001, "loss": 0.3792, "step": 9417 }, { "epoch": 0.2598633496242287, "grad_norm": 0.0043223886750638485, "learning_rate": 0.001, "loss": 0.3893, "step": 9418 }, { "epoch": 0.2598909418252931, "grad_norm": 0.004282352980226278, "learning_rate": 0.001, "loss": 0.382, "step": 9419 }, { "epoch": 0.25991853402635745, "grad_norm": 0.006613946054130793, "learning_rate": 0.001, "loss": 0.3995, "step": 9420 }, { "epoch": 0.2599461262274218, "grad_norm": 0.0021815176587551832, "learning_rate": 0.001, "loss": 0.415, "step": 9421 }, { "epoch": 0.2599737184284862, "grad_norm": 0.0028681547846645117, "learning_rate": 0.001, "loss": 0.4084, "step": 9422 }, { "epoch": 0.26000131062955056, "grad_norm": 0.0035515769850462675, "learning_rate": 0.001, "loss": 0.3948, "step": 9423 }, { "epoch": 0.2600289028306149, "grad_norm": 0.004106239881366491, "learning_rate": 0.001, "loss": 0.3822, "step": 9424 }, { "epoch": 0.26005649503167927, "grad_norm": 0.005105991847813129, "learning_rate": 0.001, "loss": 0.3999, "step": 9425 }, { "epoch": 0.2600840872327437, "grad_norm": 0.004727422725409269, "learning_rate": 0.001, "loss": 0.391, "step": 9426 }, { "epoch": 0.26011167943380803, "grad_norm": 0.005350995343178511, "learning_rate": 0.001, "loss": 0.4007, "step": 9427 }, { "epoch": 0.2601392716348724, "grad_norm": 0.005172072909772396, "learning_rate": 0.001, "loss": 0.3803, "step": 9428 }, { "epoch": 0.2601668638359368, "grad_norm": 0.004639607388526201, "learning_rate": 0.001, "loss": 0.4244, "step": 9429 }, { "epoch": 0.26019445603700114, "grad_norm": 0.004586922936141491, "learning_rate": 0.001, "loss": 0.3792, "step": 9430 }, { "epoch": 0.2602220482380655, "grad_norm": 0.004610531497746706, "learning_rate": 0.001, "loss": 0.3964, "step": 9431 }, { "epoch": 0.2602496404391299, "grad_norm": 0.0026218774728477, "learning_rate": 0.001, "loss": 0.4178, "step": 9432 }, { "epoch": 0.26027723264019426, "grad_norm": 0.006149280350655317, "learning_rate": 0.001, "loss": 0.3892, "step": 9433 }, { "epoch": 0.2603048248412586, "grad_norm": 0.0048711239360272884, "learning_rate": 0.001, "loss": 0.4503, "step": 9434 }, { "epoch": 0.26033241704232296, "grad_norm": 0.03840470314025879, "learning_rate": 0.001, "loss": 0.4005, "step": 9435 }, { "epoch": 0.26036000924338737, "grad_norm": 0.0037160839419811964, "learning_rate": 0.001, "loss": 0.3907, "step": 9436 }, { "epoch": 0.2603876014444517, "grad_norm": 0.0032652821391820908, "learning_rate": 0.001, "loss": 0.4121, "step": 9437 }, { "epoch": 0.2604151936455161, "grad_norm": 0.007966517470777035, "learning_rate": 0.001, "loss": 0.375, "step": 9438 }, { "epoch": 0.2604427858465805, "grad_norm": 0.006473064888268709, "learning_rate": 0.001, "loss": 0.4054, "step": 9439 }, { "epoch": 0.26047037804764483, "grad_norm": 0.005094799678772688, "learning_rate": 0.001, "loss": 0.3895, "step": 9440 }, { "epoch": 0.2604979702487092, "grad_norm": 0.049805864691734314, "learning_rate": 0.001, "loss": 0.3923, "step": 9441 }, { "epoch": 0.2605255624497736, "grad_norm": 0.0036678947508335114, "learning_rate": 0.001, "loss": 0.4396, "step": 9442 }, { "epoch": 0.26055315465083795, "grad_norm": 0.004897846840322018, "learning_rate": 0.001, "loss": 0.366, "step": 9443 }, { "epoch": 0.2605807468519023, "grad_norm": 0.00426288740709424, "learning_rate": 0.001, "loss": 0.3929, "step": 9444 }, { "epoch": 0.26060833905296665, "grad_norm": 0.009089482948184013, "learning_rate": 0.001, "loss": 0.3799, "step": 9445 }, { "epoch": 0.26063593125403106, "grad_norm": 0.00743220467120409, "learning_rate": 0.001, "loss": 0.3701, "step": 9446 }, { "epoch": 0.2606635234550954, "grad_norm": 0.004399383440613747, "learning_rate": 0.001, "loss": 0.4017, "step": 9447 }, { "epoch": 0.26069111565615977, "grad_norm": 0.0023268854711204767, "learning_rate": 0.001, "loss": 0.3957, "step": 9448 }, { "epoch": 0.2607187078572242, "grad_norm": 0.003956654574722052, "learning_rate": 0.001, "loss": 0.3912, "step": 9449 }, { "epoch": 0.2607463000582885, "grad_norm": 0.00389308063313365, "learning_rate": 0.001, "loss": 0.3898, "step": 9450 }, { "epoch": 0.2607738922593529, "grad_norm": 0.0036460221745073795, "learning_rate": 0.001, "loss": 0.3999, "step": 9451 }, { "epoch": 0.2608014844604173, "grad_norm": 0.003133069258183241, "learning_rate": 0.001, "loss": 0.3932, "step": 9452 }, { "epoch": 0.26082907666148164, "grad_norm": 0.0027508933562785387, "learning_rate": 0.001, "loss": 0.3912, "step": 9453 }, { "epoch": 0.260856668862546, "grad_norm": 0.003370395628735423, "learning_rate": 0.001, "loss": 0.4176, "step": 9454 }, { "epoch": 0.26088426106361035, "grad_norm": 0.0036432910710573196, "learning_rate": 0.001, "loss": 0.4114, "step": 9455 }, { "epoch": 0.26091185326467475, "grad_norm": 0.004142069257795811, "learning_rate": 0.001, "loss": 0.3825, "step": 9456 }, { "epoch": 0.2609394454657391, "grad_norm": 0.003075930755585432, "learning_rate": 0.001, "loss": 0.3992, "step": 9457 }, { "epoch": 0.26096703766680346, "grad_norm": 0.002679844619706273, "learning_rate": 0.001, "loss": 0.371, "step": 9458 }, { "epoch": 0.26099462986786787, "grad_norm": 0.0034837801940739155, "learning_rate": 0.001, "loss": 0.3892, "step": 9459 }, { "epoch": 0.2610222220689322, "grad_norm": 0.002534442814067006, "learning_rate": 0.001, "loss": 0.408, "step": 9460 }, { "epoch": 0.26104981426999657, "grad_norm": 0.005891683977097273, "learning_rate": 0.001, "loss": 0.3751, "step": 9461 }, { "epoch": 0.261077406471061, "grad_norm": 0.005468081682920456, "learning_rate": 0.001, "loss": 0.353, "step": 9462 }, { "epoch": 0.26110499867212533, "grad_norm": 0.002956255106255412, "learning_rate": 0.001, "loss": 0.4211, "step": 9463 }, { "epoch": 0.2611325908731897, "grad_norm": 0.003849986707791686, "learning_rate": 0.001, "loss": 0.3962, "step": 9464 }, { "epoch": 0.26116018307425404, "grad_norm": 0.002705160528421402, "learning_rate": 0.001, "loss": 0.4156, "step": 9465 }, { "epoch": 0.26118777527531845, "grad_norm": 0.002549894852563739, "learning_rate": 0.001, "loss": 0.4244, "step": 9466 }, { "epoch": 0.2612153674763828, "grad_norm": 0.003938885871320963, "learning_rate": 0.001, "loss": 0.3799, "step": 9467 }, { "epoch": 0.26124295967744715, "grad_norm": 0.002657047938555479, "learning_rate": 0.001, "loss": 0.3941, "step": 9468 }, { "epoch": 0.26127055187851156, "grad_norm": 0.004167834762483835, "learning_rate": 0.001, "loss": 0.3993, "step": 9469 }, { "epoch": 0.2612981440795759, "grad_norm": 0.0034376648254692554, "learning_rate": 0.001, "loss": 0.3597, "step": 9470 }, { "epoch": 0.26132573628064026, "grad_norm": 0.0034596873447299004, "learning_rate": 0.001, "loss": 0.4028, "step": 9471 }, { "epoch": 0.26135332848170467, "grad_norm": 0.002702021738514304, "learning_rate": 0.001, "loss": 0.3876, "step": 9472 }, { "epoch": 0.261380920682769, "grad_norm": 0.0027853166684508324, "learning_rate": 0.001, "loss": 0.4026, "step": 9473 }, { "epoch": 0.2614085128838334, "grad_norm": 0.0032341419719159603, "learning_rate": 0.001, "loss": 0.4335, "step": 9474 }, { "epoch": 0.26143610508489773, "grad_norm": 0.0034284372813999653, "learning_rate": 0.001, "loss": 0.3815, "step": 9475 }, { "epoch": 0.26146369728596214, "grad_norm": 0.003640861948952079, "learning_rate": 0.001, "loss": 0.4027, "step": 9476 }, { "epoch": 0.2614912894870265, "grad_norm": 0.002866639755666256, "learning_rate": 0.001, "loss": 0.3967, "step": 9477 }, { "epoch": 0.26151888168809084, "grad_norm": 0.0023725773207843304, "learning_rate": 0.001, "loss": 0.3974, "step": 9478 }, { "epoch": 0.26154647388915525, "grad_norm": 0.0032590448390692472, "learning_rate": 0.001, "loss": 0.3543, "step": 9479 }, { "epoch": 0.2615740660902196, "grad_norm": 0.004378579091280699, "learning_rate": 0.001, "loss": 0.3743, "step": 9480 }, { "epoch": 0.26160165829128396, "grad_norm": 0.0037949122488498688, "learning_rate": 0.001, "loss": 0.4333, "step": 9481 }, { "epoch": 0.26162925049234836, "grad_norm": 0.002791197504848242, "learning_rate": 0.001, "loss": 0.3499, "step": 9482 }, { "epoch": 0.2616568426934127, "grad_norm": 0.0038585460279136896, "learning_rate": 0.001, "loss": 0.3701, "step": 9483 }, { "epoch": 0.26168443489447707, "grad_norm": 0.002629459137097001, "learning_rate": 0.001, "loss": 0.423, "step": 9484 }, { "epoch": 0.2617120270955414, "grad_norm": 0.0032473283354192972, "learning_rate": 0.001, "loss": 0.3866, "step": 9485 }, { "epoch": 0.26173961929660583, "grad_norm": 0.0029964474961161613, "learning_rate": 0.001, "loss": 0.3792, "step": 9486 }, { "epoch": 0.2617672114976702, "grad_norm": 0.0031551928259432316, "learning_rate": 0.001, "loss": 0.4245, "step": 9487 }, { "epoch": 0.26179480369873454, "grad_norm": 0.00244794599711895, "learning_rate": 0.001, "loss": 0.3962, "step": 9488 }, { "epoch": 0.26182239589979894, "grad_norm": 0.003302481258288026, "learning_rate": 0.001, "loss": 0.4174, "step": 9489 }, { "epoch": 0.2618499881008633, "grad_norm": 0.0027688101399689913, "learning_rate": 0.001, "loss": 0.4026, "step": 9490 }, { "epoch": 0.26187758030192765, "grad_norm": 0.0031254065688699484, "learning_rate": 0.001, "loss": 0.3972, "step": 9491 }, { "epoch": 0.26190517250299206, "grad_norm": 0.00236527225933969, "learning_rate": 0.001, "loss": 0.4213, "step": 9492 }, { "epoch": 0.2619327647040564, "grad_norm": 0.0028193267062306404, "learning_rate": 0.001, "loss": 0.3627, "step": 9493 }, { "epoch": 0.26196035690512076, "grad_norm": 0.002419235184788704, "learning_rate": 0.001, "loss": 0.4039, "step": 9494 }, { "epoch": 0.2619879491061851, "grad_norm": 0.006053110118955374, "learning_rate": 0.001, "loss": 0.4159, "step": 9495 }, { "epoch": 0.2620155413072495, "grad_norm": 0.004557557869702578, "learning_rate": 0.001, "loss": 0.3878, "step": 9496 }, { "epoch": 0.2620431335083139, "grad_norm": 0.002896302379667759, "learning_rate": 0.001, "loss": 0.3909, "step": 9497 }, { "epoch": 0.2620707257093782, "grad_norm": 0.002858472056686878, "learning_rate": 0.001, "loss": 0.3965, "step": 9498 }, { "epoch": 0.26209831791044264, "grad_norm": 0.004048534668982029, "learning_rate": 0.001, "loss": 0.3825, "step": 9499 }, { "epoch": 0.262125910111507, "grad_norm": 0.003960879985243082, "learning_rate": 0.001, "loss": 0.4, "step": 9500 }, { "epoch": 0.262125910111507, "eval_runtime": 25.194, "eval_samples_per_second": 1.27, "eval_steps_per_second": 0.159, "step": 9500 }, { "epoch": 0.26215350231257134, "grad_norm": 0.0026859226636588573, "learning_rate": 0.001, "loss": 0.3963, "step": 9501 }, { "epoch": 0.26218109451363575, "grad_norm": 0.004340642131865025, "learning_rate": 0.001, "loss": 0.3599, "step": 9502 }, { "epoch": 0.2622086867147001, "grad_norm": 0.003308952320367098, "learning_rate": 0.001, "loss": 0.3751, "step": 9503 }, { "epoch": 0.26223627891576445, "grad_norm": 0.003607766004279256, "learning_rate": 0.001, "loss": 0.4285, "step": 9504 }, { "epoch": 0.2622638711168288, "grad_norm": 0.004522815812379122, "learning_rate": 0.001, "loss": 0.3602, "step": 9505 }, { "epoch": 0.2622914633178932, "grad_norm": 0.004235134460031986, "learning_rate": 0.001, "loss": 0.3815, "step": 9506 }, { "epoch": 0.26231905551895757, "grad_norm": 0.010735023766756058, "learning_rate": 0.001, "loss": 0.4159, "step": 9507 }, { "epoch": 0.2623466477200219, "grad_norm": 0.004076390992850065, "learning_rate": 0.001, "loss": 0.4218, "step": 9508 }, { "epoch": 0.2623742399210863, "grad_norm": 0.007312237750738859, "learning_rate": 0.001, "loss": 0.4081, "step": 9509 }, { "epoch": 0.2624018321221507, "grad_norm": 0.007191124372184277, "learning_rate": 0.001, "loss": 0.38, "step": 9510 }, { "epoch": 0.26242942432321503, "grad_norm": 0.0035255979746580124, "learning_rate": 0.001, "loss": 0.4116, "step": 9511 }, { "epoch": 0.2624570165242794, "grad_norm": 0.004981547594070435, "learning_rate": 0.001, "loss": 0.4575, "step": 9512 }, { "epoch": 0.2624846087253438, "grad_norm": 0.006985159125179052, "learning_rate": 0.001, "loss": 0.3878, "step": 9513 }, { "epoch": 0.26251220092640815, "grad_norm": 0.005956006236374378, "learning_rate": 0.001, "loss": 0.3624, "step": 9514 }, { "epoch": 0.2625397931274725, "grad_norm": 0.005447957664728165, "learning_rate": 0.001, "loss": 0.4185, "step": 9515 }, { "epoch": 0.2625673853285369, "grad_norm": 0.007735871244221926, "learning_rate": 0.001, "loss": 0.4083, "step": 9516 }, { "epoch": 0.26259497752960126, "grad_norm": 0.004465447273105383, "learning_rate": 0.001, "loss": 0.4081, "step": 9517 }, { "epoch": 0.2626225697306656, "grad_norm": 0.0033752690069377422, "learning_rate": 0.001, "loss": 0.3773, "step": 9518 }, { "epoch": 0.26265016193173, "grad_norm": 0.006316347047686577, "learning_rate": 0.001, "loss": 0.4389, "step": 9519 }, { "epoch": 0.26267775413279437, "grad_norm": 0.019447481259703636, "learning_rate": 0.001, "loss": 0.3625, "step": 9520 }, { "epoch": 0.2627053463338587, "grad_norm": 0.0039499313570559025, "learning_rate": 0.001, "loss": 0.42, "step": 9521 }, { "epoch": 0.2627329385349231, "grad_norm": 0.005296787712723017, "learning_rate": 0.001, "loss": 0.397, "step": 9522 }, { "epoch": 0.2627605307359875, "grad_norm": 0.0028091988060623407, "learning_rate": 0.001, "loss": 0.3897, "step": 9523 }, { "epoch": 0.26278812293705184, "grad_norm": 0.004335075616836548, "learning_rate": 0.001, "loss": 0.3796, "step": 9524 }, { "epoch": 0.2628157151381162, "grad_norm": 0.0030884994193911552, "learning_rate": 0.001, "loss": 0.415, "step": 9525 }, { "epoch": 0.2628433073391806, "grad_norm": 0.004353479482233524, "learning_rate": 0.001, "loss": 0.4152, "step": 9526 }, { "epoch": 0.26287089954024495, "grad_norm": 0.004048333968967199, "learning_rate": 0.001, "loss": 0.4178, "step": 9527 }, { "epoch": 0.2628984917413093, "grad_norm": 0.0034877778962254524, "learning_rate": 0.001, "loss": 0.3853, "step": 9528 }, { "epoch": 0.2629260839423737, "grad_norm": 0.0037483058404177427, "learning_rate": 0.001, "loss": 0.399, "step": 9529 }, { "epoch": 0.26295367614343806, "grad_norm": 0.005867067724466324, "learning_rate": 0.001, "loss": 0.3985, "step": 9530 }, { "epoch": 0.2629812683445024, "grad_norm": 0.004038537386804819, "learning_rate": 0.001, "loss": 0.3983, "step": 9531 }, { "epoch": 0.26300886054556677, "grad_norm": 0.005649103317409754, "learning_rate": 0.001, "loss": 0.4183, "step": 9532 }, { "epoch": 0.2630364527466312, "grad_norm": 0.0033814937341958284, "learning_rate": 0.001, "loss": 0.3688, "step": 9533 }, { "epoch": 0.26306404494769553, "grad_norm": 0.0035686511546373367, "learning_rate": 0.001, "loss": 0.3774, "step": 9534 }, { "epoch": 0.2630916371487599, "grad_norm": 0.004245868884027004, "learning_rate": 0.001, "loss": 0.3896, "step": 9535 }, { "epoch": 0.2631192293498243, "grad_norm": 0.006375094410032034, "learning_rate": 0.001, "loss": 0.3639, "step": 9536 }, { "epoch": 0.26314682155088864, "grad_norm": 0.008856172673404217, "learning_rate": 0.001, "loss": 0.3961, "step": 9537 }, { "epoch": 0.263174413751953, "grad_norm": 0.005304283462464809, "learning_rate": 0.001, "loss": 0.3915, "step": 9538 }, { "epoch": 0.2632020059530174, "grad_norm": 0.003037388902157545, "learning_rate": 0.001, "loss": 0.4013, "step": 9539 }, { "epoch": 0.26322959815408176, "grad_norm": 0.00462722172960639, "learning_rate": 0.001, "loss": 0.3675, "step": 9540 }, { "epoch": 0.2632571903551461, "grad_norm": 0.009490979835391045, "learning_rate": 0.001, "loss": 0.3635, "step": 9541 }, { "epoch": 0.26328478255621046, "grad_norm": 0.0032088488806039095, "learning_rate": 0.001, "loss": 0.3793, "step": 9542 }, { "epoch": 0.26331237475727487, "grad_norm": 0.006467896047979593, "learning_rate": 0.001, "loss": 0.3777, "step": 9543 }, { "epoch": 0.2633399669583392, "grad_norm": 0.005221541505306959, "learning_rate": 0.001, "loss": 0.395, "step": 9544 }, { "epoch": 0.2633675591594036, "grad_norm": 0.0027723468374460936, "learning_rate": 0.001, "loss": 0.3899, "step": 9545 }, { "epoch": 0.263395151360468, "grad_norm": 0.0038048315327614546, "learning_rate": 0.001, "loss": 0.4034, "step": 9546 }, { "epoch": 0.26342274356153234, "grad_norm": 0.004077599383890629, "learning_rate": 0.001, "loss": 0.438, "step": 9547 }, { "epoch": 0.2634503357625967, "grad_norm": 0.0045372615568339825, "learning_rate": 0.001, "loss": 0.3785, "step": 9548 }, { "epoch": 0.2634779279636611, "grad_norm": 0.0027856752276420593, "learning_rate": 0.001, "loss": 0.4039, "step": 9549 }, { "epoch": 0.26350552016472545, "grad_norm": 0.0026442657690495253, "learning_rate": 0.001, "loss": 0.4157, "step": 9550 }, { "epoch": 0.2635331123657898, "grad_norm": 0.003073024796321988, "learning_rate": 0.001, "loss": 0.4273, "step": 9551 }, { "epoch": 0.26356070456685415, "grad_norm": 0.0030135842971503735, "learning_rate": 0.001, "loss": 0.4209, "step": 9552 }, { "epoch": 0.26358829676791856, "grad_norm": 0.004977943375706673, "learning_rate": 0.001, "loss": 0.4246, "step": 9553 }, { "epoch": 0.2636158889689829, "grad_norm": 0.0030254484154284, "learning_rate": 0.001, "loss": 0.3803, "step": 9554 }, { "epoch": 0.26364348117004727, "grad_norm": 0.003810320282354951, "learning_rate": 0.001, "loss": 0.3984, "step": 9555 }, { "epoch": 0.2636710733711117, "grad_norm": 0.002874776953831315, "learning_rate": 0.001, "loss": 0.4319, "step": 9556 }, { "epoch": 0.263698665572176, "grad_norm": 0.00934076588600874, "learning_rate": 0.001, "loss": 0.4137, "step": 9557 }, { "epoch": 0.2637262577732404, "grad_norm": 0.0022208630107343197, "learning_rate": 0.001, "loss": 0.4178, "step": 9558 }, { "epoch": 0.2637538499743048, "grad_norm": 0.002492060884833336, "learning_rate": 0.001, "loss": 0.4334, "step": 9559 }, { "epoch": 0.26378144217536914, "grad_norm": 0.005849645473062992, "learning_rate": 0.001, "loss": 0.3854, "step": 9560 }, { "epoch": 0.2638090343764335, "grad_norm": 0.004015061073005199, "learning_rate": 0.001, "loss": 0.3674, "step": 9561 }, { "epoch": 0.26383662657749785, "grad_norm": 0.0033112233504652977, "learning_rate": 0.001, "loss": 0.3804, "step": 9562 }, { "epoch": 0.26386421877856225, "grad_norm": 0.002749896375462413, "learning_rate": 0.001, "loss": 0.4021, "step": 9563 }, { "epoch": 0.2638918109796266, "grad_norm": 0.0027315288316458464, "learning_rate": 0.001, "loss": 0.3964, "step": 9564 }, { "epoch": 0.26391940318069096, "grad_norm": 0.008834882639348507, "learning_rate": 0.001, "loss": 0.3689, "step": 9565 }, { "epoch": 0.26394699538175537, "grad_norm": 0.01456514373421669, "learning_rate": 0.001, "loss": 0.3782, "step": 9566 }, { "epoch": 0.2639745875828197, "grad_norm": 0.0028064576908946037, "learning_rate": 0.001, "loss": 0.4158, "step": 9567 }, { "epoch": 0.2640021797838841, "grad_norm": 0.00802356656640768, "learning_rate": 0.001, "loss": 0.415, "step": 9568 }, { "epoch": 0.2640297719849485, "grad_norm": 0.0031302161514759064, "learning_rate": 0.001, "loss": 0.4031, "step": 9569 }, { "epoch": 0.26405736418601283, "grad_norm": 0.0035431873984634876, "learning_rate": 0.001, "loss": 0.3863, "step": 9570 }, { "epoch": 0.2640849563870772, "grad_norm": 0.002664680825546384, "learning_rate": 0.001, "loss": 0.4043, "step": 9571 }, { "epoch": 0.26411254858814154, "grad_norm": 0.006673896219581366, "learning_rate": 0.001, "loss": 0.3966, "step": 9572 }, { "epoch": 0.26414014078920595, "grad_norm": 0.003176551777869463, "learning_rate": 0.001, "loss": 0.3871, "step": 9573 }, { "epoch": 0.2641677329902703, "grad_norm": 0.01657606102526188, "learning_rate": 0.001, "loss": 0.4126, "step": 9574 }, { "epoch": 0.26419532519133465, "grad_norm": 0.0069385310634970665, "learning_rate": 0.001, "loss": 0.4318, "step": 9575 }, { "epoch": 0.26422291739239906, "grad_norm": 0.009939515963196754, "learning_rate": 0.001, "loss": 0.4081, "step": 9576 }, { "epoch": 0.2642505095934634, "grad_norm": 0.002801012946292758, "learning_rate": 0.001, "loss": 0.4001, "step": 9577 }, { "epoch": 0.26427810179452776, "grad_norm": 0.003218452911823988, "learning_rate": 0.001, "loss": 0.3829, "step": 9578 }, { "epoch": 0.2643056939955922, "grad_norm": 0.0029445989057421684, "learning_rate": 0.001, "loss": 0.4012, "step": 9579 }, { "epoch": 0.2643332861966565, "grad_norm": 0.003089828649535775, "learning_rate": 0.001, "loss": 0.4091, "step": 9580 }, { "epoch": 0.2643608783977209, "grad_norm": 0.0025015585124492645, "learning_rate": 0.001, "loss": 0.4079, "step": 9581 }, { "epoch": 0.26438847059878523, "grad_norm": 0.0021664395462721586, "learning_rate": 0.001, "loss": 0.4003, "step": 9582 }, { "epoch": 0.26441606279984964, "grad_norm": 0.003756561316549778, "learning_rate": 0.001, "loss": 0.388, "step": 9583 }, { "epoch": 0.264443655000914, "grad_norm": 0.003459861734881997, "learning_rate": 0.001, "loss": 0.41, "step": 9584 }, { "epoch": 0.26447124720197834, "grad_norm": 0.0028741243295371532, "learning_rate": 0.001, "loss": 0.4028, "step": 9585 }, { "epoch": 0.26449883940304275, "grad_norm": 0.00595883559435606, "learning_rate": 0.001, "loss": 0.4392, "step": 9586 }, { "epoch": 0.2645264316041071, "grad_norm": 0.011750889010727406, "learning_rate": 0.001, "loss": 0.402, "step": 9587 }, { "epoch": 0.26455402380517146, "grad_norm": 0.005314968526363373, "learning_rate": 0.001, "loss": 0.354, "step": 9588 }, { "epoch": 0.26458161600623586, "grad_norm": 0.007636393420398235, "learning_rate": 0.001, "loss": 0.4063, "step": 9589 }, { "epoch": 0.2646092082073002, "grad_norm": 0.003898282302543521, "learning_rate": 0.001, "loss": 0.3966, "step": 9590 }, { "epoch": 0.26463680040836457, "grad_norm": 0.0038661775179207325, "learning_rate": 0.001, "loss": 0.4047, "step": 9591 }, { "epoch": 0.2646643926094289, "grad_norm": 0.006659380160272121, "learning_rate": 0.001, "loss": 0.3853, "step": 9592 }, { "epoch": 0.26469198481049333, "grad_norm": 0.0044357809238135815, "learning_rate": 0.001, "loss": 0.3755, "step": 9593 }, { "epoch": 0.2647195770115577, "grad_norm": 0.0028593826573342085, "learning_rate": 0.001, "loss": 0.3733, "step": 9594 }, { "epoch": 0.26474716921262204, "grad_norm": 0.003539180150255561, "learning_rate": 0.001, "loss": 0.3903, "step": 9595 }, { "epoch": 0.26477476141368644, "grad_norm": 0.002942041726782918, "learning_rate": 0.001, "loss": 0.3914, "step": 9596 }, { "epoch": 0.2648023536147508, "grad_norm": 0.0028285589069128036, "learning_rate": 0.001, "loss": 0.3797, "step": 9597 }, { "epoch": 0.26482994581581515, "grad_norm": 0.0025839281734079123, "learning_rate": 0.001, "loss": 0.4079, "step": 9598 }, { "epoch": 0.2648575380168795, "grad_norm": 0.00478973425924778, "learning_rate": 0.001, "loss": 0.3935, "step": 9599 }, { "epoch": 0.2648851302179439, "grad_norm": 0.00439714128151536, "learning_rate": 0.001, "loss": 0.3596, "step": 9600 }, { "epoch": 0.26491272241900826, "grad_norm": 0.0023904817644506693, "learning_rate": 0.001, "loss": 0.3985, "step": 9601 }, { "epoch": 0.2649403146200726, "grad_norm": 0.0025250171311199665, "learning_rate": 0.001, "loss": 0.3991, "step": 9602 }, { "epoch": 0.264967906821137, "grad_norm": 0.0065780081786215305, "learning_rate": 0.001, "loss": 0.3759, "step": 9603 }, { "epoch": 0.2649954990222014, "grad_norm": 0.0026748040691018105, "learning_rate": 0.001, "loss": 0.4106, "step": 9604 }, { "epoch": 0.2650230912232657, "grad_norm": 0.0031709959730505943, "learning_rate": 0.001, "loss": 0.4583, "step": 9605 }, { "epoch": 0.26505068342433014, "grad_norm": 0.0026485987473279238, "learning_rate": 0.001, "loss": 0.4135, "step": 9606 }, { "epoch": 0.2650782756253945, "grad_norm": 0.0029246548656374216, "learning_rate": 0.001, "loss": 0.4141, "step": 9607 }, { "epoch": 0.26510586782645884, "grad_norm": 0.0031324047595262527, "learning_rate": 0.001, "loss": 0.3857, "step": 9608 }, { "epoch": 0.2651334600275232, "grad_norm": 0.0024871290661394596, "learning_rate": 0.001, "loss": 0.4121, "step": 9609 }, { "epoch": 0.2651610522285876, "grad_norm": 0.00248337653465569, "learning_rate": 0.001, "loss": 0.3977, "step": 9610 }, { "epoch": 0.26518864442965195, "grad_norm": 0.002592930570244789, "learning_rate": 0.001, "loss": 0.4398, "step": 9611 }, { "epoch": 0.2652162366307163, "grad_norm": 0.0031219925731420517, "learning_rate": 0.001, "loss": 0.4022, "step": 9612 }, { "epoch": 0.2652438288317807, "grad_norm": 0.002096372190862894, "learning_rate": 0.001, "loss": 0.4162, "step": 9613 }, { "epoch": 0.26527142103284507, "grad_norm": 0.0021825828589498997, "learning_rate": 0.001, "loss": 0.4235, "step": 9614 }, { "epoch": 0.2652990132339094, "grad_norm": 0.0035961021203547716, "learning_rate": 0.001, "loss": 0.3995, "step": 9615 }, { "epoch": 0.26532660543497383, "grad_norm": 0.003122879657894373, "learning_rate": 0.001, "loss": 0.4624, "step": 9616 }, { "epoch": 0.2653541976360382, "grad_norm": 0.004705473314970732, "learning_rate": 0.001, "loss": 0.4148, "step": 9617 }, { "epoch": 0.26538178983710253, "grad_norm": 0.0024439608678221703, "learning_rate": 0.001, "loss": 0.4063, "step": 9618 }, { "epoch": 0.2654093820381669, "grad_norm": 0.0027413556817919016, "learning_rate": 0.001, "loss": 0.3999, "step": 9619 }, { "epoch": 0.2654369742392313, "grad_norm": 0.004719111602753401, "learning_rate": 0.001, "loss": 0.4197, "step": 9620 }, { "epoch": 0.26546456644029565, "grad_norm": 0.003787653986364603, "learning_rate": 0.001, "loss": 0.4061, "step": 9621 }, { "epoch": 0.26549215864136, "grad_norm": 0.007587883621454239, "learning_rate": 0.001, "loss": 0.3865, "step": 9622 }, { "epoch": 0.2655197508424244, "grad_norm": 0.0023858651984483004, "learning_rate": 0.001, "loss": 0.3952, "step": 9623 }, { "epoch": 0.26554734304348876, "grad_norm": 0.005965366493910551, "learning_rate": 0.001, "loss": 0.3829, "step": 9624 }, { "epoch": 0.2655749352445531, "grad_norm": 0.003585060592740774, "learning_rate": 0.001, "loss": 0.4503, "step": 9625 }, { "epoch": 0.2656025274456175, "grad_norm": 0.006620787549763918, "learning_rate": 0.001, "loss": 0.3972, "step": 9626 }, { "epoch": 0.2656301196466819, "grad_norm": 0.002199475420638919, "learning_rate": 0.001, "loss": 0.4479, "step": 9627 }, { "epoch": 0.2656577118477462, "grad_norm": 0.003352593397721648, "learning_rate": 0.001, "loss": 0.4099, "step": 9628 }, { "epoch": 0.2656853040488106, "grad_norm": 0.00883631780743599, "learning_rate": 0.001, "loss": 0.4001, "step": 9629 }, { "epoch": 0.265712896249875, "grad_norm": 0.0033785421401262283, "learning_rate": 0.001, "loss": 0.4285, "step": 9630 }, { "epoch": 0.26574048845093934, "grad_norm": 0.00507304398342967, "learning_rate": 0.001, "loss": 0.3795, "step": 9631 }, { "epoch": 0.2657680806520037, "grad_norm": 0.010923560708761215, "learning_rate": 0.001, "loss": 0.3992, "step": 9632 }, { "epoch": 0.2657956728530681, "grad_norm": 0.003360697068274021, "learning_rate": 0.001, "loss": 0.3868, "step": 9633 }, { "epoch": 0.26582326505413245, "grad_norm": 0.003309650346636772, "learning_rate": 0.001, "loss": 0.4029, "step": 9634 }, { "epoch": 0.2658508572551968, "grad_norm": 0.003346043173223734, "learning_rate": 0.001, "loss": 0.395, "step": 9635 }, { "epoch": 0.2658784494562612, "grad_norm": 0.002929107751697302, "learning_rate": 0.001, "loss": 0.4192, "step": 9636 }, { "epoch": 0.26590604165732556, "grad_norm": 0.0035214154049754143, "learning_rate": 0.001, "loss": 0.4382, "step": 9637 }, { "epoch": 0.2659336338583899, "grad_norm": 0.004796061664819717, "learning_rate": 0.001, "loss": 0.3684, "step": 9638 }, { "epoch": 0.26596122605945427, "grad_norm": 0.002335017779842019, "learning_rate": 0.001, "loss": 0.4417, "step": 9639 }, { "epoch": 0.2659888182605187, "grad_norm": 0.002536866581067443, "learning_rate": 0.001, "loss": 0.4012, "step": 9640 }, { "epoch": 0.26601641046158303, "grad_norm": 0.0021245151292532682, "learning_rate": 0.001, "loss": 0.4175, "step": 9641 }, { "epoch": 0.2660440026626474, "grad_norm": 0.002907783491536975, "learning_rate": 0.001, "loss": 0.3827, "step": 9642 }, { "epoch": 0.2660715948637118, "grad_norm": 0.00233438890427351, "learning_rate": 0.001, "loss": 0.4061, "step": 9643 }, { "epoch": 0.26609918706477614, "grad_norm": 0.002872066106647253, "learning_rate": 0.001, "loss": 0.4071, "step": 9644 }, { "epoch": 0.2661267792658405, "grad_norm": 0.002991445129737258, "learning_rate": 0.001, "loss": 0.3818, "step": 9645 }, { "epoch": 0.2661543714669049, "grad_norm": 0.004284476395696402, "learning_rate": 0.001, "loss": 0.3798, "step": 9646 }, { "epoch": 0.26618196366796926, "grad_norm": 0.0026576148811727762, "learning_rate": 0.001, "loss": 0.3893, "step": 9647 }, { "epoch": 0.2662095558690336, "grad_norm": 0.0023093062918633223, "learning_rate": 0.001, "loss": 0.4186, "step": 9648 }, { "epoch": 0.26623714807009796, "grad_norm": 0.0027488505002111197, "learning_rate": 0.001, "loss": 0.3849, "step": 9649 }, { "epoch": 0.26626474027116237, "grad_norm": 0.002484068041667342, "learning_rate": 0.001, "loss": 0.4064, "step": 9650 }, { "epoch": 0.2662923324722267, "grad_norm": 0.003056896850466728, "learning_rate": 0.001, "loss": 0.4155, "step": 9651 }, { "epoch": 0.2663199246732911, "grad_norm": 0.0031155794858932495, "learning_rate": 0.001, "loss": 0.4079, "step": 9652 }, { "epoch": 0.2663475168743555, "grad_norm": 0.002967652166262269, "learning_rate": 0.001, "loss": 0.3771, "step": 9653 }, { "epoch": 0.26637510907541984, "grad_norm": 0.004259769804775715, "learning_rate": 0.001, "loss": 0.3793, "step": 9654 }, { "epoch": 0.2664027012764842, "grad_norm": 0.0025000995956361294, "learning_rate": 0.001, "loss": 0.4392, "step": 9655 }, { "epoch": 0.2664302934775486, "grad_norm": 0.0023649544455111027, "learning_rate": 0.001, "loss": 0.4357, "step": 9656 }, { "epoch": 0.26645788567861295, "grad_norm": 0.005546201951801777, "learning_rate": 0.001, "loss": 0.3846, "step": 9657 }, { "epoch": 0.2664854778796773, "grad_norm": 0.005744127556681633, "learning_rate": 0.001, "loss": 0.3935, "step": 9658 }, { "epoch": 0.26651307008074165, "grad_norm": 0.004911984317004681, "learning_rate": 0.001, "loss": 0.4594, "step": 9659 }, { "epoch": 0.26654066228180606, "grad_norm": 0.016794539988040924, "learning_rate": 0.001, "loss": 0.4044, "step": 9660 }, { "epoch": 0.2665682544828704, "grad_norm": 0.004329861607402563, "learning_rate": 0.001, "loss": 0.4239, "step": 9661 }, { "epoch": 0.26659584668393477, "grad_norm": 0.002347084926441312, "learning_rate": 0.001, "loss": 0.4268, "step": 9662 }, { "epoch": 0.2666234388849992, "grad_norm": 0.003959767986088991, "learning_rate": 0.001, "loss": 0.4072, "step": 9663 }, { "epoch": 0.26665103108606353, "grad_norm": 0.0030292284209281206, "learning_rate": 0.001, "loss": 0.3974, "step": 9664 }, { "epoch": 0.2666786232871279, "grad_norm": 0.0031279707327485085, "learning_rate": 0.001, "loss": 0.4021, "step": 9665 }, { "epoch": 0.2667062154881923, "grad_norm": 0.0038146632723510265, "learning_rate": 0.001, "loss": 0.3889, "step": 9666 }, { "epoch": 0.26673380768925664, "grad_norm": 0.002462350530549884, "learning_rate": 0.001, "loss": 0.4424, "step": 9667 }, { "epoch": 0.266761399890321, "grad_norm": 0.0025544725358486176, "learning_rate": 0.001, "loss": 0.4037, "step": 9668 }, { "epoch": 0.26678899209138535, "grad_norm": 0.008749466389417648, "learning_rate": 0.001, "loss": 0.388, "step": 9669 }, { "epoch": 0.26681658429244975, "grad_norm": 0.004481594543904066, "learning_rate": 0.001, "loss": 0.387, "step": 9670 }, { "epoch": 0.2668441764935141, "grad_norm": 0.0027323602698743343, "learning_rate": 0.001, "loss": 0.4081, "step": 9671 }, { "epoch": 0.26687176869457846, "grad_norm": 0.005246414337307215, "learning_rate": 0.001, "loss": 0.4027, "step": 9672 }, { "epoch": 0.26689936089564287, "grad_norm": 0.0037931909319013357, "learning_rate": 0.001, "loss": 0.361, "step": 9673 }, { "epoch": 0.2669269530967072, "grad_norm": 0.0028204875998198986, "learning_rate": 0.001, "loss": 0.3814, "step": 9674 }, { "epoch": 0.2669545452977716, "grad_norm": 0.002369094407185912, "learning_rate": 0.001, "loss": 0.4217, "step": 9675 }, { "epoch": 0.266982137498836, "grad_norm": 0.0028346215840429068, "learning_rate": 0.001, "loss": 0.4193, "step": 9676 }, { "epoch": 0.26700972969990033, "grad_norm": 0.002556850900873542, "learning_rate": 0.001, "loss": 0.4062, "step": 9677 }, { "epoch": 0.2670373219009647, "grad_norm": 0.0030708981212228537, "learning_rate": 0.001, "loss": 0.4116, "step": 9678 }, { "epoch": 0.26706491410202904, "grad_norm": 0.003195315832272172, "learning_rate": 0.001, "loss": 0.397, "step": 9679 }, { "epoch": 0.26709250630309345, "grad_norm": 0.0025848529767245054, "learning_rate": 0.001, "loss": 0.3751, "step": 9680 }, { "epoch": 0.2671200985041578, "grad_norm": 0.0033659334294497967, "learning_rate": 0.001, "loss": 0.3981, "step": 9681 }, { "epoch": 0.26714769070522215, "grad_norm": 0.023203924298286438, "learning_rate": 0.001, "loss": 0.4258, "step": 9682 }, { "epoch": 0.26717528290628656, "grad_norm": 0.016148481518030167, "learning_rate": 0.001, "loss": 0.3905, "step": 9683 }, { "epoch": 0.2672028751073509, "grad_norm": 0.003742114407941699, "learning_rate": 0.001, "loss": 0.4043, "step": 9684 }, { "epoch": 0.26723046730841526, "grad_norm": 0.003448138013482094, "learning_rate": 0.001, "loss": 0.379, "step": 9685 }, { "epoch": 0.2672580595094797, "grad_norm": 0.004335745703428984, "learning_rate": 0.001, "loss": 0.406, "step": 9686 }, { "epoch": 0.267285651710544, "grad_norm": 0.0025457171723246574, "learning_rate": 0.001, "loss": 0.4174, "step": 9687 }, { "epoch": 0.2673132439116084, "grad_norm": 0.0024251220747828484, "learning_rate": 0.001, "loss": 0.4321, "step": 9688 }, { "epoch": 0.26734083611267273, "grad_norm": 0.00228850357234478, "learning_rate": 0.001, "loss": 0.3903, "step": 9689 }, { "epoch": 0.26736842831373714, "grad_norm": 0.0042780437506735325, "learning_rate": 0.001, "loss": 0.387, "step": 9690 }, { "epoch": 0.2673960205148015, "grad_norm": 0.003844790393486619, "learning_rate": 0.001, "loss": 0.3944, "step": 9691 }, { "epoch": 0.26742361271586584, "grad_norm": 0.005739064887166023, "learning_rate": 0.001, "loss": 0.4403, "step": 9692 }, { "epoch": 0.26745120491693025, "grad_norm": 0.0037738836836069822, "learning_rate": 0.001, "loss": 0.3757, "step": 9693 }, { "epoch": 0.2674787971179946, "grad_norm": 0.006218787748366594, "learning_rate": 0.001, "loss": 0.4044, "step": 9694 }, { "epoch": 0.26750638931905896, "grad_norm": 0.002998180454596877, "learning_rate": 0.001, "loss": 0.4152, "step": 9695 }, { "epoch": 0.2675339815201233, "grad_norm": 0.008532087318599224, "learning_rate": 0.001, "loss": 0.3896, "step": 9696 }, { "epoch": 0.2675615737211877, "grad_norm": 0.002803490497171879, "learning_rate": 0.001, "loss": 0.3901, "step": 9697 }, { "epoch": 0.26758916592225207, "grad_norm": 0.003659422043710947, "learning_rate": 0.001, "loss": 0.3915, "step": 9698 }, { "epoch": 0.2676167581233164, "grad_norm": 0.004317516926676035, "learning_rate": 0.001, "loss": 0.4044, "step": 9699 }, { "epoch": 0.26764435032438083, "grad_norm": 0.004090850241482258, "learning_rate": 0.001, "loss": 0.3826, "step": 9700 }, { "epoch": 0.2676719425254452, "grad_norm": 0.005415142513811588, "learning_rate": 0.001, "loss": 0.4334, "step": 9701 }, { "epoch": 0.26769953472650954, "grad_norm": 0.0036655161529779434, "learning_rate": 0.001, "loss": 0.3982, "step": 9702 }, { "epoch": 0.26772712692757394, "grad_norm": 0.008789146319031715, "learning_rate": 0.001, "loss": 0.4141, "step": 9703 }, { "epoch": 0.2677547191286383, "grad_norm": 0.004173342138528824, "learning_rate": 0.001, "loss": 0.3865, "step": 9704 }, { "epoch": 0.26778231132970265, "grad_norm": 0.0031271290499716997, "learning_rate": 0.001, "loss": 0.424, "step": 9705 }, { "epoch": 0.267809903530767, "grad_norm": 0.0030106983613222837, "learning_rate": 0.001, "loss": 0.3698, "step": 9706 }, { "epoch": 0.2678374957318314, "grad_norm": 0.004071452189236879, "learning_rate": 0.001, "loss": 0.3773, "step": 9707 }, { "epoch": 0.26786508793289576, "grad_norm": 0.002735607326030731, "learning_rate": 0.001, "loss": 0.4445, "step": 9708 }, { "epoch": 0.2678926801339601, "grad_norm": 0.0036248862743377686, "learning_rate": 0.001, "loss": 0.3786, "step": 9709 }, { "epoch": 0.2679202723350245, "grad_norm": 0.004495333880186081, "learning_rate": 0.001, "loss": 0.4288, "step": 9710 }, { "epoch": 0.2679478645360889, "grad_norm": 0.003465922549366951, "learning_rate": 0.001, "loss": 0.4178, "step": 9711 }, { "epoch": 0.26797545673715323, "grad_norm": 0.0050760298036038876, "learning_rate": 0.001, "loss": 0.3898, "step": 9712 }, { "epoch": 0.26800304893821764, "grad_norm": 0.0025128766428679228, "learning_rate": 0.001, "loss": 0.4293, "step": 9713 }, { "epoch": 0.268030641139282, "grad_norm": 0.0030928144697099924, "learning_rate": 0.001, "loss": 0.4024, "step": 9714 }, { "epoch": 0.26805823334034634, "grad_norm": 0.0031707047019153833, "learning_rate": 0.001, "loss": 0.4027, "step": 9715 }, { "epoch": 0.2680858255414107, "grad_norm": 0.0074263643473386765, "learning_rate": 0.001, "loss": 0.392, "step": 9716 }, { "epoch": 0.2681134177424751, "grad_norm": 0.004428863059729338, "learning_rate": 0.001, "loss": 0.4157, "step": 9717 }, { "epoch": 0.26814100994353945, "grad_norm": 0.005300541874021292, "learning_rate": 0.001, "loss": 0.421, "step": 9718 }, { "epoch": 0.2681686021446038, "grad_norm": 0.013318234123289585, "learning_rate": 0.001, "loss": 0.3608, "step": 9719 }, { "epoch": 0.2681961943456682, "grad_norm": 0.0030859310645610094, "learning_rate": 0.001, "loss": 0.4177, "step": 9720 }, { "epoch": 0.26822378654673257, "grad_norm": 0.00345474760979414, "learning_rate": 0.001, "loss": 0.3964, "step": 9721 }, { "epoch": 0.2682513787477969, "grad_norm": 0.006549532990902662, "learning_rate": 0.001, "loss": 0.3835, "step": 9722 }, { "epoch": 0.26827897094886133, "grad_norm": 0.013787861913442612, "learning_rate": 0.001, "loss": 0.4332, "step": 9723 }, { "epoch": 0.2683065631499257, "grad_norm": 0.00343018164858222, "learning_rate": 0.001, "loss": 0.4035, "step": 9724 }, { "epoch": 0.26833415535099003, "grad_norm": 0.002628603018820286, "learning_rate": 0.001, "loss": 0.4012, "step": 9725 }, { "epoch": 0.2683617475520544, "grad_norm": 0.004094823729246855, "learning_rate": 0.001, "loss": 0.3817, "step": 9726 }, { "epoch": 0.2683893397531188, "grad_norm": 0.002913734642788768, "learning_rate": 0.001, "loss": 0.3751, "step": 9727 }, { "epoch": 0.26841693195418315, "grad_norm": 0.002792428247630596, "learning_rate": 0.001, "loss": 0.3806, "step": 9728 }, { "epoch": 0.2684445241552475, "grad_norm": 0.0024083973839879036, "learning_rate": 0.001, "loss": 0.4336, "step": 9729 }, { "epoch": 0.2684721163563119, "grad_norm": 0.002965459832921624, "learning_rate": 0.001, "loss": 0.4155, "step": 9730 }, { "epoch": 0.26849970855737626, "grad_norm": 0.01316264271736145, "learning_rate": 0.001, "loss": 0.4539, "step": 9731 }, { "epoch": 0.2685273007584406, "grad_norm": 0.002591692376881838, "learning_rate": 0.001, "loss": 0.3989, "step": 9732 }, { "epoch": 0.268554892959505, "grad_norm": 0.002965483581647277, "learning_rate": 0.001, "loss": 0.3652, "step": 9733 }, { "epoch": 0.2685824851605694, "grad_norm": 0.004762920085340738, "learning_rate": 0.001, "loss": 0.4115, "step": 9734 }, { "epoch": 0.2686100773616337, "grad_norm": 0.0021797195076942444, "learning_rate": 0.001, "loss": 0.3907, "step": 9735 }, { "epoch": 0.2686376695626981, "grad_norm": 0.0029116403311491013, "learning_rate": 0.001, "loss": 0.3816, "step": 9736 }, { "epoch": 0.2686652617637625, "grad_norm": 0.003725858870893717, "learning_rate": 0.001, "loss": 0.3588, "step": 9737 }, { "epoch": 0.26869285396482684, "grad_norm": 0.004337244667112827, "learning_rate": 0.001, "loss": 0.4034, "step": 9738 }, { "epoch": 0.2687204461658912, "grad_norm": 0.0027076283004134893, "learning_rate": 0.001, "loss": 0.391, "step": 9739 }, { "epoch": 0.2687480383669556, "grad_norm": 0.0021667422261089087, "learning_rate": 0.001, "loss": 0.4089, "step": 9740 }, { "epoch": 0.26877563056801995, "grad_norm": 0.0021627771202474833, "learning_rate": 0.001, "loss": 0.4135, "step": 9741 }, { "epoch": 0.2688032227690843, "grad_norm": 0.0031696807127445936, "learning_rate": 0.001, "loss": 0.3744, "step": 9742 }, { "epoch": 0.2688308149701487, "grad_norm": 0.0033821675460785627, "learning_rate": 0.001, "loss": 0.4016, "step": 9743 }, { "epoch": 0.26885840717121307, "grad_norm": 0.0039416286163032055, "learning_rate": 0.001, "loss": 0.3728, "step": 9744 }, { "epoch": 0.2688859993722774, "grad_norm": 0.003314357250928879, "learning_rate": 0.001, "loss": 0.3387, "step": 9745 }, { "epoch": 0.26891359157334177, "grad_norm": 0.0031994907185435295, "learning_rate": 0.001, "loss": 0.4292, "step": 9746 }, { "epoch": 0.2689411837744062, "grad_norm": 0.006605618633329868, "learning_rate": 0.001, "loss": 0.4039, "step": 9747 }, { "epoch": 0.26896877597547053, "grad_norm": 0.0040326593443751335, "learning_rate": 0.001, "loss": 0.3823, "step": 9748 }, { "epoch": 0.2689963681765349, "grad_norm": 0.004000399261713028, "learning_rate": 0.001, "loss": 0.4014, "step": 9749 }, { "epoch": 0.2690239603775993, "grad_norm": 0.005057289730757475, "learning_rate": 0.001, "loss": 0.4383, "step": 9750 }, { "epoch": 0.26905155257866364, "grad_norm": 0.006951628718525171, "learning_rate": 0.001, "loss": 0.4397, "step": 9751 }, { "epoch": 0.269079144779728, "grad_norm": 0.012159796431660652, "learning_rate": 0.001, "loss": 0.3827, "step": 9752 }, { "epoch": 0.2691067369807924, "grad_norm": 0.012929181568324566, "learning_rate": 0.001, "loss": 0.3891, "step": 9753 }, { "epoch": 0.26913432918185676, "grad_norm": 0.028701601549983025, "learning_rate": 0.001, "loss": 0.4264, "step": 9754 }, { "epoch": 0.2691619213829211, "grad_norm": 0.00337967392988503, "learning_rate": 0.001, "loss": 0.3958, "step": 9755 }, { "epoch": 0.26918951358398546, "grad_norm": 0.004440324380993843, "learning_rate": 0.001, "loss": 0.4315, "step": 9756 }, { "epoch": 0.26921710578504987, "grad_norm": 0.009946012869477272, "learning_rate": 0.001, "loss": 0.4012, "step": 9757 }, { "epoch": 0.2692446979861142, "grad_norm": 0.0047539942897856236, "learning_rate": 0.001, "loss": 0.3814, "step": 9758 }, { "epoch": 0.2692722901871786, "grad_norm": 0.002247569151222706, "learning_rate": 0.001, "loss": 0.4414, "step": 9759 }, { "epoch": 0.269299882388243, "grad_norm": 0.004046887159347534, "learning_rate": 0.001, "loss": 0.3617, "step": 9760 }, { "epoch": 0.26932747458930734, "grad_norm": 0.0023645355831831694, "learning_rate": 0.001, "loss": 0.4105, "step": 9761 }, { "epoch": 0.2693550667903717, "grad_norm": 0.006252918858081102, "learning_rate": 0.001, "loss": 0.3906, "step": 9762 }, { "epoch": 0.2693826589914361, "grad_norm": 0.004668123554438353, "learning_rate": 0.001, "loss": 0.4173, "step": 9763 }, { "epoch": 0.26941025119250045, "grad_norm": 0.005385317839682102, "learning_rate": 0.001, "loss": 0.3852, "step": 9764 }, { "epoch": 0.2694378433935648, "grad_norm": 0.004070324823260307, "learning_rate": 0.001, "loss": 0.409, "step": 9765 }, { "epoch": 0.26946543559462915, "grad_norm": 0.002345689572393894, "learning_rate": 0.001, "loss": 0.4281, "step": 9766 }, { "epoch": 0.26949302779569356, "grad_norm": 0.004202275536954403, "learning_rate": 0.001, "loss": 0.4266, "step": 9767 }, { "epoch": 0.2695206199967579, "grad_norm": 0.003104708855971694, "learning_rate": 0.001, "loss": 0.3805, "step": 9768 }, { "epoch": 0.26954821219782227, "grad_norm": 0.003290161257609725, "learning_rate": 0.001, "loss": 0.4162, "step": 9769 }, { "epoch": 0.2695758043988867, "grad_norm": 0.0024769490119069815, "learning_rate": 0.001, "loss": 0.3946, "step": 9770 }, { "epoch": 0.26960339659995103, "grad_norm": 0.002376476302742958, "learning_rate": 0.001, "loss": 0.3567, "step": 9771 }, { "epoch": 0.2696309888010154, "grad_norm": 0.00581009779125452, "learning_rate": 0.001, "loss": 0.3977, "step": 9772 }, { "epoch": 0.2696585810020798, "grad_norm": 0.005047387443482876, "learning_rate": 0.001, "loss": 0.4187, "step": 9773 }, { "epoch": 0.26968617320314414, "grad_norm": 0.002559883752837777, "learning_rate": 0.001, "loss": 0.3871, "step": 9774 }, { "epoch": 0.2697137654042085, "grad_norm": 0.002292527351528406, "learning_rate": 0.001, "loss": 0.402, "step": 9775 }, { "epoch": 0.26974135760527285, "grad_norm": 0.002428713021799922, "learning_rate": 0.001, "loss": 0.393, "step": 9776 }, { "epoch": 0.26976894980633725, "grad_norm": 0.0029613785445690155, "learning_rate": 0.001, "loss": 0.387, "step": 9777 }, { "epoch": 0.2697965420074016, "grad_norm": 0.00751551054418087, "learning_rate": 0.001, "loss": 0.383, "step": 9778 }, { "epoch": 0.26982413420846596, "grad_norm": 0.0034091894049197435, "learning_rate": 0.001, "loss": 0.4048, "step": 9779 }, { "epoch": 0.26985172640953037, "grad_norm": 0.003550524590536952, "learning_rate": 0.001, "loss": 0.3828, "step": 9780 }, { "epoch": 0.2698793186105947, "grad_norm": 0.019099680706858635, "learning_rate": 0.001, "loss": 0.3838, "step": 9781 }, { "epoch": 0.2699069108116591, "grad_norm": 0.013346359133720398, "learning_rate": 0.001, "loss": 0.4195, "step": 9782 }, { "epoch": 0.2699345030127235, "grad_norm": 0.0031708034221082926, "learning_rate": 0.001, "loss": 0.4188, "step": 9783 }, { "epoch": 0.26996209521378783, "grad_norm": 0.009070201776921749, "learning_rate": 0.001, "loss": 0.3902, "step": 9784 }, { "epoch": 0.2699896874148522, "grad_norm": 0.003416346851736307, "learning_rate": 0.001, "loss": 0.3885, "step": 9785 }, { "epoch": 0.27001727961591654, "grad_norm": 0.0025843738112598658, "learning_rate": 0.001, "loss": 0.4058, "step": 9786 }, { "epoch": 0.27004487181698095, "grad_norm": 0.024039220064878464, "learning_rate": 0.001, "loss": 0.3799, "step": 9787 }, { "epoch": 0.2700724640180453, "grad_norm": 0.00454790610820055, "learning_rate": 0.001, "loss": 0.3843, "step": 9788 }, { "epoch": 0.27010005621910965, "grad_norm": 0.0040943981148302555, "learning_rate": 0.001, "loss": 0.4019, "step": 9789 }, { "epoch": 0.27012764842017406, "grad_norm": 0.01507536880671978, "learning_rate": 0.001, "loss": 0.401, "step": 9790 }, { "epoch": 0.2701552406212384, "grad_norm": 0.007708291057497263, "learning_rate": 0.001, "loss": 0.4034, "step": 9791 }, { "epoch": 0.27018283282230277, "grad_norm": 0.002290240256115794, "learning_rate": 0.001, "loss": 0.4356, "step": 9792 }, { "epoch": 0.2702104250233671, "grad_norm": 0.006000965368002653, "learning_rate": 0.001, "loss": 0.3904, "step": 9793 }, { "epoch": 0.2702380172244315, "grad_norm": 0.0025645066052675247, "learning_rate": 0.001, "loss": 0.3787, "step": 9794 }, { "epoch": 0.2702656094254959, "grad_norm": 0.004800410475581884, "learning_rate": 0.001, "loss": 0.3921, "step": 9795 }, { "epoch": 0.27029320162656023, "grad_norm": 0.002543856855481863, "learning_rate": 0.001, "loss": 0.4212, "step": 9796 }, { "epoch": 0.27032079382762464, "grad_norm": 0.0024924466852098703, "learning_rate": 0.001, "loss": 0.4508, "step": 9797 }, { "epoch": 0.270348386028689, "grad_norm": 0.017285561189055443, "learning_rate": 0.001, "loss": 0.4157, "step": 9798 }, { "epoch": 0.27037597822975334, "grad_norm": 0.0034071647096425295, "learning_rate": 0.001, "loss": 0.4031, "step": 9799 }, { "epoch": 0.27040357043081775, "grad_norm": 0.0026173749938607216, "learning_rate": 0.001, "loss": 0.4107, "step": 9800 }, { "epoch": 0.2704311626318821, "grad_norm": 0.0027858337853103876, "learning_rate": 0.001, "loss": 0.3942, "step": 9801 }, { "epoch": 0.27045875483294646, "grad_norm": 0.003459551138803363, "learning_rate": 0.001, "loss": 0.3888, "step": 9802 }, { "epoch": 0.2704863470340108, "grad_norm": 0.0028464416973292828, "learning_rate": 0.001, "loss": 0.3334, "step": 9803 }, { "epoch": 0.2705139392350752, "grad_norm": 0.0025196147616952658, "learning_rate": 0.001, "loss": 0.4198, "step": 9804 }, { "epoch": 0.27054153143613957, "grad_norm": 0.002594658173620701, "learning_rate": 0.001, "loss": 0.3921, "step": 9805 }, { "epoch": 0.2705691236372039, "grad_norm": 0.0055243829265236855, "learning_rate": 0.001, "loss": 0.4163, "step": 9806 }, { "epoch": 0.27059671583826833, "grad_norm": 0.003879512194544077, "learning_rate": 0.001, "loss": 0.3891, "step": 9807 }, { "epoch": 0.2706243080393327, "grad_norm": 0.0029453851748257875, "learning_rate": 0.001, "loss": 0.3562, "step": 9808 }, { "epoch": 0.27065190024039704, "grad_norm": 0.004673081450164318, "learning_rate": 0.001, "loss": 0.394, "step": 9809 }, { "epoch": 0.27067949244146144, "grad_norm": 0.002838741522282362, "learning_rate": 0.001, "loss": 0.4154, "step": 9810 }, { "epoch": 0.2707070846425258, "grad_norm": 0.002747466554865241, "learning_rate": 0.001, "loss": 0.3861, "step": 9811 }, { "epoch": 0.27073467684359015, "grad_norm": 0.003146926872432232, "learning_rate": 0.001, "loss": 0.4179, "step": 9812 }, { "epoch": 0.2707622690446545, "grad_norm": 0.007232631091028452, "learning_rate": 0.001, "loss": 0.3991, "step": 9813 }, { "epoch": 0.2707898612457189, "grad_norm": 0.0033664864022284746, "learning_rate": 0.001, "loss": 0.4017, "step": 9814 }, { "epoch": 0.27081745344678326, "grad_norm": 0.002574306447058916, "learning_rate": 0.001, "loss": 0.4027, "step": 9815 }, { "epoch": 0.2708450456478476, "grad_norm": 0.0030514101963490248, "learning_rate": 0.001, "loss": 0.3745, "step": 9816 }, { "epoch": 0.270872637848912, "grad_norm": 0.0026982761919498444, "learning_rate": 0.001, "loss": 0.4389, "step": 9817 }, { "epoch": 0.2709002300499764, "grad_norm": 0.0034780215937644243, "learning_rate": 0.001, "loss": 0.4446, "step": 9818 }, { "epoch": 0.27092782225104073, "grad_norm": 0.0022655725479125977, "learning_rate": 0.001, "loss": 0.391, "step": 9819 }, { "epoch": 0.27095541445210514, "grad_norm": 0.0031723659485578537, "learning_rate": 0.001, "loss": 0.3482, "step": 9820 }, { "epoch": 0.2709830066531695, "grad_norm": 0.004059515427798033, "learning_rate": 0.001, "loss": 0.4105, "step": 9821 }, { "epoch": 0.27101059885423384, "grad_norm": 0.003187219612300396, "learning_rate": 0.001, "loss": 0.3748, "step": 9822 }, { "epoch": 0.2710381910552982, "grad_norm": 0.002115658251568675, "learning_rate": 0.001, "loss": 0.4208, "step": 9823 }, { "epoch": 0.2710657832563626, "grad_norm": 0.004135797265917063, "learning_rate": 0.001, "loss": 0.4439, "step": 9824 }, { "epoch": 0.27109337545742695, "grad_norm": 0.0046222819946706295, "learning_rate": 0.001, "loss": 0.4101, "step": 9825 }, { "epoch": 0.2711209676584913, "grad_norm": 0.00371656590141356, "learning_rate": 0.001, "loss": 0.3868, "step": 9826 }, { "epoch": 0.2711485598595557, "grad_norm": 0.0033854972571134567, "learning_rate": 0.001, "loss": 0.3818, "step": 9827 }, { "epoch": 0.27117615206062007, "grad_norm": 0.0034048438537865877, "learning_rate": 0.001, "loss": 0.3669, "step": 9828 }, { "epoch": 0.2712037442616844, "grad_norm": 0.005359988659620285, "learning_rate": 0.001, "loss": 0.3695, "step": 9829 }, { "epoch": 0.27123133646274883, "grad_norm": 0.0037438932340592146, "learning_rate": 0.001, "loss": 0.3743, "step": 9830 }, { "epoch": 0.2712589286638132, "grad_norm": 0.002518385648727417, "learning_rate": 0.001, "loss": 0.4065, "step": 9831 }, { "epoch": 0.27128652086487753, "grad_norm": 0.0029225589241832495, "learning_rate": 0.001, "loss": 0.414, "step": 9832 }, { "epoch": 0.2713141130659419, "grad_norm": 0.003086130367591977, "learning_rate": 0.001, "loss": 0.3932, "step": 9833 }, { "epoch": 0.2713417052670063, "grad_norm": 0.005153126548975706, "learning_rate": 0.001, "loss": 0.37, "step": 9834 }, { "epoch": 0.27136929746807065, "grad_norm": 0.0035248089116066694, "learning_rate": 0.001, "loss": 0.4109, "step": 9835 }, { "epoch": 0.271396889669135, "grad_norm": 0.006128870882093906, "learning_rate": 0.001, "loss": 0.4036, "step": 9836 }, { "epoch": 0.2714244818701994, "grad_norm": 0.003302632598206401, "learning_rate": 0.001, "loss": 0.3738, "step": 9837 }, { "epoch": 0.27145207407126376, "grad_norm": 0.004240743815898895, "learning_rate": 0.001, "loss": 0.4279, "step": 9838 }, { "epoch": 0.2714796662723281, "grad_norm": 0.014292960986495018, "learning_rate": 0.001, "loss": 0.4009, "step": 9839 }, { "epoch": 0.2715072584733925, "grad_norm": 0.002007813658565283, "learning_rate": 0.001, "loss": 0.4097, "step": 9840 }, { "epoch": 0.2715348506744569, "grad_norm": 0.004010828211903572, "learning_rate": 0.001, "loss": 0.3683, "step": 9841 }, { "epoch": 0.2715624428755212, "grad_norm": 0.00426107831299305, "learning_rate": 0.001, "loss": 0.3931, "step": 9842 }, { "epoch": 0.2715900350765856, "grad_norm": 0.0019332681549713016, "learning_rate": 0.001, "loss": 0.4438, "step": 9843 }, { "epoch": 0.27161762727765, "grad_norm": 0.0026610195636749268, "learning_rate": 0.001, "loss": 0.406, "step": 9844 }, { "epoch": 0.27164521947871434, "grad_norm": 0.0026648433413356543, "learning_rate": 0.001, "loss": 0.3922, "step": 9845 }, { "epoch": 0.2716728116797787, "grad_norm": 0.0038692099042236805, "learning_rate": 0.001, "loss": 0.423, "step": 9846 }, { "epoch": 0.2717004038808431, "grad_norm": 0.0028008136432617903, "learning_rate": 0.001, "loss": 0.3998, "step": 9847 }, { "epoch": 0.27172799608190745, "grad_norm": 0.0038147938903421164, "learning_rate": 0.001, "loss": 0.394, "step": 9848 }, { "epoch": 0.2717555882829718, "grad_norm": 0.0787762999534607, "learning_rate": 0.001, "loss": 0.4016, "step": 9849 }, { "epoch": 0.2717831804840362, "grad_norm": 0.004193917848169804, "learning_rate": 0.001, "loss": 0.4197, "step": 9850 }, { "epoch": 0.27181077268510057, "grad_norm": 0.0029792841523885727, "learning_rate": 0.001, "loss": 0.3763, "step": 9851 }, { "epoch": 0.2718383648861649, "grad_norm": 0.002425075275823474, "learning_rate": 0.001, "loss": 0.4264, "step": 9852 }, { "epoch": 0.27186595708722927, "grad_norm": 0.0051060812547802925, "learning_rate": 0.001, "loss": 0.3555, "step": 9853 }, { "epoch": 0.2718935492882937, "grad_norm": 0.024291977286338806, "learning_rate": 0.001, "loss": 0.3906, "step": 9854 }, { "epoch": 0.27192114148935803, "grad_norm": 0.005702044349163771, "learning_rate": 0.001, "loss": 0.4261, "step": 9855 }, { "epoch": 0.2719487336904224, "grad_norm": 0.0037687926087528467, "learning_rate": 0.001, "loss": 0.3845, "step": 9856 }, { "epoch": 0.2719763258914868, "grad_norm": 0.007429871242493391, "learning_rate": 0.001, "loss": 0.4084, "step": 9857 }, { "epoch": 0.27200391809255114, "grad_norm": 0.00363955763168633, "learning_rate": 0.001, "loss": 0.3385, "step": 9858 }, { "epoch": 0.2720315102936155, "grad_norm": 0.00260857748799026, "learning_rate": 0.001, "loss": 0.4518, "step": 9859 }, { "epoch": 0.2720591024946799, "grad_norm": 0.002581104403361678, "learning_rate": 0.001, "loss": 0.3907, "step": 9860 }, { "epoch": 0.27208669469574426, "grad_norm": 0.0025525682140141726, "learning_rate": 0.001, "loss": 0.4144, "step": 9861 }, { "epoch": 0.2721142868968086, "grad_norm": 0.00219151028431952, "learning_rate": 0.001, "loss": 0.381, "step": 9862 }, { "epoch": 0.27214187909787296, "grad_norm": 0.0028392940293997526, "learning_rate": 0.001, "loss": 0.4049, "step": 9863 }, { "epoch": 0.27216947129893737, "grad_norm": 0.005825422238558531, "learning_rate": 0.001, "loss": 0.3668, "step": 9864 }, { "epoch": 0.2721970635000017, "grad_norm": 0.00334284920245409, "learning_rate": 0.001, "loss": 0.3837, "step": 9865 }, { "epoch": 0.2722246557010661, "grad_norm": 0.0023583846632391214, "learning_rate": 0.001, "loss": 0.4264, "step": 9866 }, { "epoch": 0.2722522479021305, "grad_norm": 0.004898847080767155, "learning_rate": 0.001, "loss": 0.4439, "step": 9867 }, { "epoch": 0.27227984010319484, "grad_norm": 0.004352132324129343, "learning_rate": 0.001, "loss": 0.3966, "step": 9868 }, { "epoch": 0.2723074323042592, "grad_norm": 0.003136218059808016, "learning_rate": 0.001, "loss": 0.4208, "step": 9869 }, { "epoch": 0.2723350245053236, "grad_norm": 0.003824204672127962, "learning_rate": 0.001, "loss": 0.3764, "step": 9870 }, { "epoch": 0.27236261670638795, "grad_norm": 0.0033238916657865047, "learning_rate": 0.001, "loss": 0.4085, "step": 9871 }, { "epoch": 0.2723902089074523, "grad_norm": 0.005290450528264046, "learning_rate": 0.001, "loss": 0.3666, "step": 9872 }, { "epoch": 0.27241780110851666, "grad_norm": 0.0039006490260362625, "learning_rate": 0.001, "loss": 0.3767, "step": 9873 }, { "epoch": 0.27244539330958106, "grad_norm": 0.006280507426708937, "learning_rate": 0.001, "loss": 0.4119, "step": 9874 }, { "epoch": 0.2724729855106454, "grad_norm": 0.010091300122439861, "learning_rate": 0.001, "loss": 0.3789, "step": 9875 }, { "epoch": 0.27250057771170977, "grad_norm": 0.012804842554032803, "learning_rate": 0.001, "loss": 0.3659, "step": 9876 }, { "epoch": 0.2725281699127742, "grad_norm": 0.005947540979832411, "learning_rate": 0.001, "loss": 0.3872, "step": 9877 }, { "epoch": 0.27255576211383853, "grad_norm": 0.007248189765959978, "learning_rate": 0.001, "loss": 0.4269, "step": 9878 }, { "epoch": 0.2725833543149029, "grad_norm": 0.00791088119149208, "learning_rate": 0.001, "loss": 0.3778, "step": 9879 }, { "epoch": 0.2726109465159673, "grad_norm": 0.011914456263184547, "learning_rate": 0.001, "loss": 0.415, "step": 9880 }, { "epoch": 0.27263853871703164, "grad_norm": 0.005593923386186361, "learning_rate": 0.001, "loss": 0.3928, "step": 9881 }, { "epoch": 0.272666130918096, "grad_norm": 0.0036203390918672085, "learning_rate": 0.001, "loss": 0.4013, "step": 9882 }, { "epoch": 0.27269372311916035, "grad_norm": 0.0030619618482887745, "learning_rate": 0.001, "loss": 0.4023, "step": 9883 }, { "epoch": 0.27272131532022476, "grad_norm": 0.002730242908000946, "learning_rate": 0.001, "loss": 0.3723, "step": 9884 }, { "epoch": 0.2727489075212891, "grad_norm": 0.004023353569209576, "learning_rate": 0.001, "loss": 0.4212, "step": 9885 }, { "epoch": 0.27277649972235346, "grad_norm": 0.0054342905059456825, "learning_rate": 0.001, "loss": 0.3853, "step": 9886 }, { "epoch": 0.27280409192341787, "grad_norm": 0.004413217771798372, "learning_rate": 0.001, "loss": 0.3759, "step": 9887 }, { "epoch": 0.2728316841244822, "grad_norm": 0.0039059999398887157, "learning_rate": 0.001, "loss": 0.4253, "step": 9888 }, { "epoch": 0.2728592763255466, "grad_norm": 0.003089727135375142, "learning_rate": 0.001, "loss": 0.4385, "step": 9889 }, { "epoch": 0.2728868685266109, "grad_norm": 0.002594609744846821, "learning_rate": 0.001, "loss": 0.3901, "step": 9890 }, { "epoch": 0.27291446072767533, "grad_norm": 0.004234743770211935, "learning_rate": 0.001, "loss": 0.3582, "step": 9891 }, { "epoch": 0.2729420529287397, "grad_norm": 0.002928930101916194, "learning_rate": 0.001, "loss": 0.3743, "step": 9892 }, { "epoch": 0.27296964512980404, "grad_norm": 0.002950299996882677, "learning_rate": 0.001, "loss": 0.4439, "step": 9893 }, { "epoch": 0.27299723733086845, "grad_norm": 0.002796403132379055, "learning_rate": 0.001, "loss": 0.4327, "step": 9894 }, { "epoch": 0.2730248295319328, "grad_norm": 0.0047624544240534306, "learning_rate": 0.001, "loss": 0.416, "step": 9895 }, { "epoch": 0.27305242173299715, "grad_norm": 0.002439835574477911, "learning_rate": 0.001, "loss": 0.4364, "step": 9896 }, { "epoch": 0.27308001393406156, "grad_norm": 0.002326715039089322, "learning_rate": 0.001, "loss": 0.4155, "step": 9897 }, { "epoch": 0.2731076061351259, "grad_norm": 0.00256901397369802, "learning_rate": 0.001, "loss": 0.4314, "step": 9898 }, { "epoch": 0.27313519833619027, "grad_norm": 0.002579151652753353, "learning_rate": 0.001, "loss": 0.408, "step": 9899 }, { "epoch": 0.2731627905372546, "grad_norm": 0.00309234787710011, "learning_rate": 0.001, "loss": 0.3471, "step": 9900 }, { "epoch": 0.273190382738319, "grad_norm": 0.0036706968676298857, "learning_rate": 0.001, "loss": 0.3779, "step": 9901 }, { "epoch": 0.2732179749393834, "grad_norm": 0.0024913379456847906, "learning_rate": 0.001, "loss": 0.4071, "step": 9902 }, { "epoch": 0.27324556714044773, "grad_norm": 0.004764418583363295, "learning_rate": 0.001, "loss": 0.432, "step": 9903 }, { "epoch": 0.27327315934151214, "grad_norm": 0.0027252251747995615, "learning_rate": 0.001, "loss": 0.3977, "step": 9904 }, { "epoch": 0.2733007515425765, "grad_norm": 0.0033297077752649784, "learning_rate": 0.001, "loss": 0.4315, "step": 9905 }, { "epoch": 0.27332834374364084, "grad_norm": 0.0033139032311737537, "learning_rate": 0.001, "loss": 0.3994, "step": 9906 }, { "epoch": 0.27335593594470525, "grad_norm": 0.004935343284159899, "learning_rate": 0.001, "loss": 0.3974, "step": 9907 }, { "epoch": 0.2733835281457696, "grad_norm": 0.0046791452914476395, "learning_rate": 0.001, "loss": 0.4204, "step": 9908 }, { "epoch": 0.27341112034683396, "grad_norm": 0.004267543088644743, "learning_rate": 0.001, "loss": 0.4563, "step": 9909 }, { "epoch": 0.2734387125478983, "grad_norm": 0.002623759675770998, "learning_rate": 0.001, "loss": 0.3976, "step": 9910 }, { "epoch": 0.2734663047489627, "grad_norm": 0.002590791555121541, "learning_rate": 0.001, "loss": 0.3993, "step": 9911 }, { "epoch": 0.27349389695002707, "grad_norm": 0.002841070294380188, "learning_rate": 0.001, "loss": 0.3935, "step": 9912 }, { "epoch": 0.2735214891510914, "grad_norm": 0.009606373496353626, "learning_rate": 0.001, "loss": 0.4181, "step": 9913 }, { "epoch": 0.27354908135215583, "grad_norm": 0.0030742899980396032, "learning_rate": 0.001, "loss": 0.3942, "step": 9914 }, { "epoch": 0.2735766735532202, "grad_norm": 0.00670670298859477, "learning_rate": 0.001, "loss": 0.4058, "step": 9915 }, { "epoch": 0.27360426575428454, "grad_norm": 0.002301518339663744, "learning_rate": 0.001, "loss": 0.3965, "step": 9916 }, { "epoch": 0.27363185795534894, "grad_norm": 0.00346144987270236, "learning_rate": 0.001, "loss": 0.3968, "step": 9917 }, { "epoch": 0.2736594501564133, "grad_norm": 0.0030207394156605005, "learning_rate": 0.001, "loss": 0.3899, "step": 9918 }, { "epoch": 0.27368704235747765, "grad_norm": 0.005349605809897184, "learning_rate": 0.001, "loss": 0.3826, "step": 9919 }, { "epoch": 0.273714634558542, "grad_norm": 0.004348041955381632, "learning_rate": 0.001, "loss": 0.3966, "step": 9920 }, { "epoch": 0.2737422267596064, "grad_norm": 0.0030986126512289047, "learning_rate": 0.001, "loss": 0.4256, "step": 9921 }, { "epoch": 0.27376981896067076, "grad_norm": 0.0026239063590765, "learning_rate": 0.001, "loss": 0.3946, "step": 9922 }, { "epoch": 0.2737974111617351, "grad_norm": 0.0023042093962430954, "learning_rate": 0.001, "loss": 0.3869, "step": 9923 }, { "epoch": 0.2738250033627995, "grad_norm": 0.007671200204640627, "learning_rate": 0.001, "loss": 0.3721, "step": 9924 }, { "epoch": 0.2738525955638639, "grad_norm": 0.00476741511374712, "learning_rate": 0.001, "loss": 0.367, "step": 9925 }, { "epoch": 0.27388018776492823, "grad_norm": 0.004138735588639975, "learning_rate": 0.001, "loss": 0.4208, "step": 9926 }, { "epoch": 0.27390777996599264, "grad_norm": 0.007656489033252001, "learning_rate": 0.001, "loss": 0.3586, "step": 9927 }, { "epoch": 0.273935372167057, "grad_norm": 0.003876802045851946, "learning_rate": 0.001, "loss": 0.3793, "step": 9928 }, { "epoch": 0.27396296436812134, "grad_norm": 0.010240191593766212, "learning_rate": 0.001, "loss": 0.4083, "step": 9929 }, { "epoch": 0.2739905565691857, "grad_norm": 0.005146909970790148, "learning_rate": 0.001, "loss": 0.3728, "step": 9930 }, { "epoch": 0.2740181487702501, "grad_norm": 0.00523938424885273, "learning_rate": 0.001, "loss": 0.3844, "step": 9931 }, { "epoch": 0.27404574097131446, "grad_norm": 0.00330050359480083, "learning_rate": 0.001, "loss": 0.3892, "step": 9932 }, { "epoch": 0.2740733331723788, "grad_norm": 0.00420812563970685, "learning_rate": 0.001, "loss": 0.3911, "step": 9933 }, { "epoch": 0.2741009253734432, "grad_norm": 0.00954358745366335, "learning_rate": 0.001, "loss": 0.3833, "step": 9934 }, { "epoch": 0.27412851757450757, "grad_norm": 0.005676385015249252, "learning_rate": 0.001, "loss": 0.401, "step": 9935 }, { "epoch": 0.2741561097755719, "grad_norm": 0.00360414432361722, "learning_rate": 0.001, "loss": 0.3899, "step": 9936 }, { "epoch": 0.27418370197663633, "grad_norm": 0.002916174242272973, "learning_rate": 0.001, "loss": 0.4373, "step": 9937 }, { "epoch": 0.2742112941777007, "grad_norm": 0.0041490960866212845, "learning_rate": 0.001, "loss": 0.3927, "step": 9938 }, { "epoch": 0.27423888637876503, "grad_norm": 0.03103417344391346, "learning_rate": 0.001, "loss": 0.3987, "step": 9939 }, { "epoch": 0.2742664785798294, "grad_norm": 0.00923039298504591, "learning_rate": 0.001, "loss": 0.4004, "step": 9940 }, { "epoch": 0.2742940707808938, "grad_norm": 0.0029965273570269346, "learning_rate": 0.001, "loss": 0.439, "step": 9941 }, { "epoch": 0.27432166298195815, "grad_norm": 0.003088477300480008, "learning_rate": 0.001, "loss": 0.3813, "step": 9942 }, { "epoch": 0.2743492551830225, "grad_norm": 0.0032554971985518932, "learning_rate": 0.001, "loss": 0.4156, "step": 9943 }, { "epoch": 0.2743768473840869, "grad_norm": 0.004142871592193842, "learning_rate": 0.001, "loss": 0.4189, "step": 9944 }, { "epoch": 0.27440443958515126, "grad_norm": 0.003104181494563818, "learning_rate": 0.001, "loss": 0.4042, "step": 9945 }, { "epoch": 0.2744320317862156, "grad_norm": 0.0025867957156151533, "learning_rate": 0.001, "loss": 0.4087, "step": 9946 }, { "epoch": 0.27445962398728, "grad_norm": 0.002821350237354636, "learning_rate": 0.001, "loss": 0.4176, "step": 9947 }, { "epoch": 0.2744872161883444, "grad_norm": 0.003803239669650793, "learning_rate": 0.001, "loss": 0.3958, "step": 9948 }, { "epoch": 0.2745148083894087, "grad_norm": 0.002869410440325737, "learning_rate": 0.001, "loss": 0.3895, "step": 9949 }, { "epoch": 0.2745424005904731, "grad_norm": 0.004910988733172417, "learning_rate": 0.001, "loss": 0.4023, "step": 9950 }, { "epoch": 0.2745699927915375, "grad_norm": 0.00325010740198195, "learning_rate": 0.001, "loss": 0.4318, "step": 9951 }, { "epoch": 0.27459758499260184, "grad_norm": 0.0021431315690279007, "learning_rate": 0.001, "loss": 0.4124, "step": 9952 }, { "epoch": 0.2746251771936662, "grad_norm": 0.003677117172628641, "learning_rate": 0.001, "loss": 0.4193, "step": 9953 }, { "epoch": 0.2746527693947306, "grad_norm": 0.002277594292536378, "learning_rate": 0.001, "loss": 0.4206, "step": 9954 }, { "epoch": 0.27468036159579495, "grad_norm": 0.004701568745076656, "learning_rate": 0.001, "loss": 0.3822, "step": 9955 }, { "epoch": 0.2747079537968593, "grad_norm": 0.002484973520040512, "learning_rate": 0.001, "loss": 0.3838, "step": 9956 }, { "epoch": 0.2747355459979237, "grad_norm": 0.0033851531334221363, "learning_rate": 0.001, "loss": 0.3915, "step": 9957 }, { "epoch": 0.27476313819898807, "grad_norm": 0.0025149055290967226, "learning_rate": 0.001, "loss": 0.4137, "step": 9958 }, { "epoch": 0.2747907304000524, "grad_norm": 0.0030154273845255375, "learning_rate": 0.001, "loss": 0.3872, "step": 9959 }, { "epoch": 0.27481832260111677, "grad_norm": 0.010692945681512356, "learning_rate": 0.001, "loss": 0.3861, "step": 9960 }, { "epoch": 0.2748459148021812, "grad_norm": 0.0041275848634541035, "learning_rate": 0.001, "loss": 0.3891, "step": 9961 }, { "epoch": 0.27487350700324553, "grad_norm": 0.0029004793614149094, "learning_rate": 0.001, "loss": 0.4236, "step": 9962 }, { "epoch": 0.2749010992043099, "grad_norm": 0.009717054665088654, "learning_rate": 0.001, "loss": 0.38, "step": 9963 }, { "epoch": 0.2749286914053743, "grad_norm": 0.00394978653639555, "learning_rate": 0.001, "loss": 0.3602, "step": 9964 }, { "epoch": 0.27495628360643865, "grad_norm": 0.0029732300899922848, "learning_rate": 0.001, "loss": 0.4, "step": 9965 }, { "epoch": 0.274983875807503, "grad_norm": 0.002955190371721983, "learning_rate": 0.001, "loss": 0.3871, "step": 9966 }, { "epoch": 0.2750114680085674, "grad_norm": 0.0038855448365211487, "learning_rate": 0.001, "loss": 0.3838, "step": 9967 }, { "epoch": 0.27503906020963176, "grad_norm": 0.002460125368088484, "learning_rate": 0.001, "loss": 0.4013, "step": 9968 }, { "epoch": 0.2750666524106961, "grad_norm": 0.002936316654086113, "learning_rate": 0.001, "loss": 0.3468, "step": 9969 }, { "epoch": 0.27509424461176046, "grad_norm": 0.003299242816865444, "learning_rate": 0.001, "loss": 0.4191, "step": 9970 }, { "epoch": 0.27512183681282487, "grad_norm": 0.0037559715565294027, "learning_rate": 0.001, "loss": 0.362, "step": 9971 }, { "epoch": 0.2751494290138892, "grad_norm": 0.0035624271258711815, "learning_rate": 0.001, "loss": 0.3944, "step": 9972 }, { "epoch": 0.2751770212149536, "grad_norm": 0.00262457481585443, "learning_rate": 0.001, "loss": 0.3985, "step": 9973 }, { "epoch": 0.275204613416018, "grad_norm": 0.0050546471029520035, "learning_rate": 0.001, "loss": 0.3629, "step": 9974 }, { "epoch": 0.27523220561708234, "grad_norm": 0.004298574756830931, "learning_rate": 0.001, "loss": 0.4059, "step": 9975 }, { "epoch": 0.2752597978181467, "grad_norm": 0.0042538209818303585, "learning_rate": 0.001, "loss": 0.402, "step": 9976 }, { "epoch": 0.27528739001921104, "grad_norm": 0.003947499208152294, "learning_rate": 0.001, "loss": 0.3789, "step": 9977 }, { "epoch": 0.27531498222027545, "grad_norm": 0.0038300196174532175, "learning_rate": 0.001, "loss": 0.3953, "step": 9978 }, { "epoch": 0.2753425744213398, "grad_norm": 0.005689695011824369, "learning_rate": 0.001, "loss": 0.3856, "step": 9979 }, { "epoch": 0.27537016662240416, "grad_norm": 0.0022208907175809145, "learning_rate": 0.001, "loss": 0.3766, "step": 9980 }, { "epoch": 0.27539775882346856, "grad_norm": 0.0022134091705083847, "learning_rate": 0.001, "loss": 0.3945, "step": 9981 }, { "epoch": 0.2754253510245329, "grad_norm": 0.0045527201145887375, "learning_rate": 0.001, "loss": 0.3707, "step": 9982 }, { "epoch": 0.27545294322559727, "grad_norm": 0.004349110182374716, "learning_rate": 0.001, "loss": 0.4218, "step": 9983 }, { "epoch": 0.2754805354266617, "grad_norm": 0.0028283181600272655, "learning_rate": 0.001, "loss": 0.4415, "step": 9984 }, { "epoch": 0.27550812762772603, "grad_norm": 0.0024474281817674637, "learning_rate": 0.001, "loss": 0.368, "step": 9985 }, { "epoch": 0.2755357198287904, "grad_norm": 0.0033072589430958033, "learning_rate": 0.001, "loss": 0.4011, "step": 9986 }, { "epoch": 0.27556331202985473, "grad_norm": 0.002837040927261114, "learning_rate": 0.001, "loss": 0.369, "step": 9987 }, { "epoch": 0.27559090423091914, "grad_norm": 0.014132903888821602, "learning_rate": 0.001, "loss": 0.3712, "step": 9988 }, { "epoch": 0.2756184964319835, "grad_norm": 0.002668624045327306, "learning_rate": 0.001, "loss": 0.3964, "step": 9989 }, { "epoch": 0.27564608863304785, "grad_norm": 0.003035599598661065, "learning_rate": 0.001, "loss": 0.3895, "step": 9990 }, { "epoch": 0.27567368083411226, "grad_norm": 0.0033727851696312428, "learning_rate": 0.001, "loss": 0.402, "step": 9991 }, { "epoch": 0.2757012730351766, "grad_norm": 0.0022625040728598833, "learning_rate": 0.001, "loss": 0.4287, "step": 9992 }, { "epoch": 0.27572886523624096, "grad_norm": 0.004006414674222469, "learning_rate": 0.001, "loss": 0.3875, "step": 9993 }, { "epoch": 0.27575645743730537, "grad_norm": 0.002454617992043495, "learning_rate": 0.001, "loss": 0.424, "step": 9994 }, { "epoch": 0.2757840496383697, "grad_norm": 0.003350407350808382, "learning_rate": 0.001, "loss": 0.4022, "step": 9995 }, { "epoch": 0.2758116418394341, "grad_norm": 0.004660541657358408, "learning_rate": 0.001, "loss": 0.3739, "step": 9996 }, { "epoch": 0.2758392340404984, "grad_norm": 0.002644259948283434, "learning_rate": 0.001, "loss": 0.4017, "step": 9997 }, { "epoch": 0.27586682624156283, "grad_norm": 0.004398273769766092, "learning_rate": 0.001, "loss": 0.3655, "step": 9998 }, { "epoch": 0.2758944184426272, "grad_norm": 0.013148190453648567, "learning_rate": 0.001, "loss": 0.3823, "step": 9999 }, { "epoch": 0.27592201064369154, "grad_norm": 0.002878269413486123, "learning_rate": 0.001, "loss": 0.3966, "step": 10000 }, { "epoch": 0.27592201064369154, "eval_runtime": 24.4086, "eval_samples_per_second": 1.311, "eval_steps_per_second": 0.164, "step": 10000 }, { "epoch": 0.27594960284475595, "grad_norm": 0.0022266616579145193, "learning_rate": 0.001, "loss": 0.4605, "step": 10001 }, { "epoch": 0.2759771950458203, "grad_norm": 0.0019374669063836336, "learning_rate": 0.001, "loss": 0.4417, "step": 10002 }, { "epoch": 0.27600478724688465, "grad_norm": 0.0026133570354431868, "learning_rate": 0.001, "loss": 0.408, "step": 10003 }, { "epoch": 0.27603237944794906, "grad_norm": 0.003653626423329115, "learning_rate": 0.001, "loss": 0.3964, "step": 10004 }, { "epoch": 0.2760599716490134, "grad_norm": 0.002634751843288541, "learning_rate": 0.001, "loss": 0.3812, "step": 10005 }, { "epoch": 0.27608756385007777, "grad_norm": 0.012561113573610783, "learning_rate": 0.001, "loss": 0.4003, "step": 10006 }, { "epoch": 0.2761151560511421, "grad_norm": 0.01040496677160263, "learning_rate": 0.001, "loss": 0.3894, "step": 10007 }, { "epoch": 0.2761427482522065, "grad_norm": 0.005907583516091108, "learning_rate": 0.001, "loss": 0.4064, "step": 10008 }, { "epoch": 0.2761703404532709, "grad_norm": 0.0033373169135302305, "learning_rate": 0.001, "loss": 0.3865, "step": 10009 }, { "epoch": 0.27619793265433523, "grad_norm": 0.003724615555256605, "learning_rate": 0.001, "loss": 0.3988, "step": 10010 }, { "epoch": 0.27622552485539964, "grad_norm": 0.0035398202016949654, "learning_rate": 0.001, "loss": 0.403, "step": 10011 }, { "epoch": 0.276253117056464, "grad_norm": 0.003467197297140956, "learning_rate": 0.001, "loss": 0.4124, "step": 10012 }, { "epoch": 0.27628070925752835, "grad_norm": 0.004464291967451572, "learning_rate": 0.001, "loss": 0.3839, "step": 10013 }, { "epoch": 0.27630830145859275, "grad_norm": 0.0036106444895267487, "learning_rate": 0.001, "loss": 0.3951, "step": 10014 }, { "epoch": 0.2763358936596571, "grad_norm": 0.004417051561176777, "learning_rate": 0.001, "loss": 0.3908, "step": 10015 }, { "epoch": 0.27636348586072146, "grad_norm": 0.0054782601073384285, "learning_rate": 0.001, "loss": 0.4039, "step": 10016 }, { "epoch": 0.2763910780617858, "grad_norm": 0.004640686791390181, "learning_rate": 0.001, "loss": 0.384, "step": 10017 }, { "epoch": 0.2764186702628502, "grad_norm": 0.005945026874542236, "learning_rate": 0.001, "loss": 0.3939, "step": 10018 }, { "epoch": 0.27644626246391457, "grad_norm": 0.002607600996270776, "learning_rate": 0.001, "loss": 0.3993, "step": 10019 }, { "epoch": 0.2764738546649789, "grad_norm": 0.0032611433416604996, "learning_rate": 0.001, "loss": 0.4207, "step": 10020 }, { "epoch": 0.27650144686604333, "grad_norm": 0.0037529822438955307, "learning_rate": 0.001, "loss": 0.372, "step": 10021 }, { "epoch": 0.2765290390671077, "grad_norm": 0.002805948257446289, "learning_rate": 0.001, "loss": 0.4327, "step": 10022 }, { "epoch": 0.27655663126817204, "grad_norm": 0.003274905029684305, "learning_rate": 0.001, "loss": 0.4127, "step": 10023 }, { "epoch": 0.27658422346923645, "grad_norm": 0.00208392646163702, "learning_rate": 0.001, "loss": 0.4326, "step": 10024 }, { "epoch": 0.2766118156703008, "grad_norm": 0.007121518719941378, "learning_rate": 0.001, "loss": 0.3495, "step": 10025 }, { "epoch": 0.27663940787136515, "grad_norm": 0.0040301973931491375, "learning_rate": 0.001, "loss": 0.4095, "step": 10026 }, { "epoch": 0.2766670000724295, "grad_norm": 0.0036494045052677393, "learning_rate": 0.001, "loss": 0.3918, "step": 10027 }, { "epoch": 0.2766945922734939, "grad_norm": 0.0036077816039323807, "learning_rate": 0.001, "loss": 0.3922, "step": 10028 }, { "epoch": 0.27672218447455826, "grad_norm": 0.010453345254063606, "learning_rate": 0.001, "loss": 0.3961, "step": 10029 }, { "epoch": 0.2767497766756226, "grad_norm": 0.004115840420126915, "learning_rate": 0.001, "loss": 0.4289, "step": 10030 }, { "epoch": 0.276777368876687, "grad_norm": 0.0033005299046635628, "learning_rate": 0.001, "loss": 0.4526, "step": 10031 }, { "epoch": 0.2768049610777514, "grad_norm": 0.010283264331519604, "learning_rate": 0.001, "loss": 0.3695, "step": 10032 }, { "epoch": 0.27683255327881573, "grad_norm": 0.0024423354770988226, "learning_rate": 0.001, "loss": 0.4325, "step": 10033 }, { "epoch": 0.27686014547988014, "grad_norm": 0.002194769913330674, "learning_rate": 0.001, "loss": 0.3853, "step": 10034 }, { "epoch": 0.2768877376809445, "grad_norm": 0.00277558621019125, "learning_rate": 0.001, "loss": 0.407, "step": 10035 }, { "epoch": 0.27691532988200884, "grad_norm": 0.0023043700493872166, "learning_rate": 0.001, "loss": 0.4007, "step": 10036 }, { "epoch": 0.2769429220830732, "grad_norm": 0.0019729414489120245, "learning_rate": 0.001, "loss": 0.3824, "step": 10037 }, { "epoch": 0.2769705142841376, "grad_norm": 0.0028671245090663433, "learning_rate": 0.001, "loss": 0.407, "step": 10038 }, { "epoch": 0.27699810648520196, "grad_norm": 0.0039373342879116535, "learning_rate": 0.001, "loss": 0.4104, "step": 10039 }, { "epoch": 0.2770256986862663, "grad_norm": 0.009182474575936794, "learning_rate": 0.001, "loss": 0.3806, "step": 10040 }, { "epoch": 0.2770532908873307, "grad_norm": 0.0029172920621931553, "learning_rate": 0.001, "loss": 0.3708, "step": 10041 }, { "epoch": 0.27708088308839507, "grad_norm": 0.0023737072478979826, "learning_rate": 0.001, "loss": 0.4056, "step": 10042 }, { "epoch": 0.2771084752894594, "grad_norm": 0.002812950173392892, "learning_rate": 0.001, "loss": 0.3643, "step": 10043 }, { "epoch": 0.27713606749052383, "grad_norm": 0.002222485141828656, "learning_rate": 0.001, "loss": 0.4092, "step": 10044 }, { "epoch": 0.2771636596915882, "grad_norm": 0.0037747088354080915, "learning_rate": 0.001, "loss": 0.4252, "step": 10045 }, { "epoch": 0.27719125189265253, "grad_norm": 0.00997960101813078, "learning_rate": 0.001, "loss": 0.3705, "step": 10046 }, { "epoch": 0.2772188440937169, "grad_norm": 0.0026916342321783304, "learning_rate": 0.001, "loss": 0.4323, "step": 10047 }, { "epoch": 0.2772464362947813, "grad_norm": 0.002582460641860962, "learning_rate": 0.001, "loss": 0.3911, "step": 10048 }, { "epoch": 0.27727402849584565, "grad_norm": 0.0027102259919047356, "learning_rate": 0.001, "loss": 0.4339, "step": 10049 }, { "epoch": 0.27730162069691, "grad_norm": 0.0034718411043286324, "learning_rate": 0.001, "loss": 0.4187, "step": 10050 }, { "epoch": 0.2773292128979744, "grad_norm": 0.0027413431089371443, "learning_rate": 0.001, "loss": 0.4083, "step": 10051 }, { "epoch": 0.27735680509903876, "grad_norm": 0.003133729798719287, "learning_rate": 0.001, "loss": 0.4117, "step": 10052 }, { "epoch": 0.2773843973001031, "grad_norm": 0.002180175157263875, "learning_rate": 0.001, "loss": 0.4286, "step": 10053 }, { "epoch": 0.2774119895011675, "grad_norm": 0.0031354373786598444, "learning_rate": 0.001, "loss": 0.412, "step": 10054 }, { "epoch": 0.2774395817022319, "grad_norm": 0.0018822277197614312, "learning_rate": 0.001, "loss": 0.3981, "step": 10055 }, { "epoch": 0.2774671739032962, "grad_norm": 0.013822532258927822, "learning_rate": 0.001, "loss": 0.4179, "step": 10056 }, { "epoch": 0.2774947661043606, "grad_norm": 0.0037403854075819254, "learning_rate": 0.001, "loss": 0.3872, "step": 10057 }, { "epoch": 0.277522358305425, "grad_norm": 0.005285004619508982, "learning_rate": 0.001, "loss": 0.3919, "step": 10058 }, { "epoch": 0.27754995050648934, "grad_norm": 0.0025517537724226713, "learning_rate": 0.001, "loss": 0.4253, "step": 10059 }, { "epoch": 0.2775775427075537, "grad_norm": 0.005149201024323702, "learning_rate": 0.001, "loss": 0.3967, "step": 10060 }, { "epoch": 0.2776051349086181, "grad_norm": 0.012182818725705147, "learning_rate": 0.001, "loss": 0.3998, "step": 10061 }, { "epoch": 0.27763272710968245, "grad_norm": 0.003710835939273238, "learning_rate": 0.001, "loss": 0.3726, "step": 10062 }, { "epoch": 0.2776603193107468, "grad_norm": 0.005408014170825481, "learning_rate": 0.001, "loss": 0.4152, "step": 10063 }, { "epoch": 0.2776879115118112, "grad_norm": 0.0025339778512716293, "learning_rate": 0.001, "loss": 0.3499, "step": 10064 }, { "epoch": 0.27771550371287557, "grad_norm": 0.0028167536947876215, "learning_rate": 0.001, "loss": 0.379, "step": 10065 }, { "epoch": 0.2777430959139399, "grad_norm": 0.0029440028592944145, "learning_rate": 0.001, "loss": 0.4124, "step": 10066 }, { "epoch": 0.27777068811500427, "grad_norm": 0.0024950318038463593, "learning_rate": 0.001, "loss": 0.3861, "step": 10067 }, { "epoch": 0.2777982803160687, "grad_norm": 0.002304122317582369, "learning_rate": 0.001, "loss": 0.4027, "step": 10068 }, { "epoch": 0.27782587251713303, "grad_norm": 0.003218509955331683, "learning_rate": 0.001, "loss": 0.407, "step": 10069 }, { "epoch": 0.2778534647181974, "grad_norm": 0.003081993665546179, "learning_rate": 0.001, "loss": 0.4053, "step": 10070 }, { "epoch": 0.2778810569192618, "grad_norm": 0.003274043556302786, "learning_rate": 0.001, "loss": 0.3837, "step": 10071 }, { "epoch": 0.27790864912032615, "grad_norm": 0.011977877467870712, "learning_rate": 0.001, "loss": 0.4057, "step": 10072 }, { "epoch": 0.2779362413213905, "grad_norm": 0.004248856566846371, "learning_rate": 0.001, "loss": 0.4282, "step": 10073 }, { "epoch": 0.27796383352245485, "grad_norm": 0.0030563895124942064, "learning_rate": 0.001, "loss": 0.3861, "step": 10074 }, { "epoch": 0.27799142572351926, "grad_norm": 0.0027819809038192034, "learning_rate": 0.001, "loss": 0.3906, "step": 10075 }, { "epoch": 0.2780190179245836, "grad_norm": 0.003203164553269744, "learning_rate": 0.001, "loss": 0.4168, "step": 10076 }, { "epoch": 0.27804661012564796, "grad_norm": 0.003680006368085742, "learning_rate": 0.001, "loss": 0.3929, "step": 10077 }, { "epoch": 0.27807420232671237, "grad_norm": 0.0054307919926941395, "learning_rate": 0.001, "loss": 0.4098, "step": 10078 }, { "epoch": 0.2781017945277767, "grad_norm": 0.00977605115622282, "learning_rate": 0.001, "loss": 0.4088, "step": 10079 }, { "epoch": 0.2781293867288411, "grad_norm": 0.003044104902073741, "learning_rate": 0.001, "loss": 0.4212, "step": 10080 }, { "epoch": 0.2781569789299055, "grad_norm": 0.009494941681623459, "learning_rate": 0.001, "loss": 0.3936, "step": 10081 }, { "epoch": 0.27818457113096984, "grad_norm": 0.04275614395737648, "learning_rate": 0.001, "loss": 0.3941, "step": 10082 }, { "epoch": 0.2782121633320342, "grad_norm": 0.0026185614988207817, "learning_rate": 0.001, "loss": 0.4145, "step": 10083 }, { "epoch": 0.27823975553309854, "grad_norm": 0.003999069333076477, "learning_rate": 0.001, "loss": 0.4262, "step": 10084 }, { "epoch": 0.27826734773416295, "grad_norm": 0.00245444243773818, "learning_rate": 0.001, "loss": 0.386, "step": 10085 }, { "epoch": 0.2782949399352273, "grad_norm": 0.00522113312035799, "learning_rate": 0.001, "loss": 0.4011, "step": 10086 }, { "epoch": 0.27832253213629166, "grad_norm": 0.0037140315398573875, "learning_rate": 0.001, "loss": 0.3612, "step": 10087 }, { "epoch": 0.27835012433735606, "grad_norm": 0.0034693137276917696, "learning_rate": 0.001, "loss": 0.4073, "step": 10088 }, { "epoch": 0.2783777165384204, "grad_norm": 0.00494216475635767, "learning_rate": 0.001, "loss": 0.3911, "step": 10089 }, { "epoch": 0.27840530873948477, "grad_norm": 0.0024820046965032816, "learning_rate": 0.001, "loss": 0.4066, "step": 10090 }, { "epoch": 0.2784329009405492, "grad_norm": 0.0024154249113053083, "learning_rate": 0.001, "loss": 0.3744, "step": 10091 }, { "epoch": 0.27846049314161353, "grad_norm": 0.0025213605258613825, "learning_rate": 0.001, "loss": 0.3724, "step": 10092 }, { "epoch": 0.2784880853426779, "grad_norm": 0.003560342825949192, "learning_rate": 0.001, "loss": 0.4064, "step": 10093 }, { "epoch": 0.27851567754374223, "grad_norm": 0.0023579909466207027, "learning_rate": 0.001, "loss": 0.3771, "step": 10094 }, { "epoch": 0.27854326974480664, "grad_norm": 0.0032963070552796125, "learning_rate": 0.001, "loss": 0.3797, "step": 10095 }, { "epoch": 0.278570861945871, "grad_norm": 0.0023323865607380867, "learning_rate": 0.001, "loss": 0.4111, "step": 10096 }, { "epoch": 0.27859845414693535, "grad_norm": 0.0035246131010353565, "learning_rate": 0.001, "loss": 0.3901, "step": 10097 }, { "epoch": 0.27862604634799976, "grad_norm": 0.004129378125071526, "learning_rate": 0.001, "loss": 0.4039, "step": 10098 }, { "epoch": 0.2786536385490641, "grad_norm": 0.0030813373159617186, "learning_rate": 0.001, "loss": 0.3724, "step": 10099 }, { "epoch": 0.27868123075012846, "grad_norm": 0.002692109439522028, "learning_rate": 0.001, "loss": 0.4205, "step": 10100 }, { "epoch": 0.27870882295119287, "grad_norm": 0.0024737734347581863, "learning_rate": 0.001, "loss": 0.4175, "step": 10101 }, { "epoch": 0.2787364151522572, "grad_norm": 0.0027061791624873877, "learning_rate": 0.001, "loss": 0.3762, "step": 10102 }, { "epoch": 0.2787640073533216, "grad_norm": 0.002315426943823695, "learning_rate": 0.001, "loss": 0.3813, "step": 10103 }, { "epoch": 0.2787915995543859, "grad_norm": 0.005130481906235218, "learning_rate": 0.001, "loss": 0.3783, "step": 10104 }, { "epoch": 0.27881919175545034, "grad_norm": 0.006558484397828579, "learning_rate": 0.001, "loss": 0.4091, "step": 10105 }, { "epoch": 0.2788467839565147, "grad_norm": 0.004167409613728523, "learning_rate": 0.001, "loss": 0.3981, "step": 10106 }, { "epoch": 0.27887437615757904, "grad_norm": 0.0057708099484443665, "learning_rate": 0.001, "loss": 0.3973, "step": 10107 }, { "epoch": 0.27890196835864345, "grad_norm": 0.0028495537117123604, "learning_rate": 0.001, "loss": 0.3616, "step": 10108 }, { "epoch": 0.2789295605597078, "grad_norm": 0.002703807782381773, "learning_rate": 0.001, "loss": 0.4447, "step": 10109 }, { "epoch": 0.27895715276077215, "grad_norm": 0.0030382112599909306, "learning_rate": 0.001, "loss": 0.4137, "step": 10110 }, { "epoch": 0.27898474496183656, "grad_norm": 0.0030904843006283045, "learning_rate": 0.001, "loss": 0.4234, "step": 10111 }, { "epoch": 0.2790123371629009, "grad_norm": 0.003971985075622797, "learning_rate": 0.001, "loss": 0.4184, "step": 10112 }, { "epoch": 0.27903992936396527, "grad_norm": 0.002467118203639984, "learning_rate": 0.001, "loss": 0.4372, "step": 10113 }, { "epoch": 0.2790675215650296, "grad_norm": 0.006953855976462364, "learning_rate": 0.001, "loss": 0.4285, "step": 10114 }, { "epoch": 0.279095113766094, "grad_norm": 0.0055779386311769485, "learning_rate": 0.001, "loss": 0.3893, "step": 10115 }, { "epoch": 0.2791227059671584, "grad_norm": 0.004655706230551004, "learning_rate": 0.001, "loss": 0.4083, "step": 10116 }, { "epoch": 0.27915029816822273, "grad_norm": 0.004876443184912205, "learning_rate": 0.001, "loss": 0.3755, "step": 10117 }, { "epoch": 0.27917789036928714, "grad_norm": 0.004932073410600424, "learning_rate": 0.001, "loss": 0.3852, "step": 10118 }, { "epoch": 0.2792054825703515, "grad_norm": 0.007771969772875309, "learning_rate": 0.001, "loss": 0.4056, "step": 10119 }, { "epoch": 0.27923307477141585, "grad_norm": 0.005660747177898884, "learning_rate": 0.001, "loss": 0.3796, "step": 10120 }, { "epoch": 0.27926066697248025, "grad_norm": 0.0034438660368323326, "learning_rate": 0.001, "loss": 0.4038, "step": 10121 }, { "epoch": 0.2792882591735446, "grad_norm": 0.003013996873050928, "learning_rate": 0.001, "loss": 0.444, "step": 10122 }, { "epoch": 0.27931585137460896, "grad_norm": 0.0033822916448116302, "learning_rate": 0.001, "loss": 0.3583, "step": 10123 }, { "epoch": 0.2793434435756733, "grad_norm": 0.0029740110039711, "learning_rate": 0.001, "loss": 0.3905, "step": 10124 }, { "epoch": 0.2793710357767377, "grad_norm": 0.005846134852617979, "learning_rate": 0.001, "loss": 0.3874, "step": 10125 }, { "epoch": 0.27939862797780207, "grad_norm": 0.0038693509995937347, "learning_rate": 0.001, "loss": 0.4188, "step": 10126 }, { "epoch": 0.2794262201788664, "grad_norm": 0.023018015548586845, "learning_rate": 0.001, "loss": 0.4024, "step": 10127 }, { "epoch": 0.27945381237993083, "grad_norm": 0.004444723483175039, "learning_rate": 0.001, "loss": 0.3736, "step": 10128 }, { "epoch": 0.2794814045809952, "grad_norm": 0.00677674962207675, "learning_rate": 0.001, "loss": 0.3877, "step": 10129 }, { "epoch": 0.27950899678205954, "grad_norm": 0.004375552758574486, "learning_rate": 0.001, "loss": 0.4132, "step": 10130 }, { "epoch": 0.27953658898312395, "grad_norm": 0.003335924819111824, "learning_rate": 0.001, "loss": 0.3811, "step": 10131 }, { "epoch": 0.2795641811841883, "grad_norm": 0.0037330735940486193, "learning_rate": 0.001, "loss": 0.4207, "step": 10132 }, { "epoch": 0.27959177338525265, "grad_norm": 0.004045455250889063, "learning_rate": 0.001, "loss": 0.3541, "step": 10133 }, { "epoch": 0.279619365586317, "grad_norm": 0.004944518208503723, "learning_rate": 0.001, "loss": 0.3924, "step": 10134 }, { "epoch": 0.2796469577873814, "grad_norm": 0.003303113393485546, "learning_rate": 0.001, "loss": 0.4458, "step": 10135 }, { "epoch": 0.27967454998844576, "grad_norm": 0.002501634182408452, "learning_rate": 0.001, "loss": 0.4063, "step": 10136 }, { "epoch": 0.2797021421895101, "grad_norm": 0.008220963180065155, "learning_rate": 0.001, "loss": 0.3977, "step": 10137 }, { "epoch": 0.2797297343905745, "grad_norm": 0.015138577669858932, "learning_rate": 0.001, "loss": 0.4192, "step": 10138 }, { "epoch": 0.2797573265916389, "grad_norm": 0.00455261766910553, "learning_rate": 0.001, "loss": 0.377, "step": 10139 }, { "epoch": 0.27978491879270323, "grad_norm": 0.003458748571574688, "learning_rate": 0.001, "loss": 0.4064, "step": 10140 }, { "epoch": 0.27981251099376764, "grad_norm": 0.0061928508803248405, "learning_rate": 0.001, "loss": 0.4298, "step": 10141 }, { "epoch": 0.279840103194832, "grad_norm": 0.004049024078994989, "learning_rate": 0.001, "loss": 0.3936, "step": 10142 }, { "epoch": 0.27986769539589634, "grad_norm": 0.0035144255962222815, "learning_rate": 0.001, "loss": 0.404, "step": 10143 }, { "epoch": 0.2798952875969607, "grad_norm": 0.004885702393949032, "learning_rate": 0.001, "loss": 0.3764, "step": 10144 }, { "epoch": 0.2799228797980251, "grad_norm": 0.004949494265019894, "learning_rate": 0.001, "loss": 0.3898, "step": 10145 }, { "epoch": 0.27995047199908946, "grad_norm": 0.003847777610644698, "learning_rate": 0.001, "loss": 0.4003, "step": 10146 }, { "epoch": 0.2799780642001538, "grad_norm": 0.0029670281801372766, "learning_rate": 0.001, "loss": 0.388, "step": 10147 }, { "epoch": 0.2800056564012182, "grad_norm": 0.0038127326406538486, "learning_rate": 0.001, "loss": 0.3912, "step": 10148 }, { "epoch": 0.28003324860228257, "grad_norm": 0.002967024687677622, "learning_rate": 0.001, "loss": 0.3976, "step": 10149 }, { "epoch": 0.2800608408033469, "grad_norm": 0.004500363487750292, "learning_rate": 0.001, "loss": 0.4126, "step": 10150 }, { "epoch": 0.28008843300441133, "grad_norm": 0.0036577654536813498, "learning_rate": 0.001, "loss": 0.3864, "step": 10151 }, { "epoch": 0.2801160252054757, "grad_norm": 0.00338082667440176, "learning_rate": 0.001, "loss": 0.4216, "step": 10152 }, { "epoch": 0.28014361740654004, "grad_norm": 0.004477594047784805, "learning_rate": 0.001, "loss": 0.3779, "step": 10153 }, { "epoch": 0.2801712096076044, "grad_norm": 0.005331623367965221, "learning_rate": 0.001, "loss": 0.3674, "step": 10154 }, { "epoch": 0.2801988018086688, "grad_norm": 0.0034174472093582153, "learning_rate": 0.001, "loss": 0.3847, "step": 10155 }, { "epoch": 0.28022639400973315, "grad_norm": 0.004699235316365957, "learning_rate": 0.001, "loss": 0.3842, "step": 10156 }, { "epoch": 0.2802539862107975, "grad_norm": 0.0027989267837256193, "learning_rate": 0.001, "loss": 0.3757, "step": 10157 }, { "epoch": 0.2802815784118619, "grad_norm": 0.003362043295055628, "learning_rate": 0.001, "loss": 0.3973, "step": 10158 }, { "epoch": 0.28030917061292626, "grad_norm": 0.0024922748561948538, "learning_rate": 0.001, "loss": 0.4422, "step": 10159 }, { "epoch": 0.2803367628139906, "grad_norm": 0.0043379804119467735, "learning_rate": 0.001, "loss": 0.391, "step": 10160 }, { "epoch": 0.280364355015055, "grad_norm": 0.0037757758982479572, "learning_rate": 0.001, "loss": 0.4115, "step": 10161 }, { "epoch": 0.2803919472161194, "grad_norm": 0.003047212492674589, "learning_rate": 0.001, "loss": 0.4139, "step": 10162 }, { "epoch": 0.2804195394171837, "grad_norm": 0.0027652375865727663, "learning_rate": 0.001, "loss": 0.3968, "step": 10163 }, { "epoch": 0.2804471316182481, "grad_norm": 0.0034420695155858994, "learning_rate": 0.001, "loss": 0.3819, "step": 10164 }, { "epoch": 0.2804747238193125, "grad_norm": 0.0024248689878731966, "learning_rate": 0.001, "loss": 0.4071, "step": 10165 }, { "epoch": 0.28050231602037684, "grad_norm": 0.0045005762949585915, "learning_rate": 0.001, "loss": 0.3774, "step": 10166 }, { "epoch": 0.2805299082214412, "grad_norm": 0.002247242256999016, "learning_rate": 0.001, "loss": 0.409, "step": 10167 }, { "epoch": 0.2805575004225056, "grad_norm": 0.004416101146489382, "learning_rate": 0.001, "loss": 0.4311, "step": 10168 }, { "epoch": 0.28058509262356995, "grad_norm": 0.003067211015149951, "learning_rate": 0.001, "loss": 0.3713, "step": 10169 }, { "epoch": 0.2806126848246343, "grad_norm": 0.0030722361989319324, "learning_rate": 0.001, "loss": 0.446, "step": 10170 }, { "epoch": 0.28064027702569866, "grad_norm": 0.003344991710036993, "learning_rate": 0.001, "loss": 0.3825, "step": 10171 }, { "epoch": 0.28066786922676307, "grad_norm": 0.0028220931999385357, "learning_rate": 0.001, "loss": 0.4381, "step": 10172 }, { "epoch": 0.2806954614278274, "grad_norm": 0.0032366979867219925, "learning_rate": 0.001, "loss": 0.3769, "step": 10173 }, { "epoch": 0.28072305362889177, "grad_norm": 0.004435013514012098, "learning_rate": 0.001, "loss": 0.41, "step": 10174 }, { "epoch": 0.2807506458299562, "grad_norm": 0.002561743138357997, "learning_rate": 0.001, "loss": 0.4298, "step": 10175 }, { "epoch": 0.28077823803102053, "grad_norm": 0.0032213281374424696, "learning_rate": 0.001, "loss": 0.4292, "step": 10176 }, { "epoch": 0.2808058302320849, "grad_norm": 0.0033837456721812487, "learning_rate": 0.001, "loss": 0.4082, "step": 10177 }, { "epoch": 0.2808334224331493, "grad_norm": 0.0026573874056339264, "learning_rate": 0.001, "loss": 0.4523, "step": 10178 }, { "epoch": 0.28086101463421365, "grad_norm": 0.004164142068475485, "learning_rate": 0.001, "loss": 0.3699, "step": 10179 }, { "epoch": 0.280888606835278, "grad_norm": 0.005075597669929266, "learning_rate": 0.001, "loss": 0.3859, "step": 10180 }, { "epoch": 0.28091619903634235, "grad_norm": 0.007850431837141514, "learning_rate": 0.001, "loss": 0.3754, "step": 10181 }, { "epoch": 0.28094379123740676, "grad_norm": 0.0037918195594102144, "learning_rate": 0.001, "loss": 0.4183, "step": 10182 }, { "epoch": 0.2809713834384711, "grad_norm": 0.004340451210737228, "learning_rate": 0.001, "loss": 0.399, "step": 10183 }, { "epoch": 0.28099897563953546, "grad_norm": 0.003013703739270568, "learning_rate": 0.001, "loss": 0.4117, "step": 10184 }, { "epoch": 0.2810265678405999, "grad_norm": 0.004570307210087776, "learning_rate": 0.001, "loss": 0.3913, "step": 10185 }, { "epoch": 0.2810541600416642, "grad_norm": 0.0028304762672632933, "learning_rate": 0.001, "loss": 0.3712, "step": 10186 }, { "epoch": 0.2810817522427286, "grad_norm": 0.003872218308970332, "learning_rate": 0.001, "loss": 0.3755, "step": 10187 }, { "epoch": 0.281109344443793, "grad_norm": 0.003236171556636691, "learning_rate": 0.001, "loss": 0.4066, "step": 10188 }, { "epoch": 0.28113693664485734, "grad_norm": 0.00254503614269197, "learning_rate": 0.001, "loss": 0.3824, "step": 10189 }, { "epoch": 0.2811645288459217, "grad_norm": 0.0019429237581789494, "learning_rate": 0.001, "loss": 0.3883, "step": 10190 }, { "epoch": 0.28119212104698604, "grad_norm": 0.003968310542404652, "learning_rate": 0.001, "loss": 0.4173, "step": 10191 }, { "epoch": 0.28121971324805045, "grad_norm": 0.0034456211142241955, "learning_rate": 0.001, "loss": 0.4078, "step": 10192 }, { "epoch": 0.2812473054491148, "grad_norm": 0.00856733787804842, "learning_rate": 0.001, "loss": 0.3771, "step": 10193 }, { "epoch": 0.28127489765017916, "grad_norm": 0.00205041142180562, "learning_rate": 0.001, "loss": 0.3974, "step": 10194 }, { "epoch": 0.28130248985124356, "grad_norm": 0.0054842522367835045, "learning_rate": 0.001, "loss": 0.3786, "step": 10195 }, { "epoch": 0.2813300820523079, "grad_norm": 0.0026425907853990793, "learning_rate": 0.001, "loss": 0.415, "step": 10196 }, { "epoch": 0.28135767425337227, "grad_norm": 0.0030602633487433195, "learning_rate": 0.001, "loss": 0.4006, "step": 10197 }, { "epoch": 0.2813852664544367, "grad_norm": 0.00611920328810811, "learning_rate": 0.001, "loss": 0.3823, "step": 10198 }, { "epoch": 0.28141285865550103, "grad_norm": 0.0028095582965761423, "learning_rate": 0.001, "loss": 0.4303, "step": 10199 }, { "epoch": 0.2814404508565654, "grad_norm": 0.002753883833065629, "learning_rate": 0.001, "loss": 0.408, "step": 10200 }, { "epoch": 0.28146804305762974, "grad_norm": 0.0056452443823218346, "learning_rate": 0.001, "loss": 0.3858, "step": 10201 }, { "epoch": 0.28149563525869414, "grad_norm": 0.004540273919701576, "learning_rate": 0.001, "loss": 0.4322, "step": 10202 }, { "epoch": 0.2815232274597585, "grad_norm": 0.0029490333981812, "learning_rate": 0.001, "loss": 0.4101, "step": 10203 }, { "epoch": 0.28155081966082285, "grad_norm": 0.0022447453811764717, "learning_rate": 0.001, "loss": 0.4199, "step": 10204 }, { "epoch": 0.28157841186188726, "grad_norm": 0.004265769850462675, "learning_rate": 0.001, "loss": 0.4092, "step": 10205 }, { "epoch": 0.2816060040629516, "grad_norm": 0.007797228638082743, "learning_rate": 0.001, "loss": 0.3928, "step": 10206 }, { "epoch": 0.28163359626401596, "grad_norm": 0.0029594229999929667, "learning_rate": 0.001, "loss": 0.3981, "step": 10207 }, { "epoch": 0.28166118846508037, "grad_norm": 0.006314094644039869, "learning_rate": 0.001, "loss": 0.3699, "step": 10208 }, { "epoch": 0.2816887806661447, "grad_norm": 0.002639149548485875, "learning_rate": 0.001, "loss": 0.4034, "step": 10209 }, { "epoch": 0.2817163728672091, "grad_norm": 0.0027635907754302025, "learning_rate": 0.001, "loss": 0.4056, "step": 10210 }, { "epoch": 0.2817439650682734, "grad_norm": 0.006702120881527662, "learning_rate": 0.001, "loss": 0.3668, "step": 10211 }, { "epoch": 0.28177155726933784, "grad_norm": 0.008459938690066338, "learning_rate": 0.001, "loss": 0.4373, "step": 10212 }, { "epoch": 0.2817991494704022, "grad_norm": 0.005659305490553379, "learning_rate": 0.001, "loss": 0.3908, "step": 10213 }, { "epoch": 0.28182674167146654, "grad_norm": 0.002557772444561124, "learning_rate": 0.001, "loss": 0.4227, "step": 10214 }, { "epoch": 0.28185433387253095, "grad_norm": 0.0043907626532018185, "learning_rate": 0.001, "loss": 0.3726, "step": 10215 }, { "epoch": 0.2818819260735953, "grad_norm": 0.004590220283716917, "learning_rate": 0.001, "loss": 0.4098, "step": 10216 }, { "epoch": 0.28190951827465965, "grad_norm": 0.05343657732009888, "learning_rate": 0.001, "loss": 0.4197, "step": 10217 }, { "epoch": 0.28193711047572406, "grad_norm": 0.008130939677357674, "learning_rate": 0.001, "loss": 0.3902, "step": 10218 }, { "epoch": 0.2819647026767884, "grad_norm": 0.002707968931645155, "learning_rate": 0.001, "loss": 0.3788, "step": 10219 }, { "epoch": 0.28199229487785277, "grad_norm": 0.002904132939875126, "learning_rate": 0.001, "loss": 0.3735, "step": 10220 }, { "epoch": 0.2820198870789171, "grad_norm": 0.002991893794387579, "learning_rate": 0.001, "loss": 0.419, "step": 10221 }, { "epoch": 0.28204747927998153, "grad_norm": 0.0030220167245715857, "learning_rate": 0.001, "loss": 0.3878, "step": 10222 }, { "epoch": 0.2820750714810459, "grad_norm": 0.002117662690579891, "learning_rate": 0.001, "loss": 0.4007, "step": 10223 }, { "epoch": 0.28210266368211023, "grad_norm": 0.003019734052941203, "learning_rate": 0.001, "loss": 0.3892, "step": 10224 }, { "epoch": 0.28213025588317464, "grad_norm": 0.003011964727193117, "learning_rate": 0.001, "loss": 0.3953, "step": 10225 }, { "epoch": 0.282157848084239, "grad_norm": 0.0034174530301243067, "learning_rate": 0.001, "loss": 0.4179, "step": 10226 }, { "epoch": 0.28218544028530335, "grad_norm": 0.0024363279808312654, "learning_rate": 0.001, "loss": 0.3984, "step": 10227 }, { "epoch": 0.28221303248636775, "grad_norm": 0.002497728681191802, "learning_rate": 0.001, "loss": 0.4095, "step": 10228 }, { "epoch": 0.2822406246874321, "grad_norm": 0.0034284652210772038, "learning_rate": 0.001, "loss": 0.3882, "step": 10229 }, { "epoch": 0.28226821688849646, "grad_norm": 0.002253099577501416, "learning_rate": 0.001, "loss": 0.3991, "step": 10230 }, { "epoch": 0.2822958090895608, "grad_norm": 0.003588828956708312, "learning_rate": 0.001, "loss": 0.3769, "step": 10231 }, { "epoch": 0.2823234012906252, "grad_norm": 0.0027390895411372185, "learning_rate": 0.001, "loss": 0.3883, "step": 10232 }, { "epoch": 0.2823509934916896, "grad_norm": 0.002828077645972371, "learning_rate": 0.001, "loss": 0.3915, "step": 10233 }, { "epoch": 0.2823785856927539, "grad_norm": 0.006078117527067661, "learning_rate": 0.001, "loss": 0.3746, "step": 10234 }, { "epoch": 0.28240617789381833, "grad_norm": 0.004316994454711676, "learning_rate": 0.001, "loss": 0.3866, "step": 10235 }, { "epoch": 0.2824337700948827, "grad_norm": 0.0036562427412718534, "learning_rate": 0.001, "loss": 0.4069, "step": 10236 }, { "epoch": 0.28246136229594704, "grad_norm": 0.004819520283490419, "learning_rate": 0.001, "loss": 0.3939, "step": 10237 }, { "epoch": 0.28248895449701145, "grad_norm": 0.0036955669056624174, "learning_rate": 0.001, "loss": 0.376, "step": 10238 }, { "epoch": 0.2825165466980758, "grad_norm": 0.004412720445543528, "learning_rate": 0.001, "loss": 0.4036, "step": 10239 }, { "epoch": 0.28254413889914015, "grad_norm": 0.0021497290581464767, "learning_rate": 0.001, "loss": 0.4165, "step": 10240 }, { "epoch": 0.2825717311002045, "grad_norm": 0.003762908047065139, "learning_rate": 0.001, "loss": 0.3871, "step": 10241 }, { "epoch": 0.2825993233012689, "grad_norm": 0.0028142263181507587, "learning_rate": 0.001, "loss": 0.3492, "step": 10242 }, { "epoch": 0.28262691550233326, "grad_norm": 0.002374440897256136, "learning_rate": 0.001, "loss": 0.4136, "step": 10243 }, { "epoch": 0.2826545077033976, "grad_norm": 0.002582703484222293, "learning_rate": 0.001, "loss": 0.3865, "step": 10244 }, { "epoch": 0.282682099904462, "grad_norm": 0.003294025780633092, "learning_rate": 0.001, "loss": 0.4019, "step": 10245 }, { "epoch": 0.2827096921055264, "grad_norm": 0.0025567803531885147, "learning_rate": 0.001, "loss": 0.4282, "step": 10246 }, { "epoch": 0.28273728430659073, "grad_norm": 0.002444926183670759, "learning_rate": 0.001, "loss": 0.4432, "step": 10247 }, { "epoch": 0.28276487650765514, "grad_norm": 0.002646154025569558, "learning_rate": 0.001, "loss": 0.404, "step": 10248 }, { "epoch": 0.2827924687087195, "grad_norm": 0.003058774396777153, "learning_rate": 0.001, "loss": 0.4082, "step": 10249 }, { "epoch": 0.28282006090978384, "grad_norm": 0.004560539964586496, "learning_rate": 0.001, "loss": 0.3797, "step": 10250 }, { "epoch": 0.2828476531108482, "grad_norm": 0.005327416118234396, "learning_rate": 0.001, "loss": 0.3994, "step": 10251 }, { "epoch": 0.2828752453119126, "grad_norm": 0.002578763058409095, "learning_rate": 0.001, "loss": 0.3966, "step": 10252 }, { "epoch": 0.28290283751297696, "grad_norm": 0.0030044415034353733, "learning_rate": 0.001, "loss": 0.4102, "step": 10253 }, { "epoch": 0.2829304297140413, "grad_norm": 0.002438440453261137, "learning_rate": 0.001, "loss": 0.4276, "step": 10254 }, { "epoch": 0.2829580219151057, "grad_norm": 0.002474286826327443, "learning_rate": 0.001, "loss": 0.4097, "step": 10255 }, { "epoch": 0.28298561411617007, "grad_norm": 0.0036326062399894, "learning_rate": 0.001, "loss": 0.3871, "step": 10256 }, { "epoch": 0.2830132063172344, "grad_norm": 0.006661332678049803, "learning_rate": 0.001, "loss": 0.3838, "step": 10257 }, { "epoch": 0.2830407985182988, "grad_norm": 0.005072845611721277, "learning_rate": 0.001, "loss": 0.3903, "step": 10258 }, { "epoch": 0.2830683907193632, "grad_norm": 0.004820408299565315, "learning_rate": 0.001, "loss": 0.373, "step": 10259 }, { "epoch": 0.28309598292042754, "grad_norm": 0.00312256021425128, "learning_rate": 0.001, "loss": 0.4164, "step": 10260 }, { "epoch": 0.2831235751214919, "grad_norm": 0.0029510208405554295, "learning_rate": 0.001, "loss": 0.4023, "step": 10261 }, { "epoch": 0.2831511673225563, "grad_norm": 0.0040268669836223125, "learning_rate": 0.001, "loss": 0.3614, "step": 10262 }, { "epoch": 0.28317875952362065, "grad_norm": 0.006543302442878485, "learning_rate": 0.001, "loss": 0.384, "step": 10263 }, { "epoch": 0.283206351724685, "grad_norm": 0.003965241368860006, "learning_rate": 0.001, "loss": 0.3958, "step": 10264 }, { "epoch": 0.2832339439257494, "grad_norm": 0.0032414360903203487, "learning_rate": 0.001, "loss": 0.3433, "step": 10265 }, { "epoch": 0.28326153612681376, "grad_norm": 0.002474940847605467, "learning_rate": 0.001, "loss": 0.393, "step": 10266 }, { "epoch": 0.2832891283278781, "grad_norm": 0.0031417065765708685, "learning_rate": 0.001, "loss": 0.393, "step": 10267 }, { "epoch": 0.28331672052894247, "grad_norm": 0.0028232133481651545, "learning_rate": 0.001, "loss": 0.4599, "step": 10268 }, { "epoch": 0.2833443127300069, "grad_norm": 0.002691833535209298, "learning_rate": 0.001, "loss": 0.3892, "step": 10269 }, { "epoch": 0.28337190493107123, "grad_norm": 0.005066230893135071, "learning_rate": 0.001, "loss": 0.3908, "step": 10270 }, { "epoch": 0.2833994971321356, "grad_norm": 0.004286530893296003, "learning_rate": 0.001, "loss": 0.3913, "step": 10271 }, { "epoch": 0.2834270893332, "grad_norm": 0.004262128844857216, "learning_rate": 0.001, "loss": 0.4195, "step": 10272 }, { "epoch": 0.28345468153426434, "grad_norm": 0.0027960515581071377, "learning_rate": 0.001, "loss": 0.4211, "step": 10273 }, { "epoch": 0.2834822737353287, "grad_norm": 0.004192339722067118, "learning_rate": 0.001, "loss": 0.377, "step": 10274 }, { "epoch": 0.2835098659363931, "grad_norm": 0.0032848825212568045, "learning_rate": 0.001, "loss": 0.362, "step": 10275 }, { "epoch": 0.28353745813745745, "grad_norm": 0.0027345793787389994, "learning_rate": 0.001, "loss": 0.4034, "step": 10276 }, { "epoch": 0.2835650503385218, "grad_norm": 0.0030612831469625235, "learning_rate": 0.001, "loss": 0.3779, "step": 10277 }, { "epoch": 0.28359264253958616, "grad_norm": 0.002645805710926652, "learning_rate": 0.001, "loss": 0.3881, "step": 10278 }, { "epoch": 0.28362023474065057, "grad_norm": 0.0025237167719751596, "learning_rate": 0.001, "loss": 0.3954, "step": 10279 }, { "epoch": 0.2836478269417149, "grad_norm": 0.0036604590713977814, "learning_rate": 0.001, "loss": 0.4253, "step": 10280 }, { "epoch": 0.2836754191427793, "grad_norm": 0.007117830216884613, "learning_rate": 0.001, "loss": 0.3755, "step": 10281 }, { "epoch": 0.2837030113438437, "grad_norm": 0.05505690723657608, "learning_rate": 0.001, "loss": 0.3955, "step": 10282 }, { "epoch": 0.28373060354490803, "grad_norm": 0.006176114547997713, "learning_rate": 0.001, "loss": 0.352, "step": 10283 }, { "epoch": 0.2837581957459724, "grad_norm": 0.003028157399967313, "learning_rate": 0.001, "loss": 0.3814, "step": 10284 }, { "epoch": 0.2837857879470368, "grad_norm": 0.0032485162373632193, "learning_rate": 0.001, "loss": 0.4049, "step": 10285 }, { "epoch": 0.28381338014810115, "grad_norm": 0.002797128167003393, "learning_rate": 0.001, "loss": 0.4038, "step": 10286 }, { "epoch": 0.2838409723491655, "grad_norm": 0.003049092134460807, "learning_rate": 0.001, "loss": 0.4092, "step": 10287 }, { "epoch": 0.28386856455022985, "grad_norm": 0.0031948923133313656, "learning_rate": 0.001, "loss": 0.3943, "step": 10288 }, { "epoch": 0.28389615675129426, "grad_norm": 0.003836139803752303, "learning_rate": 0.001, "loss": 0.3844, "step": 10289 }, { "epoch": 0.2839237489523586, "grad_norm": 0.002338884864002466, "learning_rate": 0.001, "loss": 0.4126, "step": 10290 }, { "epoch": 0.28395134115342296, "grad_norm": 0.009832640178501606, "learning_rate": 0.001, "loss": 0.4102, "step": 10291 }, { "epoch": 0.2839789333544874, "grad_norm": 0.0026350358966737986, "learning_rate": 0.001, "loss": 0.43, "step": 10292 }, { "epoch": 0.2840065255555517, "grad_norm": 0.003660572227090597, "learning_rate": 0.001, "loss": 0.4099, "step": 10293 }, { "epoch": 0.2840341177566161, "grad_norm": 0.00275686988607049, "learning_rate": 0.001, "loss": 0.3934, "step": 10294 }, { "epoch": 0.2840617099576805, "grad_norm": 0.0033077567350119352, "learning_rate": 0.001, "loss": 0.3926, "step": 10295 }, { "epoch": 0.28408930215874484, "grad_norm": 0.006942362524569035, "learning_rate": 0.001, "loss": 0.354, "step": 10296 }, { "epoch": 0.2841168943598092, "grad_norm": 0.0036332812160253525, "learning_rate": 0.001, "loss": 0.3707, "step": 10297 }, { "epoch": 0.28414448656087354, "grad_norm": 0.012020766735076904, "learning_rate": 0.001, "loss": 0.3754, "step": 10298 }, { "epoch": 0.28417207876193795, "grad_norm": 0.0028270173352211714, "learning_rate": 0.001, "loss": 0.3852, "step": 10299 }, { "epoch": 0.2841996709630023, "grad_norm": 0.011626332998275757, "learning_rate": 0.001, "loss": 0.3823, "step": 10300 }, { "epoch": 0.28422726316406666, "grad_norm": 0.004878376144915819, "learning_rate": 0.001, "loss": 0.4427, "step": 10301 }, { "epoch": 0.28425485536513106, "grad_norm": 0.004530386067926884, "learning_rate": 0.001, "loss": 0.3858, "step": 10302 }, { "epoch": 0.2842824475661954, "grad_norm": 0.004930171649903059, "learning_rate": 0.001, "loss": 0.3893, "step": 10303 }, { "epoch": 0.28431003976725977, "grad_norm": 0.009053234942257404, "learning_rate": 0.001, "loss": 0.3703, "step": 10304 }, { "epoch": 0.2843376319683242, "grad_norm": 0.004205284174531698, "learning_rate": 0.001, "loss": 0.3538, "step": 10305 }, { "epoch": 0.28436522416938853, "grad_norm": 0.003883173456415534, "learning_rate": 0.001, "loss": 0.4267, "step": 10306 }, { "epoch": 0.2843928163704529, "grad_norm": 0.0031415647827088833, "learning_rate": 0.001, "loss": 0.4154, "step": 10307 }, { "epoch": 0.28442040857151724, "grad_norm": 0.003464039647951722, "learning_rate": 0.001, "loss": 0.39, "step": 10308 }, { "epoch": 0.28444800077258164, "grad_norm": 0.004332481883466244, "learning_rate": 0.001, "loss": 0.3642, "step": 10309 }, { "epoch": 0.284475592973646, "grad_norm": 0.00255342829041183, "learning_rate": 0.001, "loss": 0.4375, "step": 10310 }, { "epoch": 0.28450318517471035, "grad_norm": 0.0028760903514921665, "learning_rate": 0.001, "loss": 0.4054, "step": 10311 }, { "epoch": 0.28453077737577476, "grad_norm": 0.005956695880740881, "learning_rate": 0.001, "loss": 0.4019, "step": 10312 }, { "epoch": 0.2845583695768391, "grad_norm": 0.004237044602632523, "learning_rate": 0.001, "loss": 0.3803, "step": 10313 }, { "epoch": 0.28458596177790346, "grad_norm": 0.005159912630915642, "learning_rate": 0.001, "loss": 0.3598, "step": 10314 }, { "epoch": 0.28461355397896787, "grad_norm": 0.0030028277542442083, "learning_rate": 0.001, "loss": 0.4357, "step": 10315 }, { "epoch": 0.2846411461800322, "grad_norm": 0.002339400118216872, "learning_rate": 0.001, "loss": 0.4295, "step": 10316 }, { "epoch": 0.2846687383810966, "grad_norm": 0.0021588336676359177, "learning_rate": 0.001, "loss": 0.4477, "step": 10317 }, { "epoch": 0.28469633058216093, "grad_norm": 0.002696117153391242, "learning_rate": 0.001, "loss": 0.39, "step": 10318 }, { "epoch": 0.28472392278322534, "grad_norm": 0.0029791551642119884, "learning_rate": 0.001, "loss": 0.4123, "step": 10319 }, { "epoch": 0.2847515149842897, "grad_norm": 0.00416885782033205, "learning_rate": 0.001, "loss": 0.419, "step": 10320 }, { "epoch": 0.28477910718535404, "grad_norm": 0.005709108896553516, "learning_rate": 0.001, "loss": 0.4096, "step": 10321 }, { "epoch": 0.28480669938641845, "grad_norm": 0.002943917643278837, "learning_rate": 0.001, "loss": 0.3675, "step": 10322 }, { "epoch": 0.2848342915874828, "grad_norm": 0.0025159113574773073, "learning_rate": 0.001, "loss": 0.3841, "step": 10323 }, { "epoch": 0.28486188378854715, "grad_norm": 0.004362201318144798, "learning_rate": 0.001, "loss": 0.3772, "step": 10324 }, { "epoch": 0.28488947598961156, "grad_norm": 0.0049796863459050655, "learning_rate": 0.001, "loss": 0.4253, "step": 10325 }, { "epoch": 0.2849170681906759, "grad_norm": 0.004486286547034979, "learning_rate": 0.001, "loss": 0.4077, "step": 10326 }, { "epoch": 0.28494466039174027, "grad_norm": 0.006912431679666042, "learning_rate": 0.001, "loss": 0.4178, "step": 10327 }, { "epoch": 0.2849722525928046, "grad_norm": 0.0031216132920235395, "learning_rate": 0.001, "loss": 0.4226, "step": 10328 }, { "epoch": 0.28499984479386903, "grad_norm": 0.004195013549178839, "learning_rate": 0.001, "loss": 0.3776, "step": 10329 }, { "epoch": 0.2850274369949334, "grad_norm": 0.0029478815849870443, "learning_rate": 0.001, "loss": 0.3897, "step": 10330 }, { "epoch": 0.28505502919599773, "grad_norm": 0.004067423287779093, "learning_rate": 0.001, "loss": 0.3935, "step": 10331 }, { "epoch": 0.28508262139706214, "grad_norm": 0.004478363785892725, "learning_rate": 0.001, "loss": 0.4146, "step": 10332 }, { "epoch": 0.2851102135981265, "grad_norm": 0.009275195188820362, "learning_rate": 0.001, "loss": 0.3937, "step": 10333 }, { "epoch": 0.28513780579919085, "grad_norm": 0.0037428569048643112, "learning_rate": 0.001, "loss": 0.3955, "step": 10334 }, { "epoch": 0.28516539800025525, "grad_norm": 0.004474049899727106, "learning_rate": 0.001, "loss": 0.4112, "step": 10335 }, { "epoch": 0.2851929902013196, "grad_norm": 0.004371373914182186, "learning_rate": 0.001, "loss": 0.3571, "step": 10336 }, { "epoch": 0.28522058240238396, "grad_norm": 0.0037502245977520943, "learning_rate": 0.001, "loss": 0.3624, "step": 10337 }, { "epoch": 0.2852481746034483, "grad_norm": 0.002311047865077853, "learning_rate": 0.001, "loss": 0.3721, "step": 10338 }, { "epoch": 0.2852757668045127, "grad_norm": 0.006753645371645689, "learning_rate": 0.001, "loss": 0.3764, "step": 10339 }, { "epoch": 0.2853033590055771, "grad_norm": 0.0031517634633928537, "learning_rate": 0.001, "loss": 0.4182, "step": 10340 }, { "epoch": 0.2853309512066414, "grad_norm": 0.0038493776228278875, "learning_rate": 0.001, "loss": 0.3738, "step": 10341 }, { "epoch": 0.28535854340770583, "grad_norm": 0.0019644973799586296, "learning_rate": 0.001, "loss": 0.4105, "step": 10342 }, { "epoch": 0.2853861356087702, "grad_norm": 0.0025845207273960114, "learning_rate": 0.001, "loss": 0.394, "step": 10343 }, { "epoch": 0.28541372780983454, "grad_norm": 0.0030156485736370087, "learning_rate": 0.001, "loss": 0.3931, "step": 10344 }, { "epoch": 0.28544132001089895, "grad_norm": 0.0033527929335832596, "learning_rate": 0.001, "loss": 0.4045, "step": 10345 }, { "epoch": 0.2854689122119633, "grad_norm": 0.0047877393662929535, "learning_rate": 0.001, "loss": 0.4196, "step": 10346 }, { "epoch": 0.28549650441302765, "grad_norm": 0.002556293271481991, "learning_rate": 0.001, "loss": 0.4108, "step": 10347 }, { "epoch": 0.285524096614092, "grad_norm": 0.0028672493062913418, "learning_rate": 0.001, "loss": 0.3888, "step": 10348 }, { "epoch": 0.2855516888151564, "grad_norm": 0.0026332384441047907, "learning_rate": 0.001, "loss": 0.4436, "step": 10349 }, { "epoch": 0.28557928101622077, "grad_norm": 0.0033898288384079933, "learning_rate": 0.001, "loss": 0.3911, "step": 10350 }, { "epoch": 0.2856068732172851, "grad_norm": 0.003938847221434116, "learning_rate": 0.001, "loss": 0.4113, "step": 10351 }, { "epoch": 0.2856344654183495, "grad_norm": 0.004278605338186026, "learning_rate": 0.001, "loss": 0.3997, "step": 10352 }, { "epoch": 0.2856620576194139, "grad_norm": 0.009001370519399643, "learning_rate": 0.001, "loss": 0.4223, "step": 10353 }, { "epoch": 0.28568964982047823, "grad_norm": 0.0058325412683188915, "learning_rate": 0.001, "loss": 0.386, "step": 10354 }, { "epoch": 0.2857172420215426, "grad_norm": 0.0033330926671624184, "learning_rate": 0.001, "loss": 0.3984, "step": 10355 }, { "epoch": 0.285744834222607, "grad_norm": 0.0033065094612538815, "learning_rate": 0.001, "loss": 0.4256, "step": 10356 }, { "epoch": 0.28577242642367134, "grad_norm": 0.002417915500700474, "learning_rate": 0.001, "loss": 0.4, "step": 10357 }, { "epoch": 0.2858000186247357, "grad_norm": 0.0036074428353458643, "learning_rate": 0.001, "loss": 0.3477, "step": 10358 }, { "epoch": 0.2858276108258001, "grad_norm": 0.0028410926461219788, "learning_rate": 0.001, "loss": 0.3941, "step": 10359 }, { "epoch": 0.28585520302686446, "grad_norm": 0.0024444633163511753, "learning_rate": 0.001, "loss": 0.4072, "step": 10360 }, { "epoch": 0.2858827952279288, "grad_norm": 0.0035574915818870068, "learning_rate": 0.001, "loss": 0.3702, "step": 10361 }, { "epoch": 0.2859103874289932, "grad_norm": 0.003090563230216503, "learning_rate": 0.001, "loss": 0.3829, "step": 10362 }, { "epoch": 0.28593797963005757, "grad_norm": 0.003158135572448373, "learning_rate": 0.001, "loss": 0.3842, "step": 10363 }, { "epoch": 0.2859655718311219, "grad_norm": 0.004847416654229164, "learning_rate": 0.001, "loss": 0.3727, "step": 10364 }, { "epoch": 0.2859931640321863, "grad_norm": 0.0035511176101863384, "learning_rate": 0.001, "loss": 0.4061, "step": 10365 }, { "epoch": 0.2860207562332507, "grad_norm": 0.0049200220964848995, "learning_rate": 0.001, "loss": 0.399, "step": 10366 }, { "epoch": 0.28604834843431504, "grad_norm": 0.0024868594482541084, "learning_rate": 0.001, "loss": 0.374, "step": 10367 }, { "epoch": 0.2860759406353794, "grad_norm": 0.004043731838464737, "learning_rate": 0.001, "loss": 0.4011, "step": 10368 }, { "epoch": 0.2861035328364438, "grad_norm": 0.0034267944283783436, "learning_rate": 0.001, "loss": 0.4056, "step": 10369 }, { "epoch": 0.28613112503750815, "grad_norm": 0.003376040840521455, "learning_rate": 0.001, "loss": 0.4189, "step": 10370 }, { "epoch": 0.2861587172385725, "grad_norm": 0.0024833050556480885, "learning_rate": 0.001, "loss": 0.4246, "step": 10371 }, { "epoch": 0.2861863094396369, "grad_norm": 0.0030002393759787083, "learning_rate": 0.001, "loss": 0.4271, "step": 10372 }, { "epoch": 0.28621390164070126, "grad_norm": 0.0029513488989323378, "learning_rate": 0.001, "loss": 0.3705, "step": 10373 }, { "epoch": 0.2862414938417656, "grad_norm": 0.0033245980739593506, "learning_rate": 0.001, "loss": 0.3759, "step": 10374 }, { "epoch": 0.28626908604282997, "grad_norm": 0.002725611673668027, "learning_rate": 0.001, "loss": 0.3741, "step": 10375 }, { "epoch": 0.2862966782438944, "grad_norm": 0.0023878002539277077, "learning_rate": 0.001, "loss": 0.4046, "step": 10376 }, { "epoch": 0.28632427044495873, "grad_norm": 0.0076403240673244, "learning_rate": 0.001, "loss": 0.4184, "step": 10377 }, { "epoch": 0.2863518626460231, "grad_norm": 0.00309283216483891, "learning_rate": 0.001, "loss": 0.4104, "step": 10378 }, { "epoch": 0.2863794548470875, "grad_norm": 0.002792463870719075, "learning_rate": 0.001, "loss": 0.3792, "step": 10379 }, { "epoch": 0.28640704704815184, "grad_norm": 0.002797079971060157, "learning_rate": 0.001, "loss": 0.4015, "step": 10380 }, { "epoch": 0.2864346392492162, "grad_norm": 0.0023108189925551414, "learning_rate": 0.001, "loss": 0.4203, "step": 10381 }, { "epoch": 0.2864622314502806, "grad_norm": 0.0021790589671581984, "learning_rate": 0.001, "loss": 0.4065, "step": 10382 }, { "epoch": 0.28648982365134495, "grad_norm": 0.003151221200823784, "learning_rate": 0.001, "loss": 0.4023, "step": 10383 }, { "epoch": 0.2865174158524093, "grad_norm": 0.0043502976186573505, "learning_rate": 0.001, "loss": 0.4219, "step": 10384 }, { "epoch": 0.28654500805347366, "grad_norm": 0.0038338969461619854, "learning_rate": 0.001, "loss": 0.427, "step": 10385 }, { "epoch": 0.28657260025453807, "grad_norm": 0.004629583563655615, "learning_rate": 0.001, "loss": 0.4075, "step": 10386 }, { "epoch": 0.2866001924556024, "grad_norm": 0.008832892403006554, "learning_rate": 0.001, "loss": 0.409, "step": 10387 }, { "epoch": 0.2866277846566668, "grad_norm": 0.005081677809357643, "learning_rate": 0.001, "loss": 0.3842, "step": 10388 }, { "epoch": 0.2866553768577312, "grad_norm": 0.005641726311296225, "learning_rate": 0.001, "loss": 0.3621, "step": 10389 }, { "epoch": 0.28668296905879553, "grad_norm": 0.005556876305490732, "learning_rate": 0.001, "loss": 0.4187, "step": 10390 }, { "epoch": 0.2867105612598599, "grad_norm": 0.004483111668378115, "learning_rate": 0.001, "loss": 0.3967, "step": 10391 }, { "epoch": 0.2867381534609243, "grad_norm": 0.004832763224840164, "learning_rate": 0.001, "loss": 0.3957, "step": 10392 }, { "epoch": 0.28676574566198865, "grad_norm": 0.0030382846016436815, "learning_rate": 0.001, "loss": 0.4291, "step": 10393 }, { "epoch": 0.286793337863053, "grad_norm": 0.003834576578810811, "learning_rate": 0.001, "loss": 0.3803, "step": 10394 }, { "epoch": 0.28682093006411735, "grad_norm": 0.003595273941755295, "learning_rate": 0.001, "loss": 0.4179, "step": 10395 }, { "epoch": 0.28684852226518176, "grad_norm": 0.006260840687900782, "learning_rate": 0.001, "loss": 0.4191, "step": 10396 }, { "epoch": 0.2868761144662461, "grad_norm": 0.00411889236420393, "learning_rate": 0.001, "loss": 0.4024, "step": 10397 }, { "epoch": 0.28690370666731047, "grad_norm": 0.0030989821534603834, "learning_rate": 0.001, "loss": 0.4152, "step": 10398 }, { "epoch": 0.2869312988683749, "grad_norm": 0.003285297192633152, "learning_rate": 0.001, "loss": 0.4113, "step": 10399 }, { "epoch": 0.2869588910694392, "grad_norm": 0.0032312078401446342, "learning_rate": 0.001, "loss": 0.4149, "step": 10400 }, { "epoch": 0.2869864832705036, "grad_norm": 0.004018446430563927, "learning_rate": 0.001, "loss": 0.3976, "step": 10401 }, { "epoch": 0.287014075471568, "grad_norm": 0.0027517497073858976, "learning_rate": 0.001, "loss": 0.4206, "step": 10402 }, { "epoch": 0.28704166767263234, "grad_norm": 0.0023782148491591215, "learning_rate": 0.001, "loss": 0.4018, "step": 10403 }, { "epoch": 0.2870692598736967, "grad_norm": 0.002795133274048567, "learning_rate": 0.001, "loss": 0.416, "step": 10404 }, { "epoch": 0.28709685207476104, "grad_norm": 0.0036653687711805105, "learning_rate": 0.001, "loss": 0.3434, "step": 10405 }, { "epoch": 0.28712444427582545, "grad_norm": 0.003916740883141756, "learning_rate": 0.001, "loss": 0.3793, "step": 10406 }, { "epoch": 0.2871520364768898, "grad_norm": 0.004016435705125332, "learning_rate": 0.001, "loss": 0.4219, "step": 10407 }, { "epoch": 0.28717962867795416, "grad_norm": 0.0035471522714942694, "learning_rate": 0.001, "loss": 0.4374, "step": 10408 }, { "epoch": 0.28720722087901857, "grad_norm": 0.010290498845279217, "learning_rate": 0.001, "loss": 0.4291, "step": 10409 }, { "epoch": 0.2872348130800829, "grad_norm": 0.004986819811165333, "learning_rate": 0.001, "loss": 0.4183, "step": 10410 }, { "epoch": 0.28726240528114727, "grad_norm": 0.003227797569707036, "learning_rate": 0.001, "loss": 0.3876, "step": 10411 }, { "epoch": 0.2872899974822117, "grad_norm": 0.0036358062643557787, "learning_rate": 0.001, "loss": 0.4043, "step": 10412 }, { "epoch": 0.28731758968327603, "grad_norm": 0.002349398098886013, "learning_rate": 0.001, "loss": 0.4514, "step": 10413 }, { "epoch": 0.2873451818843404, "grad_norm": 0.004284038674086332, "learning_rate": 0.001, "loss": 0.3808, "step": 10414 }, { "epoch": 0.28737277408540474, "grad_norm": 0.0026736604049801826, "learning_rate": 0.001, "loss": 0.4064, "step": 10415 }, { "epoch": 0.28740036628646914, "grad_norm": 0.002364154439419508, "learning_rate": 0.001, "loss": 0.4356, "step": 10416 }, { "epoch": 0.2874279584875335, "grad_norm": 0.0029544299468398094, "learning_rate": 0.001, "loss": 0.4061, "step": 10417 }, { "epoch": 0.28745555068859785, "grad_norm": 0.0052476017735898495, "learning_rate": 0.001, "loss": 0.3952, "step": 10418 }, { "epoch": 0.28748314288966226, "grad_norm": 0.0021597561426460743, "learning_rate": 0.001, "loss": 0.4266, "step": 10419 }, { "epoch": 0.2875107350907266, "grad_norm": 0.006407409440726042, "learning_rate": 0.001, "loss": 0.4276, "step": 10420 }, { "epoch": 0.28753832729179096, "grad_norm": 0.004578462336212397, "learning_rate": 0.001, "loss": 0.3882, "step": 10421 }, { "epoch": 0.28756591949285537, "grad_norm": 0.0024405980948358774, "learning_rate": 0.001, "loss": 0.4355, "step": 10422 }, { "epoch": 0.2875935116939197, "grad_norm": 0.0061929537914693356, "learning_rate": 0.001, "loss": 0.3499, "step": 10423 }, { "epoch": 0.2876211038949841, "grad_norm": 0.002528023673221469, "learning_rate": 0.001, "loss": 0.4066, "step": 10424 }, { "epoch": 0.28764869609604843, "grad_norm": 0.005064224358648062, "learning_rate": 0.001, "loss": 0.4323, "step": 10425 }, { "epoch": 0.28767628829711284, "grad_norm": 0.0034656894858926535, "learning_rate": 0.001, "loss": 0.436, "step": 10426 }, { "epoch": 0.2877038804981772, "grad_norm": 0.013309495523571968, "learning_rate": 0.001, "loss": 0.3463, "step": 10427 }, { "epoch": 0.28773147269924154, "grad_norm": 0.0033926607575267553, "learning_rate": 0.001, "loss": 0.3824, "step": 10428 }, { "epoch": 0.28775906490030595, "grad_norm": 0.003299700329080224, "learning_rate": 0.001, "loss": 0.3592, "step": 10429 }, { "epoch": 0.2877866571013703, "grad_norm": 0.00263997376896441, "learning_rate": 0.001, "loss": 0.383, "step": 10430 }, { "epoch": 0.28781424930243465, "grad_norm": 0.002664315514266491, "learning_rate": 0.001, "loss": 0.4104, "step": 10431 }, { "epoch": 0.28784184150349906, "grad_norm": 0.003565206192433834, "learning_rate": 0.001, "loss": 0.395, "step": 10432 }, { "epoch": 0.2878694337045634, "grad_norm": 0.0028061075136065483, "learning_rate": 0.001, "loss": 0.4317, "step": 10433 }, { "epoch": 0.28789702590562777, "grad_norm": 0.005139282438904047, "learning_rate": 0.001, "loss": 0.3856, "step": 10434 }, { "epoch": 0.2879246181066921, "grad_norm": 0.003619102295488119, "learning_rate": 0.001, "loss": 0.4045, "step": 10435 }, { "epoch": 0.28795221030775653, "grad_norm": 0.0031147655099630356, "learning_rate": 0.001, "loss": 0.4198, "step": 10436 }, { "epoch": 0.2879798025088209, "grad_norm": 0.0035453352611511946, "learning_rate": 0.001, "loss": 0.414, "step": 10437 }, { "epoch": 0.28800739470988523, "grad_norm": 0.013630262576043606, "learning_rate": 0.001, "loss": 0.394, "step": 10438 }, { "epoch": 0.28803498691094964, "grad_norm": 0.006643341388553381, "learning_rate": 0.001, "loss": 0.3848, "step": 10439 }, { "epoch": 0.288062579112014, "grad_norm": 0.0038181054405868053, "learning_rate": 0.001, "loss": 0.3982, "step": 10440 }, { "epoch": 0.28809017131307835, "grad_norm": 0.00438200868666172, "learning_rate": 0.001, "loss": 0.4341, "step": 10441 }, { "epoch": 0.28811776351414276, "grad_norm": 0.009693934582173824, "learning_rate": 0.001, "loss": 0.4069, "step": 10442 }, { "epoch": 0.2881453557152071, "grad_norm": 0.0037193638272583485, "learning_rate": 0.001, "loss": 0.3944, "step": 10443 }, { "epoch": 0.28817294791627146, "grad_norm": 0.005814881529659033, "learning_rate": 0.001, "loss": 0.4172, "step": 10444 }, { "epoch": 0.2882005401173358, "grad_norm": 0.003509842325001955, "learning_rate": 0.001, "loss": 0.404, "step": 10445 }, { "epoch": 0.2882281323184002, "grad_norm": 0.048714328557252884, "learning_rate": 0.001, "loss": 0.4366, "step": 10446 }, { "epoch": 0.2882557245194646, "grad_norm": 0.003179010935127735, "learning_rate": 0.001, "loss": 0.3806, "step": 10447 }, { "epoch": 0.2882833167205289, "grad_norm": 0.004650244489312172, "learning_rate": 0.001, "loss": 0.3891, "step": 10448 }, { "epoch": 0.28831090892159333, "grad_norm": 0.010246813297271729, "learning_rate": 0.001, "loss": 0.3738, "step": 10449 }, { "epoch": 0.2883385011226577, "grad_norm": 0.0024331400636583567, "learning_rate": 0.001, "loss": 0.437, "step": 10450 }, { "epoch": 0.28836609332372204, "grad_norm": 0.0032423040829598904, "learning_rate": 0.001, "loss": 0.3977, "step": 10451 }, { "epoch": 0.2883936855247864, "grad_norm": 0.0032161688432097435, "learning_rate": 0.001, "loss": 0.3756, "step": 10452 }, { "epoch": 0.2884212777258508, "grad_norm": 0.009974083863198757, "learning_rate": 0.001, "loss": 0.4151, "step": 10453 }, { "epoch": 0.28844886992691515, "grad_norm": 0.0024438994005322456, "learning_rate": 0.001, "loss": 0.3818, "step": 10454 }, { "epoch": 0.2884764621279795, "grad_norm": 0.0024701599031686783, "learning_rate": 0.001, "loss": 0.4116, "step": 10455 }, { "epoch": 0.2885040543290439, "grad_norm": 0.0037678529042750597, "learning_rate": 0.001, "loss": 0.4239, "step": 10456 }, { "epoch": 0.28853164653010827, "grad_norm": 0.0021011261269450188, "learning_rate": 0.001, "loss": 0.44, "step": 10457 }, { "epoch": 0.2885592387311726, "grad_norm": 0.0035511725582182407, "learning_rate": 0.001, "loss": 0.3732, "step": 10458 }, { "epoch": 0.288586830932237, "grad_norm": 0.002597698476165533, "learning_rate": 0.001, "loss": 0.4159, "step": 10459 }, { "epoch": 0.2886144231333014, "grad_norm": 0.006349804345518351, "learning_rate": 0.001, "loss": 0.3979, "step": 10460 }, { "epoch": 0.28864201533436573, "grad_norm": 0.003128377255052328, "learning_rate": 0.001, "loss": 0.4154, "step": 10461 }, { "epoch": 0.2886696075354301, "grad_norm": 0.005628944840282202, "learning_rate": 0.001, "loss": 0.3865, "step": 10462 }, { "epoch": 0.2886971997364945, "grad_norm": 0.009223297238349915, "learning_rate": 0.001, "loss": 0.4119, "step": 10463 }, { "epoch": 0.28872479193755884, "grad_norm": 0.002501624170690775, "learning_rate": 0.001, "loss": 0.4228, "step": 10464 }, { "epoch": 0.2887523841386232, "grad_norm": 0.0031182433012872934, "learning_rate": 0.001, "loss": 0.4293, "step": 10465 }, { "epoch": 0.2887799763396876, "grad_norm": 0.003218629164621234, "learning_rate": 0.001, "loss": 0.396, "step": 10466 }, { "epoch": 0.28880756854075196, "grad_norm": 0.0033797677606344223, "learning_rate": 0.001, "loss": 0.4258, "step": 10467 }, { "epoch": 0.2888351607418163, "grad_norm": 0.005143940914422274, "learning_rate": 0.001, "loss": 0.4083, "step": 10468 }, { "epoch": 0.2888627529428807, "grad_norm": 0.02191130630671978, "learning_rate": 0.001, "loss": 0.3642, "step": 10469 }, { "epoch": 0.28889034514394507, "grad_norm": 0.013004349544644356, "learning_rate": 0.001, "loss": 0.3792, "step": 10470 }, { "epoch": 0.2889179373450094, "grad_norm": 0.0044490983709692955, "learning_rate": 0.001, "loss": 0.411, "step": 10471 }, { "epoch": 0.2889455295460738, "grad_norm": 0.004338787868618965, "learning_rate": 0.001, "loss": 0.3989, "step": 10472 }, { "epoch": 0.2889731217471382, "grad_norm": 0.0022168918512761593, "learning_rate": 0.001, "loss": 0.4162, "step": 10473 }, { "epoch": 0.28900071394820254, "grad_norm": 0.0039255740121006966, "learning_rate": 0.001, "loss": 0.4124, "step": 10474 }, { "epoch": 0.2890283061492669, "grad_norm": 0.0024870086926966906, "learning_rate": 0.001, "loss": 0.4359, "step": 10475 }, { "epoch": 0.2890558983503313, "grad_norm": 0.009762310422956944, "learning_rate": 0.001, "loss": 0.4262, "step": 10476 }, { "epoch": 0.28908349055139565, "grad_norm": 0.0031546815298497677, "learning_rate": 0.001, "loss": 0.3929, "step": 10477 }, { "epoch": 0.28911108275246, "grad_norm": 0.0028812165837734938, "learning_rate": 0.001, "loss": 0.3788, "step": 10478 }, { "epoch": 0.2891386749535244, "grad_norm": 0.003111601574346423, "learning_rate": 0.001, "loss": 0.3852, "step": 10479 }, { "epoch": 0.28916626715458876, "grad_norm": 0.0035458174534142017, "learning_rate": 0.001, "loss": 0.4125, "step": 10480 }, { "epoch": 0.2891938593556531, "grad_norm": 0.0034652771428227425, "learning_rate": 0.001, "loss": 0.3833, "step": 10481 }, { "epoch": 0.28922145155671747, "grad_norm": 0.004158776253461838, "learning_rate": 0.001, "loss": 0.3449, "step": 10482 }, { "epoch": 0.2892490437577819, "grad_norm": 0.0031947793904691935, "learning_rate": 0.001, "loss": 0.4176, "step": 10483 }, { "epoch": 0.28927663595884623, "grad_norm": 0.0023631020449101925, "learning_rate": 0.001, "loss": 0.4212, "step": 10484 }, { "epoch": 0.2893042281599106, "grad_norm": 0.0029194431845098734, "learning_rate": 0.001, "loss": 0.3617, "step": 10485 }, { "epoch": 0.289331820360975, "grad_norm": 0.003637043060734868, "learning_rate": 0.001, "loss": 0.4131, "step": 10486 }, { "epoch": 0.28935941256203934, "grad_norm": 0.002662140876054764, "learning_rate": 0.001, "loss": 0.3454, "step": 10487 }, { "epoch": 0.2893870047631037, "grad_norm": 0.002793428720906377, "learning_rate": 0.001, "loss": 0.398, "step": 10488 }, { "epoch": 0.2894145969641681, "grad_norm": 0.0031368432100862265, "learning_rate": 0.001, "loss": 0.4084, "step": 10489 }, { "epoch": 0.28944218916523246, "grad_norm": 0.003831058507785201, "learning_rate": 0.001, "loss": 0.3796, "step": 10490 }, { "epoch": 0.2894697813662968, "grad_norm": 0.0026088710874319077, "learning_rate": 0.001, "loss": 0.3848, "step": 10491 }, { "epoch": 0.28949737356736116, "grad_norm": 0.004104997497051954, "learning_rate": 0.001, "loss": 0.431, "step": 10492 }, { "epoch": 0.28952496576842557, "grad_norm": 0.004247668199241161, "learning_rate": 0.001, "loss": 0.3998, "step": 10493 }, { "epoch": 0.2895525579694899, "grad_norm": 0.0025502184871584177, "learning_rate": 0.001, "loss": 0.391, "step": 10494 }, { "epoch": 0.2895801501705543, "grad_norm": 0.0031428837683051825, "learning_rate": 0.001, "loss": 0.3968, "step": 10495 }, { "epoch": 0.2896077423716187, "grad_norm": 0.00442470470443368, "learning_rate": 0.001, "loss": 0.3862, "step": 10496 }, { "epoch": 0.28963533457268303, "grad_norm": 0.0065785483457148075, "learning_rate": 0.001, "loss": 0.4503, "step": 10497 }, { "epoch": 0.2896629267737474, "grad_norm": 0.0034503082279115915, "learning_rate": 0.001, "loss": 0.4078, "step": 10498 }, { "epoch": 0.2896905189748118, "grad_norm": 0.0028073948342353106, "learning_rate": 0.001, "loss": 0.3872, "step": 10499 }, { "epoch": 0.28971811117587615, "grad_norm": 0.006891318131238222, "learning_rate": 0.001, "loss": 0.4123, "step": 10500 }, { "epoch": 0.28971811117587615, "eval_runtime": 23.9893, "eval_samples_per_second": 1.334, "eval_steps_per_second": 0.167, "step": 10500 }, { "epoch": 0.2897457033769405, "grad_norm": 0.0038485617842525244, "learning_rate": 0.001, "loss": 0.4164, "step": 10501 }, { "epoch": 0.28977329557800485, "grad_norm": 0.004269871395081282, "learning_rate": 0.001, "loss": 0.4047, "step": 10502 }, { "epoch": 0.28980088777906926, "grad_norm": 0.00325970770791173, "learning_rate": 0.001, "loss": 0.4373, "step": 10503 }, { "epoch": 0.2898284799801336, "grad_norm": 0.0031080457847565413, "learning_rate": 0.001, "loss": 0.4237, "step": 10504 }, { "epoch": 0.28985607218119797, "grad_norm": 0.003122104099020362, "learning_rate": 0.001, "loss": 0.3966, "step": 10505 }, { "epoch": 0.2898836643822624, "grad_norm": 0.003600063733756542, "learning_rate": 0.001, "loss": 0.3981, "step": 10506 }, { "epoch": 0.2899112565833267, "grad_norm": 0.004156127572059631, "learning_rate": 0.001, "loss": 0.38, "step": 10507 }, { "epoch": 0.2899388487843911, "grad_norm": 0.0035865693353116512, "learning_rate": 0.001, "loss": 0.4205, "step": 10508 }, { "epoch": 0.2899664409854555, "grad_norm": 0.004097979050129652, "learning_rate": 0.001, "loss": 0.4112, "step": 10509 }, { "epoch": 0.28999403318651984, "grad_norm": 0.006173746194690466, "learning_rate": 0.001, "loss": 0.4409, "step": 10510 }, { "epoch": 0.2900216253875842, "grad_norm": 0.0032481453381478786, "learning_rate": 0.001, "loss": 0.4437, "step": 10511 }, { "epoch": 0.29004921758864854, "grad_norm": 0.009451840072870255, "learning_rate": 0.001, "loss": 0.3547, "step": 10512 }, { "epoch": 0.29007680978971295, "grad_norm": 0.0031670001335442066, "learning_rate": 0.001, "loss": 0.3936, "step": 10513 }, { "epoch": 0.2901044019907773, "grad_norm": 0.0040013547986745834, "learning_rate": 0.001, "loss": 0.4047, "step": 10514 }, { "epoch": 0.29013199419184166, "grad_norm": 0.01435109507292509, "learning_rate": 0.001, "loss": 0.3811, "step": 10515 }, { "epoch": 0.29015958639290607, "grad_norm": 0.014031428843736649, "learning_rate": 0.001, "loss": 0.3909, "step": 10516 }, { "epoch": 0.2901871785939704, "grad_norm": 0.021386094391345978, "learning_rate": 0.001, "loss": 0.4344, "step": 10517 }, { "epoch": 0.29021477079503477, "grad_norm": 0.01621176116168499, "learning_rate": 0.001, "loss": 0.4005, "step": 10518 }, { "epoch": 0.2902423629960992, "grad_norm": 0.1645982414484024, "learning_rate": 0.001, "loss": 0.3977, "step": 10519 }, { "epoch": 0.29026995519716353, "grad_norm": 0.009323320351541042, "learning_rate": 0.001, "loss": 0.3349, "step": 10520 }, { "epoch": 0.2902975473982279, "grad_norm": 0.010149064473807812, "learning_rate": 0.001, "loss": 0.4171, "step": 10521 }, { "epoch": 0.29032513959929224, "grad_norm": 0.004645455162972212, "learning_rate": 0.001, "loss": 0.4127, "step": 10522 }, { "epoch": 0.29035273180035664, "grad_norm": 0.005608486942946911, "learning_rate": 0.001, "loss": 0.41, "step": 10523 }, { "epoch": 0.290380324001421, "grad_norm": 0.020055659115314484, "learning_rate": 0.001, "loss": 0.4182, "step": 10524 }, { "epoch": 0.29040791620248535, "grad_norm": 0.00331861968152225, "learning_rate": 0.001, "loss": 0.3792, "step": 10525 }, { "epoch": 0.29043550840354976, "grad_norm": 0.0030799147207289934, "learning_rate": 0.001, "loss": 0.4441, "step": 10526 }, { "epoch": 0.2904631006046141, "grad_norm": 0.003175821155309677, "learning_rate": 0.001, "loss": 0.4228, "step": 10527 }, { "epoch": 0.29049069280567846, "grad_norm": 0.002744150348007679, "learning_rate": 0.001, "loss": 0.394, "step": 10528 }, { "epoch": 0.29051828500674287, "grad_norm": 0.0017547330353409052, "learning_rate": 0.001, "loss": 0.4104, "step": 10529 }, { "epoch": 0.2905458772078072, "grad_norm": 0.002442681696265936, "learning_rate": 0.001, "loss": 0.4078, "step": 10530 }, { "epoch": 0.2905734694088716, "grad_norm": 0.0028025461360812187, "learning_rate": 0.001, "loss": 0.3758, "step": 10531 }, { "epoch": 0.29060106160993593, "grad_norm": 0.003767822403460741, "learning_rate": 0.001, "loss": 0.3827, "step": 10532 }, { "epoch": 0.29062865381100034, "grad_norm": 0.031069811433553696, "learning_rate": 0.001, "loss": 0.3772, "step": 10533 }, { "epoch": 0.2906562460120647, "grad_norm": 0.005183485336601734, "learning_rate": 0.001, "loss": 0.3973, "step": 10534 }, { "epoch": 0.29068383821312904, "grad_norm": 0.004126776475459337, "learning_rate": 0.001, "loss": 0.3994, "step": 10535 }, { "epoch": 0.29071143041419345, "grad_norm": 0.0029067141003906727, "learning_rate": 0.001, "loss": 0.4026, "step": 10536 }, { "epoch": 0.2907390226152578, "grad_norm": 0.00455186702311039, "learning_rate": 0.001, "loss": 0.413, "step": 10537 }, { "epoch": 0.29076661481632216, "grad_norm": 0.0032677233684808016, "learning_rate": 0.001, "loss": 0.4204, "step": 10538 }, { "epoch": 0.29079420701738656, "grad_norm": 0.0033605170901864767, "learning_rate": 0.001, "loss": 0.4034, "step": 10539 }, { "epoch": 0.2908217992184509, "grad_norm": 0.0031016338616609573, "learning_rate": 0.001, "loss": 0.4089, "step": 10540 }, { "epoch": 0.29084939141951527, "grad_norm": 0.0022766580805182457, "learning_rate": 0.001, "loss": 0.4186, "step": 10541 }, { "epoch": 0.2908769836205796, "grad_norm": 0.0034821683075278997, "learning_rate": 0.001, "loss": 0.3844, "step": 10542 }, { "epoch": 0.29090457582164403, "grad_norm": 0.0025649424642324448, "learning_rate": 0.001, "loss": 0.4215, "step": 10543 }, { "epoch": 0.2909321680227084, "grad_norm": 0.004783533047884703, "learning_rate": 0.001, "loss": 0.4034, "step": 10544 }, { "epoch": 0.29095976022377273, "grad_norm": 0.007414715830236673, "learning_rate": 0.001, "loss": 0.4077, "step": 10545 }, { "epoch": 0.29098735242483714, "grad_norm": 0.010034749284386635, "learning_rate": 0.001, "loss": 0.3879, "step": 10546 }, { "epoch": 0.2910149446259015, "grad_norm": 0.0025835982523858547, "learning_rate": 0.001, "loss": 0.3929, "step": 10547 }, { "epoch": 0.29104253682696585, "grad_norm": 0.0022810695227235556, "learning_rate": 0.001, "loss": 0.4089, "step": 10548 }, { "epoch": 0.2910701290280302, "grad_norm": 0.0027416511438786983, "learning_rate": 0.001, "loss": 0.4023, "step": 10549 }, { "epoch": 0.2910977212290946, "grad_norm": 0.004057244397699833, "learning_rate": 0.001, "loss": 0.4082, "step": 10550 }, { "epoch": 0.29112531343015896, "grad_norm": 0.00185355672147125, "learning_rate": 0.001, "loss": 0.4296, "step": 10551 }, { "epoch": 0.2911529056312233, "grad_norm": 0.0033400054089725018, "learning_rate": 0.001, "loss": 0.4106, "step": 10552 }, { "epoch": 0.2911804978322877, "grad_norm": 0.005987055134028196, "learning_rate": 0.001, "loss": 0.3716, "step": 10553 }, { "epoch": 0.2912080900333521, "grad_norm": 0.007598403375595808, "learning_rate": 0.001, "loss": 0.3706, "step": 10554 }, { "epoch": 0.2912356822344164, "grad_norm": 0.00337135954760015, "learning_rate": 0.001, "loss": 0.4039, "step": 10555 }, { "epoch": 0.29126327443548083, "grad_norm": 0.003520526457577944, "learning_rate": 0.001, "loss": 0.4189, "step": 10556 }, { "epoch": 0.2912908666365452, "grad_norm": 0.002943966304883361, "learning_rate": 0.001, "loss": 0.3459, "step": 10557 }, { "epoch": 0.29131845883760954, "grad_norm": 0.004878449719399214, "learning_rate": 0.001, "loss": 0.3735, "step": 10558 }, { "epoch": 0.2913460510386739, "grad_norm": 0.0023713402915745974, "learning_rate": 0.001, "loss": 0.4028, "step": 10559 }, { "epoch": 0.2913736432397383, "grad_norm": 0.0031815902329981327, "learning_rate": 0.001, "loss": 0.3708, "step": 10560 }, { "epoch": 0.29140123544080265, "grad_norm": 0.002473505213856697, "learning_rate": 0.001, "loss": 0.4208, "step": 10561 }, { "epoch": 0.291428827641867, "grad_norm": 0.002570673357695341, "learning_rate": 0.001, "loss": 0.3712, "step": 10562 }, { "epoch": 0.2914564198429314, "grad_norm": 0.004323242697864771, "learning_rate": 0.001, "loss": 0.4001, "step": 10563 }, { "epoch": 0.29148401204399577, "grad_norm": 0.003748292336240411, "learning_rate": 0.001, "loss": 0.3852, "step": 10564 }, { "epoch": 0.2915116042450601, "grad_norm": 0.0039474996738135815, "learning_rate": 0.001, "loss": 0.424, "step": 10565 }, { "epoch": 0.2915391964461245, "grad_norm": 0.003185428213328123, "learning_rate": 0.001, "loss": 0.4258, "step": 10566 }, { "epoch": 0.2915667886471889, "grad_norm": 0.008764101192355156, "learning_rate": 0.001, "loss": 0.4073, "step": 10567 }, { "epoch": 0.29159438084825323, "grad_norm": 0.006038394290953875, "learning_rate": 0.001, "loss": 0.4069, "step": 10568 }, { "epoch": 0.2916219730493176, "grad_norm": 0.00498878164216876, "learning_rate": 0.001, "loss": 0.4099, "step": 10569 }, { "epoch": 0.291649565250382, "grad_norm": 0.002792202867567539, "learning_rate": 0.001, "loss": 0.4132, "step": 10570 }, { "epoch": 0.29167715745144634, "grad_norm": 0.004675569478422403, "learning_rate": 0.001, "loss": 0.4226, "step": 10571 }, { "epoch": 0.2917047496525107, "grad_norm": 0.00361837400123477, "learning_rate": 0.001, "loss": 0.3538, "step": 10572 }, { "epoch": 0.2917323418535751, "grad_norm": 0.00293007493019104, "learning_rate": 0.001, "loss": 0.4157, "step": 10573 }, { "epoch": 0.29175993405463946, "grad_norm": 0.0027430958580225706, "learning_rate": 0.001, "loss": 0.4124, "step": 10574 }, { "epoch": 0.2917875262557038, "grad_norm": 0.0023098003584891558, "learning_rate": 0.001, "loss": 0.4127, "step": 10575 }, { "epoch": 0.2918151184567682, "grad_norm": 0.0030539468862116337, "learning_rate": 0.001, "loss": 0.3997, "step": 10576 }, { "epoch": 0.29184271065783257, "grad_norm": 0.004506285302340984, "learning_rate": 0.001, "loss": 0.3835, "step": 10577 }, { "epoch": 0.2918703028588969, "grad_norm": 0.00386527506634593, "learning_rate": 0.001, "loss": 0.4258, "step": 10578 }, { "epoch": 0.2918978950599613, "grad_norm": 0.0044982582330703735, "learning_rate": 0.001, "loss": 0.3744, "step": 10579 }, { "epoch": 0.2919254872610257, "grad_norm": 0.005035079549998045, "learning_rate": 0.001, "loss": 0.3732, "step": 10580 }, { "epoch": 0.29195307946209004, "grad_norm": 0.007419761270284653, "learning_rate": 0.001, "loss": 0.3572, "step": 10581 }, { "epoch": 0.2919806716631544, "grad_norm": 0.003058833070099354, "learning_rate": 0.001, "loss": 0.4311, "step": 10582 }, { "epoch": 0.2920082638642188, "grad_norm": 0.008622899651527405, "learning_rate": 0.001, "loss": 0.405, "step": 10583 }, { "epoch": 0.29203585606528315, "grad_norm": 0.0038068420253694057, "learning_rate": 0.001, "loss": 0.4076, "step": 10584 }, { "epoch": 0.2920634482663475, "grad_norm": 0.0027387929148972034, "learning_rate": 0.001, "loss": 0.3948, "step": 10585 }, { "epoch": 0.2920910404674119, "grad_norm": 0.004177779424935579, "learning_rate": 0.001, "loss": 0.3726, "step": 10586 }, { "epoch": 0.29211863266847626, "grad_norm": 0.0050719305872917175, "learning_rate": 0.001, "loss": 0.4282, "step": 10587 }, { "epoch": 0.2921462248695406, "grad_norm": 0.00421450100839138, "learning_rate": 0.001, "loss": 0.3963, "step": 10588 }, { "epoch": 0.29217381707060497, "grad_norm": 0.031578365713357925, "learning_rate": 0.001, "loss": 0.3792, "step": 10589 }, { "epoch": 0.2922014092716694, "grad_norm": 0.008609611541032791, "learning_rate": 0.001, "loss": 0.3712, "step": 10590 }, { "epoch": 0.29222900147273373, "grad_norm": 0.005752986762672663, "learning_rate": 0.001, "loss": 0.38, "step": 10591 }, { "epoch": 0.2922565936737981, "grad_norm": 0.004760433454066515, "learning_rate": 0.001, "loss": 0.4197, "step": 10592 }, { "epoch": 0.2922841858748625, "grad_norm": 0.0037690645549446344, "learning_rate": 0.001, "loss": 0.3877, "step": 10593 }, { "epoch": 0.29231177807592684, "grad_norm": 0.0029250262305140495, "learning_rate": 0.001, "loss": 0.4383, "step": 10594 }, { "epoch": 0.2923393702769912, "grad_norm": 0.002340389881283045, "learning_rate": 0.001, "loss": 0.4113, "step": 10595 }, { "epoch": 0.2923669624780556, "grad_norm": 0.002268069889396429, "learning_rate": 0.001, "loss": 0.4107, "step": 10596 }, { "epoch": 0.29239455467911996, "grad_norm": 0.004916078876703978, "learning_rate": 0.001, "loss": 0.4144, "step": 10597 }, { "epoch": 0.2924221468801843, "grad_norm": 0.0026404778473079205, "learning_rate": 0.001, "loss": 0.3631, "step": 10598 }, { "epoch": 0.29244973908124866, "grad_norm": 0.003348248079419136, "learning_rate": 0.001, "loss": 0.387, "step": 10599 }, { "epoch": 0.29247733128231307, "grad_norm": 0.0029468834400177, "learning_rate": 0.001, "loss": 0.3734, "step": 10600 }, { "epoch": 0.2925049234833774, "grad_norm": 0.00501684146001935, "learning_rate": 0.001, "loss": 0.4221, "step": 10601 }, { "epoch": 0.2925325156844418, "grad_norm": 0.002596599282696843, "learning_rate": 0.001, "loss": 0.412, "step": 10602 }, { "epoch": 0.2925601078855062, "grad_norm": 0.004242388531565666, "learning_rate": 0.001, "loss": 0.4014, "step": 10603 }, { "epoch": 0.29258770008657053, "grad_norm": 0.003474390832707286, "learning_rate": 0.001, "loss": 0.378, "step": 10604 }, { "epoch": 0.2926152922876349, "grad_norm": 0.0028386737685650587, "learning_rate": 0.001, "loss": 0.4124, "step": 10605 }, { "epoch": 0.2926428844886993, "grad_norm": 0.005176781211048365, "learning_rate": 0.001, "loss": 0.3949, "step": 10606 }, { "epoch": 0.29267047668976365, "grad_norm": 0.002886307192966342, "learning_rate": 0.001, "loss": 0.395, "step": 10607 }, { "epoch": 0.292698068890828, "grad_norm": 0.0048707169480621815, "learning_rate": 0.001, "loss": 0.405, "step": 10608 }, { "epoch": 0.29272566109189235, "grad_norm": 0.003168995026499033, "learning_rate": 0.001, "loss": 0.377, "step": 10609 }, { "epoch": 0.29275325329295676, "grad_norm": 0.00348648545332253, "learning_rate": 0.001, "loss": 0.3736, "step": 10610 }, { "epoch": 0.2927808454940211, "grad_norm": 0.002645922591909766, "learning_rate": 0.001, "loss": 0.4314, "step": 10611 }, { "epoch": 0.29280843769508547, "grad_norm": 0.002931213239207864, "learning_rate": 0.001, "loss": 0.3761, "step": 10612 }, { "epoch": 0.2928360298961499, "grad_norm": 0.003571485634893179, "learning_rate": 0.001, "loss": 0.3484, "step": 10613 }, { "epoch": 0.2928636220972142, "grad_norm": 0.002395814750343561, "learning_rate": 0.001, "loss": 0.4037, "step": 10614 }, { "epoch": 0.2928912142982786, "grad_norm": 0.007757443469017744, "learning_rate": 0.001, "loss": 0.3728, "step": 10615 }, { "epoch": 0.292918806499343, "grad_norm": 0.003894525347277522, "learning_rate": 0.001, "loss": 0.3976, "step": 10616 }, { "epoch": 0.29294639870040734, "grad_norm": 0.005945149809122086, "learning_rate": 0.001, "loss": 0.3822, "step": 10617 }, { "epoch": 0.2929739909014717, "grad_norm": 0.005653233267366886, "learning_rate": 0.001, "loss": 0.3896, "step": 10618 }, { "epoch": 0.29300158310253605, "grad_norm": 0.006182766519486904, "learning_rate": 0.001, "loss": 0.4181, "step": 10619 }, { "epoch": 0.29302917530360045, "grad_norm": 0.004933160729706287, "learning_rate": 0.001, "loss": 0.4209, "step": 10620 }, { "epoch": 0.2930567675046648, "grad_norm": 0.004339593928307295, "learning_rate": 0.001, "loss": 0.3796, "step": 10621 }, { "epoch": 0.29308435970572916, "grad_norm": 0.0027681838255375624, "learning_rate": 0.001, "loss": 0.4027, "step": 10622 }, { "epoch": 0.29311195190679357, "grad_norm": 0.002985588042065501, "learning_rate": 0.001, "loss": 0.4367, "step": 10623 }, { "epoch": 0.2931395441078579, "grad_norm": 0.003226140746846795, "learning_rate": 0.001, "loss": 0.4223, "step": 10624 }, { "epoch": 0.29316713630892227, "grad_norm": 0.00455946521833539, "learning_rate": 0.001, "loss": 0.4024, "step": 10625 }, { "epoch": 0.2931947285099867, "grad_norm": 0.004617081955075264, "learning_rate": 0.001, "loss": 0.3963, "step": 10626 }, { "epoch": 0.29322232071105103, "grad_norm": 0.006010159850120544, "learning_rate": 0.001, "loss": 0.4247, "step": 10627 }, { "epoch": 0.2932499129121154, "grad_norm": 0.0038819455076009035, "learning_rate": 0.001, "loss": 0.4062, "step": 10628 }, { "epoch": 0.29327750511317974, "grad_norm": 0.0035864575766026974, "learning_rate": 0.001, "loss": 0.3823, "step": 10629 }, { "epoch": 0.29330509731424415, "grad_norm": 0.002191227860748768, "learning_rate": 0.001, "loss": 0.408, "step": 10630 }, { "epoch": 0.2933326895153085, "grad_norm": 0.004282178822904825, "learning_rate": 0.001, "loss": 0.3453, "step": 10631 }, { "epoch": 0.29336028171637285, "grad_norm": 0.0022272500209510326, "learning_rate": 0.001, "loss": 0.4057, "step": 10632 }, { "epoch": 0.29338787391743726, "grad_norm": 0.002555177314206958, "learning_rate": 0.001, "loss": 0.3953, "step": 10633 }, { "epoch": 0.2934154661185016, "grad_norm": 0.006331083830446005, "learning_rate": 0.001, "loss": 0.3625, "step": 10634 }, { "epoch": 0.29344305831956596, "grad_norm": 0.002670933725312352, "learning_rate": 0.001, "loss": 0.4096, "step": 10635 }, { "epoch": 0.2934706505206303, "grad_norm": 0.004304058384150267, "learning_rate": 0.001, "loss": 0.3682, "step": 10636 }, { "epoch": 0.2934982427216947, "grad_norm": 0.0024331146851181984, "learning_rate": 0.001, "loss": 0.393, "step": 10637 }, { "epoch": 0.2935258349227591, "grad_norm": 0.003931795712560415, "learning_rate": 0.001, "loss": 0.3928, "step": 10638 }, { "epoch": 0.29355342712382343, "grad_norm": 0.004206740763038397, "learning_rate": 0.001, "loss": 0.3946, "step": 10639 }, { "epoch": 0.29358101932488784, "grad_norm": 0.0031811101362109184, "learning_rate": 0.001, "loss": 0.4167, "step": 10640 }, { "epoch": 0.2936086115259522, "grad_norm": 0.002255258383229375, "learning_rate": 0.001, "loss": 0.3808, "step": 10641 }, { "epoch": 0.29363620372701654, "grad_norm": 0.002453514840453863, "learning_rate": 0.001, "loss": 0.4223, "step": 10642 }, { "epoch": 0.29366379592808095, "grad_norm": 0.002259905217215419, "learning_rate": 0.001, "loss": 0.3815, "step": 10643 }, { "epoch": 0.2936913881291453, "grad_norm": 0.0024826505687087774, "learning_rate": 0.001, "loss": 0.3844, "step": 10644 }, { "epoch": 0.29371898033020966, "grad_norm": 0.0024130498059093952, "learning_rate": 0.001, "loss": 0.39, "step": 10645 }, { "epoch": 0.293746572531274, "grad_norm": 0.002940199337899685, "learning_rate": 0.001, "loss": 0.4167, "step": 10646 }, { "epoch": 0.2937741647323384, "grad_norm": 0.0037300034891813993, "learning_rate": 0.001, "loss": 0.3452, "step": 10647 }, { "epoch": 0.29380175693340277, "grad_norm": 0.0023701984900981188, "learning_rate": 0.001, "loss": 0.4345, "step": 10648 }, { "epoch": 0.2938293491344671, "grad_norm": 0.0023255145642906427, "learning_rate": 0.001, "loss": 0.3798, "step": 10649 }, { "epoch": 0.29385694133553153, "grad_norm": 0.001929348916746676, "learning_rate": 0.001, "loss": 0.4208, "step": 10650 }, { "epoch": 0.2938845335365959, "grad_norm": 0.003764538327232003, "learning_rate": 0.001, "loss": 0.4241, "step": 10651 }, { "epoch": 0.29391212573766023, "grad_norm": 0.0031687645241618156, "learning_rate": 0.001, "loss": 0.3864, "step": 10652 }, { "epoch": 0.29393971793872464, "grad_norm": 0.0023800362832844257, "learning_rate": 0.001, "loss": 0.4062, "step": 10653 }, { "epoch": 0.293967310139789, "grad_norm": 0.003322491655126214, "learning_rate": 0.001, "loss": 0.3802, "step": 10654 }, { "epoch": 0.29399490234085335, "grad_norm": 0.003406172851100564, "learning_rate": 0.001, "loss": 0.3784, "step": 10655 }, { "epoch": 0.2940224945419177, "grad_norm": 0.0024194566067308187, "learning_rate": 0.001, "loss": 0.3699, "step": 10656 }, { "epoch": 0.2940500867429821, "grad_norm": 0.002812894992530346, "learning_rate": 0.001, "loss": 0.3738, "step": 10657 }, { "epoch": 0.29407767894404646, "grad_norm": 0.004144003614783287, "learning_rate": 0.001, "loss": 0.3811, "step": 10658 }, { "epoch": 0.2941052711451108, "grad_norm": 0.0025214876513928175, "learning_rate": 0.001, "loss": 0.3895, "step": 10659 }, { "epoch": 0.2941328633461752, "grad_norm": 0.0037252691108733416, "learning_rate": 0.001, "loss": 0.3846, "step": 10660 }, { "epoch": 0.2941604555472396, "grad_norm": 0.0028469781391322613, "learning_rate": 0.001, "loss": 0.4039, "step": 10661 }, { "epoch": 0.2941880477483039, "grad_norm": 0.004957746714353561, "learning_rate": 0.001, "loss": 0.3672, "step": 10662 }, { "epoch": 0.29421563994936833, "grad_norm": 0.0030585613567382097, "learning_rate": 0.001, "loss": 0.383, "step": 10663 }, { "epoch": 0.2942432321504327, "grad_norm": 0.003800381440669298, "learning_rate": 0.001, "loss": 0.4068, "step": 10664 }, { "epoch": 0.29427082435149704, "grad_norm": 0.0074830930680036545, "learning_rate": 0.001, "loss": 0.3925, "step": 10665 }, { "epoch": 0.2942984165525614, "grad_norm": 0.0026420445647090673, "learning_rate": 0.001, "loss": 0.4019, "step": 10666 }, { "epoch": 0.2943260087536258, "grad_norm": 0.0033852518536150455, "learning_rate": 0.001, "loss": 0.3826, "step": 10667 }, { "epoch": 0.29435360095469015, "grad_norm": 0.004352671559900045, "learning_rate": 0.001, "loss": 0.4167, "step": 10668 }, { "epoch": 0.2943811931557545, "grad_norm": 0.002693094778805971, "learning_rate": 0.001, "loss": 0.4035, "step": 10669 }, { "epoch": 0.2944087853568189, "grad_norm": 0.0030384764540940523, "learning_rate": 0.001, "loss": 0.3952, "step": 10670 }, { "epoch": 0.29443637755788327, "grad_norm": 0.0021408821921795607, "learning_rate": 0.001, "loss": 0.4297, "step": 10671 }, { "epoch": 0.2944639697589476, "grad_norm": 0.0028972120489925146, "learning_rate": 0.001, "loss": 0.3687, "step": 10672 }, { "epoch": 0.294491561960012, "grad_norm": 0.0024992034304887056, "learning_rate": 0.001, "loss": 0.4118, "step": 10673 }, { "epoch": 0.2945191541610764, "grad_norm": 0.004567326512187719, "learning_rate": 0.001, "loss": 0.4199, "step": 10674 }, { "epoch": 0.29454674636214073, "grad_norm": 0.0030367637518793344, "learning_rate": 0.001, "loss": 0.4081, "step": 10675 }, { "epoch": 0.2945743385632051, "grad_norm": 0.003327249316498637, "learning_rate": 0.001, "loss": 0.382, "step": 10676 }, { "epoch": 0.2946019307642695, "grad_norm": 0.00256183254532516, "learning_rate": 0.001, "loss": 0.3798, "step": 10677 }, { "epoch": 0.29462952296533385, "grad_norm": 0.004928114358335733, "learning_rate": 0.001, "loss": 0.3755, "step": 10678 }, { "epoch": 0.2946571151663982, "grad_norm": 0.0023404727689921856, "learning_rate": 0.001, "loss": 0.3945, "step": 10679 }, { "epoch": 0.2946847073674626, "grad_norm": 0.0023969600442796946, "learning_rate": 0.001, "loss": 0.4028, "step": 10680 }, { "epoch": 0.29471229956852696, "grad_norm": 0.0030103707686066628, "learning_rate": 0.001, "loss": 0.406, "step": 10681 }, { "epoch": 0.2947398917695913, "grad_norm": 0.003063908079639077, "learning_rate": 0.001, "loss": 0.4101, "step": 10682 }, { "epoch": 0.2947674839706557, "grad_norm": 0.0045205047354102135, "learning_rate": 0.001, "loss": 0.3845, "step": 10683 }, { "epoch": 0.29479507617172007, "grad_norm": 0.0176579337567091, "learning_rate": 0.001, "loss": 0.4263, "step": 10684 }, { "epoch": 0.2948226683727844, "grad_norm": 0.011839848011732101, "learning_rate": 0.001, "loss": 0.4159, "step": 10685 }, { "epoch": 0.2948502605738488, "grad_norm": 0.01756063662469387, "learning_rate": 0.001, "loss": 0.4065, "step": 10686 }, { "epoch": 0.2948778527749132, "grad_norm": 0.009184034541249275, "learning_rate": 0.001, "loss": 0.3991, "step": 10687 }, { "epoch": 0.29490544497597754, "grad_norm": 0.005395642481744289, "learning_rate": 0.001, "loss": 0.391, "step": 10688 }, { "epoch": 0.2949330371770419, "grad_norm": 0.016659358516335487, "learning_rate": 0.001, "loss": 0.3581, "step": 10689 }, { "epoch": 0.2949606293781063, "grad_norm": 0.01040132250636816, "learning_rate": 0.001, "loss": 0.3883, "step": 10690 }, { "epoch": 0.29498822157917065, "grad_norm": 0.003244626335799694, "learning_rate": 0.001, "loss": 0.3769, "step": 10691 }, { "epoch": 0.295015813780235, "grad_norm": 0.004035422578454018, "learning_rate": 0.001, "loss": 0.4067, "step": 10692 }, { "epoch": 0.2950434059812994, "grad_norm": 0.00780785595998168, "learning_rate": 0.001, "loss": 0.4043, "step": 10693 }, { "epoch": 0.29507099818236376, "grad_norm": 0.010562491603195667, "learning_rate": 0.001, "loss": 0.4504, "step": 10694 }, { "epoch": 0.2950985903834281, "grad_norm": 0.003506020875647664, "learning_rate": 0.001, "loss": 0.4049, "step": 10695 }, { "epoch": 0.29512618258449247, "grad_norm": 0.0033239612821489573, "learning_rate": 0.001, "loss": 0.4266, "step": 10696 }, { "epoch": 0.2951537747855569, "grad_norm": 0.0031016524881124496, "learning_rate": 0.001, "loss": 0.445, "step": 10697 }, { "epoch": 0.29518136698662123, "grad_norm": 0.0037100305780768394, "learning_rate": 0.001, "loss": 0.4156, "step": 10698 }, { "epoch": 0.2952089591876856, "grad_norm": 0.003935493528842926, "learning_rate": 0.001, "loss": 0.3695, "step": 10699 }, { "epoch": 0.29523655138875, "grad_norm": 0.0022481977939605713, "learning_rate": 0.001, "loss": 0.4624, "step": 10700 }, { "epoch": 0.29526414358981434, "grad_norm": 0.003701514797285199, "learning_rate": 0.001, "loss": 0.4009, "step": 10701 }, { "epoch": 0.2952917357908787, "grad_norm": 0.004181206226348877, "learning_rate": 0.001, "loss": 0.3765, "step": 10702 }, { "epoch": 0.2953193279919431, "grad_norm": 0.0034305029548704624, "learning_rate": 0.001, "loss": 0.391, "step": 10703 }, { "epoch": 0.29534692019300746, "grad_norm": 0.013461762107908726, "learning_rate": 0.001, "loss": 0.4012, "step": 10704 }, { "epoch": 0.2953745123940718, "grad_norm": 0.006567132659256458, "learning_rate": 0.001, "loss": 0.4033, "step": 10705 }, { "epoch": 0.29540210459513616, "grad_norm": 0.006370114628225565, "learning_rate": 0.001, "loss": 0.3926, "step": 10706 }, { "epoch": 0.29542969679620057, "grad_norm": 0.0061858720146119595, "learning_rate": 0.001, "loss": 0.4074, "step": 10707 }, { "epoch": 0.2954572889972649, "grad_norm": 0.0025320991408079863, "learning_rate": 0.001, "loss": 0.4031, "step": 10708 }, { "epoch": 0.2954848811983293, "grad_norm": 0.0028619118966162205, "learning_rate": 0.001, "loss": 0.4078, "step": 10709 }, { "epoch": 0.2955124733993937, "grad_norm": 0.0026225673500448465, "learning_rate": 0.001, "loss": 0.4244, "step": 10710 }, { "epoch": 0.29554006560045804, "grad_norm": 0.0035380006302148104, "learning_rate": 0.001, "loss": 0.3842, "step": 10711 }, { "epoch": 0.2955676578015224, "grad_norm": 0.0031377810519188643, "learning_rate": 0.001, "loss": 0.4318, "step": 10712 }, { "epoch": 0.2955952500025868, "grad_norm": 0.0046783569268882275, "learning_rate": 0.001, "loss": 0.3563, "step": 10713 }, { "epoch": 0.29562284220365115, "grad_norm": 0.0029727064538747072, "learning_rate": 0.001, "loss": 0.4166, "step": 10714 }, { "epoch": 0.2956504344047155, "grad_norm": 0.0036277880426496267, "learning_rate": 0.001, "loss": 0.4064, "step": 10715 }, { "epoch": 0.29567802660577985, "grad_norm": 0.002516635926440358, "learning_rate": 0.001, "loss": 0.4071, "step": 10716 }, { "epoch": 0.29570561880684426, "grad_norm": 0.0029823719523847103, "learning_rate": 0.001, "loss": 0.4081, "step": 10717 }, { "epoch": 0.2957332110079086, "grad_norm": 0.008971653878688812, "learning_rate": 0.001, "loss": 0.4054, "step": 10718 }, { "epoch": 0.29576080320897297, "grad_norm": 0.0029265042394399643, "learning_rate": 0.001, "loss": 0.4077, "step": 10719 }, { "epoch": 0.2957883954100374, "grad_norm": 0.003302923170849681, "learning_rate": 0.001, "loss": 0.4356, "step": 10720 }, { "epoch": 0.2958159876111017, "grad_norm": 0.0031180151272565126, "learning_rate": 0.001, "loss": 0.4354, "step": 10721 }, { "epoch": 0.2958435798121661, "grad_norm": 0.009599697776138783, "learning_rate": 0.001, "loss": 0.3762, "step": 10722 }, { "epoch": 0.2958711720132305, "grad_norm": 0.0027971549425274134, "learning_rate": 0.001, "loss": 0.3832, "step": 10723 }, { "epoch": 0.29589876421429484, "grad_norm": 0.0056414068676531315, "learning_rate": 0.001, "loss": 0.3877, "step": 10724 }, { "epoch": 0.2959263564153592, "grad_norm": 0.004426025785505772, "learning_rate": 0.001, "loss": 0.4151, "step": 10725 }, { "epoch": 0.29595394861642355, "grad_norm": 0.0021196724846959114, "learning_rate": 0.001, "loss": 0.4232, "step": 10726 }, { "epoch": 0.29598154081748795, "grad_norm": 0.002627891954034567, "learning_rate": 0.001, "loss": 0.4296, "step": 10727 }, { "epoch": 0.2960091330185523, "grad_norm": 0.006108472123742104, "learning_rate": 0.001, "loss": 0.3671, "step": 10728 }, { "epoch": 0.29603672521961666, "grad_norm": 0.0034290605690330267, "learning_rate": 0.001, "loss": 0.3751, "step": 10729 }, { "epoch": 0.29606431742068107, "grad_norm": 0.0039821164682507515, "learning_rate": 0.001, "loss": 0.3657, "step": 10730 }, { "epoch": 0.2960919096217454, "grad_norm": 0.0026045297272503376, "learning_rate": 0.001, "loss": 0.4055, "step": 10731 }, { "epoch": 0.29611950182280977, "grad_norm": 0.0025215772911906242, "learning_rate": 0.001, "loss": 0.3925, "step": 10732 }, { "epoch": 0.2961470940238741, "grad_norm": 0.0033087488263845444, "learning_rate": 0.001, "loss": 0.3933, "step": 10733 }, { "epoch": 0.29617468622493853, "grad_norm": 0.002935874741524458, "learning_rate": 0.001, "loss": 0.3783, "step": 10734 }, { "epoch": 0.2962022784260029, "grad_norm": 0.00557227386161685, "learning_rate": 0.001, "loss": 0.3924, "step": 10735 }, { "epoch": 0.29622987062706724, "grad_norm": 0.0036772515159100294, "learning_rate": 0.001, "loss": 0.403, "step": 10736 }, { "epoch": 0.29625746282813165, "grad_norm": 0.004311760421842337, "learning_rate": 0.001, "loss": 0.3504, "step": 10737 }, { "epoch": 0.296285055029196, "grad_norm": 0.0028161678928881884, "learning_rate": 0.001, "loss": 0.4023, "step": 10738 }, { "epoch": 0.29631264723026035, "grad_norm": 0.00484466552734375, "learning_rate": 0.001, "loss": 0.3902, "step": 10739 }, { "epoch": 0.29634023943132476, "grad_norm": 0.0023396711330860853, "learning_rate": 0.001, "loss": 0.4433, "step": 10740 }, { "epoch": 0.2963678316323891, "grad_norm": 0.0026779670733958483, "learning_rate": 0.001, "loss": 0.3784, "step": 10741 }, { "epoch": 0.29639542383345346, "grad_norm": 0.002949584275484085, "learning_rate": 0.001, "loss": 0.34, "step": 10742 }, { "epoch": 0.2964230160345178, "grad_norm": 0.00291722291149199, "learning_rate": 0.001, "loss": 0.3756, "step": 10743 }, { "epoch": 0.2964506082355822, "grad_norm": 0.00298800109885633, "learning_rate": 0.001, "loss": 0.3669, "step": 10744 }, { "epoch": 0.2964782004366466, "grad_norm": 0.003106021322309971, "learning_rate": 0.001, "loss": 0.4179, "step": 10745 }, { "epoch": 0.29650579263771093, "grad_norm": 0.002655046060681343, "learning_rate": 0.001, "loss": 0.3925, "step": 10746 }, { "epoch": 0.29653338483877534, "grad_norm": 0.003087179269641638, "learning_rate": 0.001, "loss": 0.418, "step": 10747 }, { "epoch": 0.2965609770398397, "grad_norm": 0.004109977278858423, "learning_rate": 0.001, "loss": 0.4055, "step": 10748 }, { "epoch": 0.29658856924090404, "grad_norm": 0.0026674012187868357, "learning_rate": 0.001, "loss": 0.423, "step": 10749 }, { "epoch": 0.29661616144196845, "grad_norm": 0.004147304221987724, "learning_rate": 0.001, "loss": 0.4138, "step": 10750 }, { "epoch": 0.2966437536430328, "grad_norm": 0.0035869008861482143, "learning_rate": 0.001, "loss": 0.3942, "step": 10751 }, { "epoch": 0.29667134584409716, "grad_norm": 0.0047487919218838215, "learning_rate": 0.001, "loss": 0.4298, "step": 10752 }, { "epoch": 0.2966989380451615, "grad_norm": 0.004151053261011839, "learning_rate": 0.001, "loss": 0.4299, "step": 10753 }, { "epoch": 0.2967265302462259, "grad_norm": 0.007814058102667332, "learning_rate": 0.001, "loss": 0.3843, "step": 10754 }, { "epoch": 0.29675412244729027, "grad_norm": 0.011301838792860508, "learning_rate": 0.001, "loss": 0.3942, "step": 10755 }, { "epoch": 0.2967817146483546, "grad_norm": 0.00466828653588891, "learning_rate": 0.001, "loss": 0.411, "step": 10756 }, { "epoch": 0.29680930684941903, "grad_norm": 0.002339472994208336, "learning_rate": 0.001, "loss": 0.4028, "step": 10757 }, { "epoch": 0.2968368990504834, "grad_norm": 0.0024191855918616056, "learning_rate": 0.001, "loss": 0.411, "step": 10758 }, { "epoch": 0.29686449125154774, "grad_norm": 0.003175315447151661, "learning_rate": 0.001, "loss": 0.4209, "step": 10759 }, { "epoch": 0.29689208345261214, "grad_norm": 0.010462275706231594, "learning_rate": 0.001, "loss": 0.3814, "step": 10760 }, { "epoch": 0.2969196756536765, "grad_norm": 0.002783535746857524, "learning_rate": 0.001, "loss": 0.4265, "step": 10761 }, { "epoch": 0.29694726785474085, "grad_norm": 0.0035620226990431547, "learning_rate": 0.001, "loss": 0.4312, "step": 10762 }, { "epoch": 0.2969748600558052, "grad_norm": 0.003987728152424097, "learning_rate": 0.001, "loss": 0.385, "step": 10763 }, { "epoch": 0.2970024522568696, "grad_norm": 0.0033255494199693203, "learning_rate": 0.001, "loss": 0.448, "step": 10764 }, { "epoch": 0.29703004445793396, "grad_norm": 0.0032936478964984417, "learning_rate": 0.001, "loss": 0.3832, "step": 10765 }, { "epoch": 0.2970576366589983, "grad_norm": 0.0029676929116249084, "learning_rate": 0.001, "loss": 0.4159, "step": 10766 }, { "epoch": 0.2970852288600627, "grad_norm": 0.0031973712611943483, "learning_rate": 0.001, "loss": 0.3938, "step": 10767 }, { "epoch": 0.2971128210611271, "grad_norm": 0.00365530326962471, "learning_rate": 0.001, "loss": 0.3773, "step": 10768 }, { "epoch": 0.2971404132621914, "grad_norm": 0.0027721880469471216, "learning_rate": 0.001, "loss": 0.4058, "step": 10769 }, { "epoch": 0.29716800546325584, "grad_norm": 0.003800489939749241, "learning_rate": 0.001, "loss": 0.4081, "step": 10770 }, { "epoch": 0.2971955976643202, "grad_norm": 0.0025981413200497627, "learning_rate": 0.001, "loss": 0.3701, "step": 10771 }, { "epoch": 0.29722318986538454, "grad_norm": 0.002971008885651827, "learning_rate": 0.001, "loss": 0.3769, "step": 10772 }, { "epoch": 0.2972507820664489, "grad_norm": 0.0038061172235757113, "learning_rate": 0.001, "loss": 0.3942, "step": 10773 }, { "epoch": 0.2972783742675133, "grad_norm": 0.006774569861590862, "learning_rate": 0.001, "loss": 0.4156, "step": 10774 }, { "epoch": 0.29730596646857765, "grad_norm": 0.002431475091725588, "learning_rate": 0.001, "loss": 0.3845, "step": 10775 }, { "epoch": 0.297333558669642, "grad_norm": 0.0034196816850453615, "learning_rate": 0.001, "loss": 0.4186, "step": 10776 }, { "epoch": 0.2973611508707064, "grad_norm": 0.0041458397172391415, "learning_rate": 0.001, "loss": 0.3907, "step": 10777 }, { "epoch": 0.29738874307177077, "grad_norm": 0.0031936741434037685, "learning_rate": 0.001, "loss": 0.3969, "step": 10778 }, { "epoch": 0.2974163352728351, "grad_norm": 0.0030546991620212793, "learning_rate": 0.001, "loss": 0.3683, "step": 10779 }, { "epoch": 0.2974439274738995, "grad_norm": 0.0025455260183662176, "learning_rate": 0.001, "loss": 0.4408, "step": 10780 }, { "epoch": 0.2974715196749639, "grad_norm": 0.002307919319719076, "learning_rate": 0.001, "loss": 0.4057, "step": 10781 }, { "epoch": 0.29749911187602823, "grad_norm": 0.0028700283728539944, "learning_rate": 0.001, "loss": 0.3809, "step": 10782 }, { "epoch": 0.2975267040770926, "grad_norm": 0.0026034286711364985, "learning_rate": 0.001, "loss": 0.4032, "step": 10783 }, { "epoch": 0.297554296278157, "grad_norm": 0.005439944099634886, "learning_rate": 0.001, "loss": 0.3849, "step": 10784 }, { "epoch": 0.29758188847922135, "grad_norm": 0.004825190175324678, "learning_rate": 0.001, "loss": 0.3851, "step": 10785 }, { "epoch": 0.2976094806802857, "grad_norm": 0.0020885509438812733, "learning_rate": 0.001, "loss": 0.3967, "step": 10786 }, { "epoch": 0.2976370728813501, "grad_norm": 0.0027620510663837194, "learning_rate": 0.001, "loss": 0.436, "step": 10787 }, { "epoch": 0.29766466508241446, "grad_norm": 0.002639887621626258, "learning_rate": 0.001, "loss": 0.3654, "step": 10788 }, { "epoch": 0.2976922572834788, "grad_norm": 0.0022529284469783306, "learning_rate": 0.001, "loss": 0.4011, "step": 10789 }, { "epoch": 0.2977198494845432, "grad_norm": 0.0026258870493620634, "learning_rate": 0.001, "loss": 0.4102, "step": 10790 }, { "epoch": 0.2977474416856076, "grad_norm": 0.0029656942933797836, "learning_rate": 0.001, "loss": 0.4083, "step": 10791 }, { "epoch": 0.2977750338866719, "grad_norm": 0.0020525925792753696, "learning_rate": 0.001, "loss": 0.4201, "step": 10792 }, { "epoch": 0.2978026260877363, "grad_norm": 0.0030268540140241385, "learning_rate": 0.001, "loss": 0.4068, "step": 10793 }, { "epoch": 0.2978302182888007, "grad_norm": 0.003296051872894168, "learning_rate": 0.001, "loss": 0.3897, "step": 10794 }, { "epoch": 0.29785781048986504, "grad_norm": 0.002267766511067748, "learning_rate": 0.001, "loss": 0.4141, "step": 10795 }, { "epoch": 0.2978854026909294, "grad_norm": 0.0039958711713552475, "learning_rate": 0.001, "loss": 0.3643, "step": 10796 }, { "epoch": 0.2979129948919938, "grad_norm": 0.003096002619713545, "learning_rate": 0.001, "loss": 0.356, "step": 10797 }, { "epoch": 0.29794058709305815, "grad_norm": 0.004919158294796944, "learning_rate": 0.001, "loss": 0.3677, "step": 10798 }, { "epoch": 0.2979681792941225, "grad_norm": 0.002651217393577099, "learning_rate": 0.001, "loss": 0.397, "step": 10799 }, { "epoch": 0.2979957714951869, "grad_norm": 0.0031349128112196922, "learning_rate": 0.001, "loss": 0.3819, "step": 10800 }, { "epoch": 0.29802336369625126, "grad_norm": 0.0057730907574296, "learning_rate": 0.001, "loss": 0.3844, "step": 10801 }, { "epoch": 0.2980509558973156, "grad_norm": 0.004934222903102636, "learning_rate": 0.001, "loss": 0.3653, "step": 10802 }, { "epoch": 0.29807854809837997, "grad_norm": 0.003473837161436677, "learning_rate": 0.001, "loss": 0.3967, "step": 10803 }, { "epoch": 0.2981061402994444, "grad_norm": 0.0036314206663519144, "learning_rate": 0.001, "loss": 0.3985, "step": 10804 }, { "epoch": 0.29813373250050873, "grad_norm": 0.004540051333606243, "learning_rate": 0.001, "loss": 0.3885, "step": 10805 }, { "epoch": 0.2981613247015731, "grad_norm": 0.002627008128911257, "learning_rate": 0.001, "loss": 0.4118, "step": 10806 }, { "epoch": 0.2981889169026375, "grad_norm": 0.0027296175248920918, "learning_rate": 0.001, "loss": 0.3905, "step": 10807 }, { "epoch": 0.29821650910370184, "grad_norm": 0.0026647706981748343, "learning_rate": 0.001, "loss": 0.4231, "step": 10808 }, { "epoch": 0.2982441013047662, "grad_norm": 0.003241637721657753, "learning_rate": 0.001, "loss": 0.3983, "step": 10809 }, { "epoch": 0.2982716935058306, "grad_norm": 0.0028002928011119366, "learning_rate": 0.001, "loss": 0.4148, "step": 10810 }, { "epoch": 0.29829928570689496, "grad_norm": 0.0028025200590491295, "learning_rate": 0.001, "loss": 0.4006, "step": 10811 }, { "epoch": 0.2983268779079593, "grad_norm": 0.002906485227867961, "learning_rate": 0.001, "loss": 0.3997, "step": 10812 }, { "epoch": 0.29835447010902366, "grad_norm": 0.0039535220712423325, "learning_rate": 0.001, "loss": 0.4075, "step": 10813 }, { "epoch": 0.29838206231008807, "grad_norm": 0.0033603734336793423, "learning_rate": 0.001, "loss": 0.3703, "step": 10814 }, { "epoch": 0.2984096545111524, "grad_norm": 0.0031661302782595158, "learning_rate": 0.001, "loss": 0.3992, "step": 10815 }, { "epoch": 0.2984372467122168, "grad_norm": 0.002344726352021098, "learning_rate": 0.001, "loss": 0.3914, "step": 10816 }, { "epoch": 0.2984648389132812, "grad_norm": 0.0025039922911673784, "learning_rate": 0.001, "loss": 0.398, "step": 10817 }, { "epoch": 0.29849243111434554, "grad_norm": 0.002225354313850403, "learning_rate": 0.001, "loss": 0.425, "step": 10818 }, { "epoch": 0.2985200233154099, "grad_norm": 0.0027313402388244867, "learning_rate": 0.001, "loss": 0.3782, "step": 10819 }, { "epoch": 0.2985476155164743, "grad_norm": 0.017960865050554276, "learning_rate": 0.001, "loss": 0.4091, "step": 10820 }, { "epoch": 0.29857520771753865, "grad_norm": 0.011598210781812668, "learning_rate": 0.001, "loss": 0.3751, "step": 10821 }, { "epoch": 0.298602799918603, "grad_norm": 0.00278772902674973, "learning_rate": 0.001, "loss": 0.4128, "step": 10822 }, { "epoch": 0.29863039211966735, "grad_norm": 0.0026171396020799875, "learning_rate": 0.001, "loss": 0.4718, "step": 10823 }, { "epoch": 0.29865798432073176, "grad_norm": 0.0038320801686495543, "learning_rate": 0.001, "loss": 0.3419, "step": 10824 }, { "epoch": 0.2986855765217961, "grad_norm": 0.002889628754928708, "learning_rate": 0.001, "loss": 0.4093, "step": 10825 }, { "epoch": 0.29871316872286047, "grad_norm": 0.003588633146136999, "learning_rate": 0.001, "loss": 0.3945, "step": 10826 }, { "epoch": 0.2987407609239249, "grad_norm": 0.009326872415840626, "learning_rate": 0.001, "loss": 0.4037, "step": 10827 }, { "epoch": 0.2987683531249892, "grad_norm": 0.003909088671207428, "learning_rate": 0.001, "loss": 0.407, "step": 10828 }, { "epoch": 0.2987959453260536, "grad_norm": 0.002797875087708235, "learning_rate": 0.001, "loss": 0.3514, "step": 10829 }, { "epoch": 0.29882353752711793, "grad_norm": 0.003469844814389944, "learning_rate": 0.001, "loss": 0.4057, "step": 10830 }, { "epoch": 0.29885112972818234, "grad_norm": 0.0033456028904765844, "learning_rate": 0.001, "loss": 0.3879, "step": 10831 }, { "epoch": 0.2988787219292467, "grad_norm": 0.034299980849027634, "learning_rate": 0.001, "loss": 0.3644, "step": 10832 }, { "epoch": 0.29890631413031105, "grad_norm": 0.003411337733268738, "learning_rate": 0.001, "loss": 0.3837, "step": 10833 }, { "epoch": 0.29893390633137545, "grad_norm": 0.0031848186627030373, "learning_rate": 0.001, "loss": 0.3845, "step": 10834 }, { "epoch": 0.2989614985324398, "grad_norm": 0.004304205067455769, "learning_rate": 0.001, "loss": 0.3718, "step": 10835 }, { "epoch": 0.29898909073350416, "grad_norm": 0.002746276091784239, "learning_rate": 0.001, "loss": 0.3903, "step": 10836 }, { "epoch": 0.29901668293456857, "grad_norm": 0.002491393592208624, "learning_rate": 0.001, "loss": 0.4229, "step": 10837 }, { "epoch": 0.2990442751356329, "grad_norm": 0.0038965316489338875, "learning_rate": 0.001, "loss": 0.3433, "step": 10838 }, { "epoch": 0.2990718673366973, "grad_norm": 0.0028245735447853804, "learning_rate": 0.001, "loss": 0.3994, "step": 10839 }, { "epoch": 0.2990994595377616, "grad_norm": 0.0021889859344810247, "learning_rate": 0.001, "loss": 0.4225, "step": 10840 }, { "epoch": 0.29912705173882603, "grad_norm": 0.0026167482137680054, "learning_rate": 0.001, "loss": 0.4401, "step": 10841 }, { "epoch": 0.2991546439398904, "grad_norm": 0.0028761785943061113, "learning_rate": 0.001, "loss": 0.3929, "step": 10842 }, { "epoch": 0.29918223614095474, "grad_norm": 0.0031041146721690893, "learning_rate": 0.001, "loss": 0.3791, "step": 10843 }, { "epoch": 0.29920982834201915, "grad_norm": 0.002183483447879553, "learning_rate": 0.001, "loss": 0.4201, "step": 10844 }, { "epoch": 0.2992374205430835, "grad_norm": 0.0021024607121944427, "learning_rate": 0.001, "loss": 0.3916, "step": 10845 }, { "epoch": 0.29926501274414785, "grad_norm": 0.004469338804483414, "learning_rate": 0.001, "loss": 0.3661, "step": 10846 }, { "epoch": 0.29929260494521226, "grad_norm": 0.002203304087743163, "learning_rate": 0.001, "loss": 0.3981, "step": 10847 }, { "epoch": 0.2993201971462766, "grad_norm": 0.002491022925823927, "learning_rate": 0.001, "loss": 0.3879, "step": 10848 }, { "epoch": 0.29934778934734096, "grad_norm": 0.005152471829205751, "learning_rate": 0.001, "loss": 0.3898, "step": 10849 }, { "epoch": 0.2993753815484053, "grad_norm": 0.002667092252522707, "learning_rate": 0.001, "loss": 0.4063, "step": 10850 }, { "epoch": 0.2994029737494697, "grad_norm": 0.002084558829665184, "learning_rate": 0.001, "loss": 0.4005, "step": 10851 }, { "epoch": 0.2994305659505341, "grad_norm": 0.004005138296633959, "learning_rate": 0.001, "loss": 0.3918, "step": 10852 }, { "epoch": 0.29945815815159843, "grad_norm": 0.002981225959956646, "learning_rate": 0.001, "loss": 0.4126, "step": 10853 }, { "epoch": 0.29948575035266284, "grad_norm": 0.002931734314188361, "learning_rate": 0.001, "loss": 0.3983, "step": 10854 }, { "epoch": 0.2995133425537272, "grad_norm": 0.002261395798996091, "learning_rate": 0.001, "loss": 0.4082, "step": 10855 }, { "epoch": 0.29954093475479154, "grad_norm": 0.0047109718434512615, "learning_rate": 0.001, "loss": 0.3924, "step": 10856 }, { "epoch": 0.29956852695585595, "grad_norm": 0.003906469792127609, "learning_rate": 0.001, "loss": 0.4029, "step": 10857 }, { "epoch": 0.2995961191569203, "grad_norm": 0.003466276917606592, "learning_rate": 0.001, "loss": 0.3953, "step": 10858 }, { "epoch": 0.29962371135798466, "grad_norm": 0.005029084160923958, "learning_rate": 0.001, "loss": 0.4143, "step": 10859 }, { "epoch": 0.299651303559049, "grad_norm": 0.0030740953516215086, "learning_rate": 0.001, "loss": 0.402, "step": 10860 }, { "epoch": 0.2996788957601134, "grad_norm": 0.00288767390884459, "learning_rate": 0.001, "loss": 0.3996, "step": 10861 }, { "epoch": 0.29970648796117777, "grad_norm": 0.004084376618266106, "learning_rate": 0.001, "loss": 0.408, "step": 10862 }, { "epoch": 0.2997340801622421, "grad_norm": 0.008121359162032604, "learning_rate": 0.001, "loss": 0.3961, "step": 10863 }, { "epoch": 0.29976167236330653, "grad_norm": 0.0030457088723778725, "learning_rate": 0.001, "loss": 0.406, "step": 10864 }, { "epoch": 0.2997892645643709, "grad_norm": 0.0024807127192616463, "learning_rate": 0.001, "loss": 0.4243, "step": 10865 }, { "epoch": 0.29981685676543524, "grad_norm": 0.0041881585493683815, "learning_rate": 0.001, "loss": 0.3817, "step": 10866 }, { "epoch": 0.29984444896649964, "grad_norm": 0.005836515221744776, "learning_rate": 0.001, "loss": 0.4146, "step": 10867 }, { "epoch": 0.299872041167564, "grad_norm": 0.003004920668900013, "learning_rate": 0.001, "loss": 0.377, "step": 10868 }, { "epoch": 0.29989963336862835, "grad_norm": 0.0062048486433923244, "learning_rate": 0.001, "loss": 0.3794, "step": 10869 }, { "epoch": 0.2999272255696927, "grad_norm": 0.0027396834921091795, "learning_rate": 0.001, "loss": 0.375, "step": 10870 }, { "epoch": 0.2999548177707571, "grad_norm": 0.0027734714094549417, "learning_rate": 0.001, "loss": 0.3982, "step": 10871 }, { "epoch": 0.29998240997182146, "grad_norm": 0.0023340985644608736, "learning_rate": 0.001, "loss": 0.4122, "step": 10872 }, { "epoch": 0.3000100021728858, "grad_norm": 0.005321372300386429, "learning_rate": 0.001, "loss": 0.4037, "step": 10873 }, { "epoch": 0.3000375943739502, "grad_norm": 0.002819483634084463, "learning_rate": 0.001, "loss": 0.3661, "step": 10874 }, { "epoch": 0.3000651865750146, "grad_norm": 0.0026980694383382797, "learning_rate": 0.001, "loss": 0.4059, "step": 10875 }, { "epoch": 0.30009277877607893, "grad_norm": 0.003244214691221714, "learning_rate": 0.001, "loss": 0.3792, "step": 10876 }, { "epoch": 0.30012037097714334, "grad_norm": 0.004197717644274235, "learning_rate": 0.001, "loss": 0.4162, "step": 10877 }, { "epoch": 0.3001479631782077, "grad_norm": 0.0026915615890175104, "learning_rate": 0.001, "loss": 0.3901, "step": 10878 }, { "epoch": 0.30017555537927204, "grad_norm": 0.003251213114708662, "learning_rate": 0.001, "loss": 0.4024, "step": 10879 }, { "epoch": 0.3002031475803364, "grad_norm": 0.004156854934990406, "learning_rate": 0.001, "loss": 0.4144, "step": 10880 }, { "epoch": 0.3002307397814008, "grad_norm": 0.0028878510929644108, "learning_rate": 0.001, "loss": 0.3931, "step": 10881 }, { "epoch": 0.30025833198246515, "grad_norm": 0.003199911443516612, "learning_rate": 0.001, "loss": 0.3568, "step": 10882 }, { "epoch": 0.3002859241835295, "grad_norm": 0.002910776762291789, "learning_rate": 0.001, "loss": 0.4044, "step": 10883 }, { "epoch": 0.3003135163845939, "grad_norm": 0.004030926618725061, "learning_rate": 0.001, "loss": 0.394, "step": 10884 }, { "epoch": 0.30034110858565827, "grad_norm": 0.004950513131916523, "learning_rate": 0.001, "loss": 0.3878, "step": 10885 }, { "epoch": 0.3003687007867226, "grad_norm": 0.0032172142527997494, "learning_rate": 0.001, "loss": 0.4344, "step": 10886 }, { "epoch": 0.30039629298778703, "grad_norm": 0.002721422351896763, "learning_rate": 0.001, "loss": 0.3552, "step": 10887 }, { "epoch": 0.3004238851888514, "grad_norm": 0.0029736789874732494, "learning_rate": 0.001, "loss": 0.4153, "step": 10888 }, { "epoch": 0.30045147738991573, "grad_norm": 0.0037150525022298098, "learning_rate": 0.001, "loss": 0.4114, "step": 10889 }, { "epoch": 0.3004790695909801, "grad_norm": 0.002336825244128704, "learning_rate": 0.001, "loss": 0.3982, "step": 10890 }, { "epoch": 0.3005066617920445, "grad_norm": 0.0033320027869194746, "learning_rate": 0.001, "loss": 0.4131, "step": 10891 }, { "epoch": 0.30053425399310885, "grad_norm": 0.002994644921272993, "learning_rate": 0.001, "loss": 0.3913, "step": 10892 }, { "epoch": 0.3005618461941732, "grad_norm": 0.002780053298920393, "learning_rate": 0.001, "loss": 0.4264, "step": 10893 }, { "epoch": 0.3005894383952376, "grad_norm": 0.0025280967820435762, "learning_rate": 0.001, "loss": 0.4108, "step": 10894 }, { "epoch": 0.30061703059630196, "grad_norm": 0.003201976651325822, "learning_rate": 0.001, "loss": 0.4263, "step": 10895 }, { "epoch": 0.3006446227973663, "grad_norm": 0.0020535339135676622, "learning_rate": 0.001, "loss": 0.4297, "step": 10896 }, { "epoch": 0.3006722149984307, "grad_norm": 0.0021851370111107826, "learning_rate": 0.001, "loss": 0.4011, "step": 10897 }, { "epoch": 0.3006998071994951, "grad_norm": 0.003310294123366475, "learning_rate": 0.001, "loss": 0.3851, "step": 10898 }, { "epoch": 0.3007273994005594, "grad_norm": 0.002345843706279993, "learning_rate": 0.001, "loss": 0.3874, "step": 10899 }, { "epoch": 0.3007549916016238, "grad_norm": 0.004679036792367697, "learning_rate": 0.001, "loss": 0.389, "step": 10900 }, { "epoch": 0.3007825838026882, "grad_norm": 0.0026619029231369495, "learning_rate": 0.001, "loss": 0.3923, "step": 10901 }, { "epoch": 0.30081017600375254, "grad_norm": 0.003363067051395774, "learning_rate": 0.001, "loss": 0.3938, "step": 10902 }, { "epoch": 0.3008377682048169, "grad_norm": 0.0038764209020882845, "learning_rate": 0.001, "loss": 0.3845, "step": 10903 }, { "epoch": 0.3008653604058813, "grad_norm": 0.005055352114140987, "learning_rate": 0.001, "loss": 0.382, "step": 10904 }, { "epoch": 0.30089295260694565, "grad_norm": 0.0033133430406451225, "learning_rate": 0.001, "loss": 0.3822, "step": 10905 }, { "epoch": 0.30092054480801, "grad_norm": 0.002643629675731063, "learning_rate": 0.001, "loss": 0.4331, "step": 10906 }, { "epoch": 0.3009481370090744, "grad_norm": 0.010275435633957386, "learning_rate": 0.001, "loss": 0.42, "step": 10907 }, { "epoch": 0.30097572921013876, "grad_norm": 0.0022841233294457197, "learning_rate": 0.001, "loss": 0.4112, "step": 10908 }, { "epoch": 0.3010033214112031, "grad_norm": 0.00588460685685277, "learning_rate": 0.001, "loss": 0.4, "step": 10909 }, { "epoch": 0.30103091361226747, "grad_norm": 0.003090951358899474, "learning_rate": 0.001, "loss": 0.3985, "step": 10910 }, { "epoch": 0.3010585058133319, "grad_norm": 0.011329708620905876, "learning_rate": 0.001, "loss": 0.4228, "step": 10911 }, { "epoch": 0.30108609801439623, "grad_norm": 0.002050732960924506, "learning_rate": 0.001, "loss": 0.4334, "step": 10912 }, { "epoch": 0.3011136902154606, "grad_norm": 0.005159840919077396, "learning_rate": 0.001, "loss": 0.3852, "step": 10913 }, { "epoch": 0.301141282416525, "grad_norm": 0.002075582044199109, "learning_rate": 0.001, "loss": 0.3713, "step": 10914 }, { "epoch": 0.30116887461758934, "grad_norm": 0.0043141464702785015, "learning_rate": 0.001, "loss": 0.3896, "step": 10915 }, { "epoch": 0.3011964668186537, "grad_norm": 0.0034079302567988634, "learning_rate": 0.001, "loss": 0.4032, "step": 10916 }, { "epoch": 0.30122405901971805, "grad_norm": 0.0023484264966100454, "learning_rate": 0.001, "loss": 0.3907, "step": 10917 }, { "epoch": 0.30125165122078246, "grad_norm": 0.0037031807005405426, "learning_rate": 0.001, "loss": 0.4304, "step": 10918 }, { "epoch": 0.3012792434218468, "grad_norm": 0.0023840791545808315, "learning_rate": 0.001, "loss": 0.4139, "step": 10919 }, { "epoch": 0.30130683562291116, "grad_norm": 0.002175979781895876, "learning_rate": 0.001, "loss": 0.4065, "step": 10920 }, { "epoch": 0.30133442782397557, "grad_norm": 0.0025980628561228514, "learning_rate": 0.001, "loss": 0.4119, "step": 10921 }, { "epoch": 0.3013620200250399, "grad_norm": 0.012000390328466892, "learning_rate": 0.001, "loss": 0.3716, "step": 10922 }, { "epoch": 0.3013896122261043, "grad_norm": 0.002536133164539933, "learning_rate": 0.001, "loss": 0.372, "step": 10923 }, { "epoch": 0.3014172044271687, "grad_norm": 0.002884393557906151, "learning_rate": 0.001, "loss": 0.377, "step": 10924 }, { "epoch": 0.30144479662823304, "grad_norm": 0.002150443848222494, "learning_rate": 0.001, "loss": 0.4188, "step": 10925 }, { "epoch": 0.3014723888292974, "grad_norm": 0.0027890305500477552, "learning_rate": 0.001, "loss": 0.3784, "step": 10926 }, { "epoch": 0.30149998103036174, "grad_norm": 0.0049143158830702305, "learning_rate": 0.001, "loss": 0.3985, "step": 10927 }, { "epoch": 0.30152757323142615, "grad_norm": 0.003542589023709297, "learning_rate": 0.001, "loss": 0.3984, "step": 10928 }, { "epoch": 0.3015551654324905, "grad_norm": 0.002202375093474984, "learning_rate": 0.001, "loss": 0.4332, "step": 10929 }, { "epoch": 0.30158275763355485, "grad_norm": 0.0022870609536767006, "learning_rate": 0.001, "loss": 0.4504, "step": 10930 }, { "epoch": 0.30161034983461926, "grad_norm": 0.0055159348994493484, "learning_rate": 0.001, "loss": 0.3928, "step": 10931 }, { "epoch": 0.3016379420356836, "grad_norm": 0.03571107238531113, "learning_rate": 0.001, "loss": 0.3786, "step": 10932 }, { "epoch": 0.30166553423674797, "grad_norm": 0.004229827784001827, "learning_rate": 0.001, "loss": 0.3811, "step": 10933 }, { "epoch": 0.3016931264378124, "grad_norm": 0.0025944889057427645, "learning_rate": 0.001, "loss": 0.4141, "step": 10934 }, { "epoch": 0.30172071863887673, "grad_norm": 0.007089374121278524, "learning_rate": 0.001, "loss": 0.403, "step": 10935 }, { "epoch": 0.3017483108399411, "grad_norm": 0.002950213151052594, "learning_rate": 0.001, "loss": 0.3948, "step": 10936 }, { "epoch": 0.30177590304100543, "grad_norm": 0.006799718365073204, "learning_rate": 0.001, "loss": 0.4585, "step": 10937 }, { "epoch": 0.30180349524206984, "grad_norm": 0.004761769436299801, "learning_rate": 0.001, "loss": 0.4065, "step": 10938 }, { "epoch": 0.3018310874431342, "grad_norm": 0.004374027717858553, "learning_rate": 0.001, "loss": 0.3667, "step": 10939 }, { "epoch": 0.30185867964419855, "grad_norm": 0.007258753292262554, "learning_rate": 0.001, "loss": 0.3262, "step": 10940 }, { "epoch": 0.30188627184526295, "grad_norm": 0.0028893048875033855, "learning_rate": 0.001, "loss": 0.4137, "step": 10941 }, { "epoch": 0.3019138640463273, "grad_norm": 0.003643547184765339, "learning_rate": 0.001, "loss": 0.3969, "step": 10942 }, { "epoch": 0.30194145624739166, "grad_norm": 0.0045185210183262825, "learning_rate": 0.001, "loss": 0.4042, "step": 10943 }, { "epoch": 0.30196904844845607, "grad_norm": 0.0024632923305034637, "learning_rate": 0.001, "loss": 0.4148, "step": 10944 }, { "epoch": 0.3019966406495204, "grad_norm": 0.0027886577881872654, "learning_rate": 0.001, "loss": 0.4051, "step": 10945 }, { "epoch": 0.3020242328505848, "grad_norm": 0.0049835750833153725, "learning_rate": 0.001, "loss": 0.3886, "step": 10946 }, { "epoch": 0.3020518250516491, "grad_norm": 0.003602303098887205, "learning_rate": 0.001, "loss": 0.3975, "step": 10947 }, { "epoch": 0.30207941725271353, "grad_norm": 0.0027262901421636343, "learning_rate": 0.001, "loss": 0.4128, "step": 10948 }, { "epoch": 0.3021070094537779, "grad_norm": 0.0026782192289829254, "learning_rate": 0.001, "loss": 0.4065, "step": 10949 }, { "epoch": 0.30213460165484224, "grad_norm": 0.0033344633411616087, "learning_rate": 0.001, "loss": 0.37, "step": 10950 }, { "epoch": 0.30216219385590665, "grad_norm": 0.007024961058050394, "learning_rate": 0.001, "loss": 0.4372, "step": 10951 }, { "epoch": 0.302189786056971, "grad_norm": 0.0027252105064690113, "learning_rate": 0.001, "loss": 0.435, "step": 10952 }, { "epoch": 0.30221737825803535, "grad_norm": 0.0036616462748497725, "learning_rate": 0.001, "loss": 0.3623, "step": 10953 }, { "epoch": 0.30224497045909976, "grad_norm": 0.0024184328503906727, "learning_rate": 0.001, "loss": 0.3791, "step": 10954 }, { "epoch": 0.3022725626601641, "grad_norm": 0.002676165895536542, "learning_rate": 0.001, "loss": 0.3811, "step": 10955 }, { "epoch": 0.30230015486122847, "grad_norm": 0.00226954510435462, "learning_rate": 0.001, "loss": 0.3708, "step": 10956 }, { "epoch": 0.3023277470622928, "grad_norm": 0.002427699277177453, "learning_rate": 0.001, "loss": 0.4012, "step": 10957 }, { "epoch": 0.3023553392633572, "grad_norm": 0.002578388201072812, "learning_rate": 0.001, "loss": 0.3645, "step": 10958 }, { "epoch": 0.3023829314644216, "grad_norm": 0.0028099005576223135, "learning_rate": 0.001, "loss": 0.3625, "step": 10959 }, { "epoch": 0.30241052366548593, "grad_norm": 0.0038160127587616444, "learning_rate": 0.001, "loss": 0.4079, "step": 10960 }, { "epoch": 0.30243811586655034, "grad_norm": 0.004297412466257811, "learning_rate": 0.001, "loss": 0.3963, "step": 10961 }, { "epoch": 0.3024657080676147, "grad_norm": 0.004003297537565231, "learning_rate": 0.001, "loss": 0.3661, "step": 10962 }, { "epoch": 0.30249330026867904, "grad_norm": 0.003333853790536523, "learning_rate": 0.001, "loss": 0.3919, "step": 10963 }, { "epoch": 0.30252089246974345, "grad_norm": 0.007552805822342634, "learning_rate": 0.001, "loss": 0.3821, "step": 10964 }, { "epoch": 0.3025484846708078, "grad_norm": 0.002244778210297227, "learning_rate": 0.001, "loss": 0.4141, "step": 10965 }, { "epoch": 0.30257607687187216, "grad_norm": 0.0028706330340355635, "learning_rate": 0.001, "loss": 0.4069, "step": 10966 }, { "epoch": 0.3026036690729365, "grad_norm": 0.002905101515352726, "learning_rate": 0.001, "loss": 0.3895, "step": 10967 }, { "epoch": 0.3026312612740009, "grad_norm": 0.0025956053286790848, "learning_rate": 0.001, "loss": 0.3999, "step": 10968 }, { "epoch": 0.30265885347506527, "grad_norm": 0.0038647784385830164, "learning_rate": 0.001, "loss": 0.3859, "step": 10969 }, { "epoch": 0.3026864456761296, "grad_norm": 0.004919635597616434, "learning_rate": 0.001, "loss": 0.4256, "step": 10970 }, { "epoch": 0.30271403787719403, "grad_norm": 0.0035008483100682497, "learning_rate": 0.001, "loss": 0.3939, "step": 10971 }, { "epoch": 0.3027416300782584, "grad_norm": 0.0027946606278419495, "learning_rate": 0.001, "loss": 0.4249, "step": 10972 }, { "epoch": 0.30276922227932274, "grad_norm": 0.0031994767487049103, "learning_rate": 0.001, "loss": 0.3945, "step": 10973 }, { "epoch": 0.30279681448038714, "grad_norm": 0.006230973172932863, "learning_rate": 0.001, "loss": 0.4049, "step": 10974 }, { "epoch": 0.3028244066814515, "grad_norm": 0.003084323601797223, "learning_rate": 0.001, "loss": 0.4212, "step": 10975 }, { "epoch": 0.30285199888251585, "grad_norm": 0.0022413854021579027, "learning_rate": 0.001, "loss": 0.4093, "step": 10976 }, { "epoch": 0.3028795910835802, "grad_norm": 0.0032844298984855413, "learning_rate": 0.001, "loss": 0.4187, "step": 10977 }, { "epoch": 0.3029071832846446, "grad_norm": 0.002548948395997286, "learning_rate": 0.001, "loss": 0.395, "step": 10978 }, { "epoch": 0.30293477548570896, "grad_norm": 0.0052003706805408, "learning_rate": 0.001, "loss": 0.4372, "step": 10979 }, { "epoch": 0.3029623676867733, "grad_norm": 0.003729903372004628, "learning_rate": 0.001, "loss": 0.3965, "step": 10980 }, { "epoch": 0.3029899598878377, "grad_norm": 0.0028468905948102474, "learning_rate": 0.001, "loss": 0.3932, "step": 10981 }, { "epoch": 0.3030175520889021, "grad_norm": 0.0020310785621404648, "learning_rate": 0.001, "loss": 0.4303, "step": 10982 }, { "epoch": 0.30304514428996643, "grad_norm": 0.002875153673812747, "learning_rate": 0.001, "loss": 0.3977, "step": 10983 }, { "epoch": 0.30307273649103084, "grad_norm": 0.003797962563112378, "learning_rate": 0.001, "loss": 0.4152, "step": 10984 }, { "epoch": 0.3031003286920952, "grad_norm": 0.003318538423627615, "learning_rate": 0.001, "loss": 0.4107, "step": 10985 }, { "epoch": 0.30312792089315954, "grad_norm": 0.0027376478537917137, "learning_rate": 0.001, "loss": 0.3777, "step": 10986 }, { "epoch": 0.3031555130942239, "grad_norm": 0.0021703215315937996, "learning_rate": 0.001, "loss": 0.4027, "step": 10987 }, { "epoch": 0.3031831052952883, "grad_norm": 0.00254058837890625, "learning_rate": 0.001, "loss": 0.3964, "step": 10988 }, { "epoch": 0.30321069749635265, "grad_norm": 0.0031664727721363306, "learning_rate": 0.001, "loss": 0.3666, "step": 10989 }, { "epoch": 0.303238289697417, "grad_norm": 0.003056199988350272, "learning_rate": 0.001, "loss": 0.3985, "step": 10990 }, { "epoch": 0.3032658818984814, "grad_norm": 0.002997304080054164, "learning_rate": 0.001, "loss": 0.4234, "step": 10991 }, { "epoch": 0.30329347409954577, "grad_norm": 0.0028167872224003077, "learning_rate": 0.001, "loss": 0.4345, "step": 10992 }, { "epoch": 0.3033210663006101, "grad_norm": 0.0038003227673470974, "learning_rate": 0.001, "loss": 0.4154, "step": 10993 }, { "epoch": 0.30334865850167453, "grad_norm": 0.010225886479020119, "learning_rate": 0.001, "loss": 0.3884, "step": 10994 }, { "epoch": 0.3033762507027389, "grad_norm": 0.004211194813251495, "learning_rate": 0.001, "loss": 0.3673, "step": 10995 }, { "epoch": 0.30340384290380323, "grad_norm": 0.0034563657827675343, "learning_rate": 0.001, "loss": 0.4128, "step": 10996 }, { "epoch": 0.3034314351048676, "grad_norm": 0.0032761215697973967, "learning_rate": 0.001, "loss": 0.3444, "step": 10997 }, { "epoch": 0.303459027305932, "grad_norm": 0.0042040105909109116, "learning_rate": 0.001, "loss": 0.3889, "step": 10998 }, { "epoch": 0.30348661950699635, "grad_norm": 0.005281214602291584, "learning_rate": 0.001, "loss": 0.4308, "step": 10999 }, { "epoch": 0.3035142117080607, "grad_norm": 0.0027140434831380844, "learning_rate": 0.001, "loss": 0.414, "step": 11000 }, { "epoch": 0.3035142117080607, "eval_runtime": 23.9221, "eval_samples_per_second": 1.338, "eval_steps_per_second": 0.167, "step": 11000 }, { "epoch": 0.3035418039091251, "grad_norm": 0.0059446897357702255, "learning_rate": 0.001, "loss": 0.3585, "step": 11001 }, { "epoch": 0.30356939611018946, "grad_norm": 0.002668326487764716, "learning_rate": 0.001, "loss": 0.3736, "step": 11002 }, { "epoch": 0.3035969883112538, "grad_norm": 0.002568204887211323, "learning_rate": 0.001, "loss": 0.3838, "step": 11003 }, { "epoch": 0.3036245805123182, "grad_norm": 0.002712225541472435, "learning_rate": 0.001, "loss": 0.4075, "step": 11004 }, { "epoch": 0.3036521727133826, "grad_norm": 0.004501170478761196, "learning_rate": 0.001, "loss": 0.3691, "step": 11005 }, { "epoch": 0.3036797649144469, "grad_norm": 0.005471101030707359, "learning_rate": 0.001, "loss": 0.3584, "step": 11006 }, { "epoch": 0.3037073571155113, "grad_norm": 0.0026964505668729544, "learning_rate": 0.001, "loss": 0.4092, "step": 11007 }, { "epoch": 0.3037349493165757, "grad_norm": 0.003707374446094036, "learning_rate": 0.001, "loss": 0.3823, "step": 11008 }, { "epoch": 0.30376254151764004, "grad_norm": 0.012931153178215027, "learning_rate": 0.001, "loss": 0.4105, "step": 11009 }, { "epoch": 0.3037901337187044, "grad_norm": 0.009963351301848888, "learning_rate": 0.001, "loss": 0.3633, "step": 11010 }, { "epoch": 0.3038177259197688, "grad_norm": 0.0042528132908046246, "learning_rate": 0.001, "loss": 0.4651, "step": 11011 }, { "epoch": 0.30384531812083315, "grad_norm": 0.004563808441162109, "learning_rate": 0.001, "loss": 0.4025, "step": 11012 }, { "epoch": 0.3038729103218975, "grad_norm": 0.0027470001950860023, "learning_rate": 0.001, "loss": 0.4025, "step": 11013 }, { "epoch": 0.30390050252296186, "grad_norm": 0.0026821244973689318, "learning_rate": 0.001, "loss": 0.3991, "step": 11014 }, { "epoch": 0.30392809472402627, "grad_norm": 0.002528084209188819, "learning_rate": 0.001, "loss": 0.3951, "step": 11015 }, { "epoch": 0.3039556869250906, "grad_norm": 0.005667414516210556, "learning_rate": 0.001, "loss": 0.3563, "step": 11016 }, { "epoch": 0.30398327912615497, "grad_norm": 0.0031073635909706354, "learning_rate": 0.001, "loss": 0.3783, "step": 11017 }, { "epoch": 0.3040108713272194, "grad_norm": 0.0026901857927441597, "learning_rate": 0.001, "loss": 0.3659, "step": 11018 }, { "epoch": 0.30403846352828373, "grad_norm": 0.0030291874427348375, "learning_rate": 0.001, "loss": 0.373, "step": 11019 }, { "epoch": 0.3040660557293481, "grad_norm": 0.0024688898120075464, "learning_rate": 0.001, "loss": 0.3581, "step": 11020 }, { "epoch": 0.3040936479304125, "grad_norm": 0.0029477437492460012, "learning_rate": 0.001, "loss": 0.3858, "step": 11021 }, { "epoch": 0.30412124013147684, "grad_norm": 0.0034905215725302696, "learning_rate": 0.001, "loss": 0.4062, "step": 11022 }, { "epoch": 0.3041488323325412, "grad_norm": 0.0028976472094655037, "learning_rate": 0.001, "loss": 0.4163, "step": 11023 }, { "epoch": 0.30417642453360555, "grad_norm": 0.002226645825430751, "learning_rate": 0.001, "loss": 0.4025, "step": 11024 }, { "epoch": 0.30420401673466996, "grad_norm": 0.00522614223882556, "learning_rate": 0.001, "loss": 0.3829, "step": 11025 }, { "epoch": 0.3042316089357343, "grad_norm": 0.004471330437809229, "learning_rate": 0.001, "loss": 0.4359, "step": 11026 }, { "epoch": 0.30425920113679866, "grad_norm": 0.0025410952512174845, "learning_rate": 0.001, "loss": 0.4367, "step": 11027 }, { "epoch": 0.30428679333786307, "grad_norm": 0.0039034727960824966, "learning_rate": 0.001, "loss": 0.3898, "step": 11028 }, { "epoch": 0.3043143855389274, "grad_norm": 0.0032912297174334526, "learning_rate": 0.001, "loss": 0.4122, "step": 11029 }, { "epoch": 0.3043419777399918, "grad_norm": 0.009197532199323177, "learning_rate": 0.001, "loss": 0.4057, "step": 11030 }, { "epoch": 0.3043695699410562, "grad_norm": 0.003327523358166218, "learning_rate": 0.001, "loss": 0.3578, "step": 11031 }, { "epoch": 0.30439716214212054, "grad_norm": 0.0036153208930045366, "learning_rate": 0.001, "loss": 0.3604, "step": 11032 }, { "epoch": 0.3044247543431849, "grad_norm": 0.0028802400920540094, "learning_rate": 0.001, "loss": 0.408, "step": 11033 }, { "epoch": 0.30445234654424924, "grad_norm": 0.0023775347508490086, "learning_rate": 0.001, "loss": 0.4018, "step": 11034 }, { "epoch": 0.30447993874531365, "grad_norm": 0.007655430119484663, "learning_rate": 0.001, "loss": 0.3631, "step": 11035 }, { "epoch": 0.304507530946378, "grad_norm": 0.0038870847783982754, "learning_rate": 0.001, "loss": 0.4255, "step": 11036 }, { "epoch": 0.30453512314744235, "grad_norm": 0.010876862332224846, "learning_rate": 0.001, "loss": 0.3784, "step": 11037 }, { "epoch": 0.30456271534850676, "grad_norm": 0.01851513236761093, "learning_rate": 0.001, "loss": 0.4014, "step": 11038 }, { "epoch": 0.3045903075495711, "grad_norm": 0.0071109263226389885, "learning_rate": 0.001, "loss": 0.3756, "step": 11039 }, { "epoch": 0.30461789975063547, "grad_norm": 0.002972143003717065, "learning_rate": 0.001, "loss": 0.3919, "step": 11040 }, { "epoch": 0.3046454919516999, "grad_norm": 0.0042697228491306305, "learning_rate": 0.001, "loss": 0.3751, "step": 11041 }, { "epoch": 0.30467308415276423, "grad_norm": 0.0034229194279760122, "learning_rate": 0.001, "loss": 0.4196, "step": 11042 }, { "epoch": 0.3047006763538286, "grad_norm": 0.0030748506542295218, "learning_rate": 0.001, "loss": 0.371, "step": 11043 }, { "epoch": 0.30472826855489293, "grad_norm": 0.003974152263253927, "learning_rate": 0.001, "loss": 0.4025, "step": 11044 }, { "epoch": 0.30475586075595734, "grad_norm": 0.0025529295671731234, "learning_rate": 0.001, "loss": 0.3786, "step": 11045 }, { "epoch": 0.3047834529570217, "grad_norm": 0.002749155042693019, "learning_rate": 0.001, "loss": 0.4034, "step": 11046 }, { "epoch": 0.30481104515808605, "grad_norm": 0.004938180558383465, "learning_rate": 0.001, "loss": 0.3687, "step": 11047 }, { "epoch": 0.30483863735915046, "grad_norm": 0.0023326098453253508, "learning_rate": 0.001, "loss": 0.4322, "step": 11048 }, { "epoch": 0.3048662295602148, "grad_norm": 0.0040608481504023075, "learning_rate": 0.001, "loss": 0.409, "step": 11049 }, { "epoch": 0.30489382176127916, "grad_norm": 0.004212150815874338, "learning_rate": 0.001, "loss": 0.3593, "step": 11050 }, { "epoch": 0.30492141396234357, "grad_norm": 0.0022898244205862284, "learning_rate": 0.001, "loss": 0.4135, "step": 11051 }, { "epoch": 0.3049490061634079, "grad_norm": 0.009196506813168526, "learning_rate": 0.001, "loss": 0.4093, "step": 11052 }, { "epoch": 0.3049765983644723, "grad_norm": 0.0035148763563483953, "learning_rate": 0.001, "loss": 0.4245, "step": 11053 }, { "epoch": 0.3050041905655366, "grad_norm": 0.002594373654574156, "learning_rate": 0.001, "loss": 0.3939, "step": 11054 }, { "epoch": 0.30503178276660103, "grad_norm": 0.0025384812615811825, "learning_rate": 0.001, "loss": 0.3966, "step": 11055 }, { "epoch": 0.3050593749676654, "grad_norm": 0.0027979982551187277, "learning_rate": 0.001, "loss": 0.3702, "step": 11056 }, { "epoch": 0.30508696716872974, "grad_norm": 0.0027982275933027267, "learning_rate": 0.001, "loss": 0.3994, "step": 11057 }, { "epoch": 0.30511455936979415, "grad_norm": 0.002686277497559786, "learning_rate": 0.001, "loss": 0.3885, "step": 11058 }, { "epoch": 0.3051421515708585, "grad_norm": 0.0058397226966917515, "learning_rate": 0.001, "loss": 0.4108, "step": 11059 }, { "epoch": 0.30516974377192285, "grad_norm": 0.0036498201079666615, "learning_rate": 0.001, "loss": 0.4319, "step": 11060 }, { "epoch": 0.30519733597298726, "grad_norm": 0.0023296461440622807, "learning_rate": 0.001, "loss": 0.4174, "step": 11061 }, { "epoch": 0.3052249281740516, "grad_norm": 0.024197006598114967, "learning_rate": 0.001, "loss": 0.3755, "step": 11062 }, { "epoch": 0.30525252037511597, "grad_norm": 0.005508562549948692, "learning_rate": 0.001, "loss": 0.402, "step": 11063 }, { "epoch": 0.3052801125761803, "grad_norm": 0.009980360977351665, "learning_rate": 0.001, "loss": 0.4399, "step": 11064 }, { "epoch": 0.3053077047772447, "grad_norm": 0.006406312808394432, "learning_rate": 0.001, "loss": 0.4074, "step": 11065 }, { "epoch": 0.3053352969783091, "grad_norm": 0.00675487145781517, "learning_rate": 0.001, "loss": 0.3866, "step": 11066 }, { "epoch": 0.30536288917937343, "grad_norm": 0.00432139215990901, "learning_rate": 0.001, "loss": 0.4053, "step": 11067 }, { "epoch": 0.30539048138043784, "grad_norm": 0.004939731676131487, "learning_rate": 0.001, "loss": 0.3877, "step": 11068 }, { "epoch": 0.3054180735815022, "grad_norm": 0.048318974673748016, "learning_rate": 0.001, "loss": 0.3609, "step": 11069 }, { "epoch": 0.30544566578256654, "grad_norm": 0.002895533572882414, "learning_rate": 0.001, "loss": 0.4055, "step": 11070 }, { "epoch": 0.30547325798363095, "grad_norm": 0.0036742573138326406, "learning_rate": 0.001, "loss": 0.3945, "step": 11071 }, { "epoch": 0.3055008501846953, "grad_norm": 0.0037072747945785522, "learning_rate": 0.001, "loss": 0.4032, "step": 11072 }, { "epoch": 0.30552844238575966, "grad_norm": 0.003319724928587675, "learning_rate": 0.001, "loss": 0.4217, "step": 11073 }, { "epoch": 0.305556034586824, "grad_norm": 0.002916824072599411, "learning_rate": 0.001, "loss": 0.4025, "step": 11074 }, { "epoch": 0.3055836267878884, "grad_norm": 0.00400786055251956, "learning_rate": 0.001, "loss": 0.4225, "step": 11075 }, { "epoch": 0.30561121898895277, "grad_norm": 0.003279311815276742, "learning_rate": 0.001, "loss": 0.3989, "step": 11076 }, { "epoch": 0.3056388111900171, "grad_norm": 0.003970045130699873, "learning_rate": 0.001, "loss": 0.4058, "step": 11077 }, { "epoch": 0.30566640339108153, "grad_norm": 0.0034616603516042233, "learning_rate": 0.001, "loss": 0.386, "step": 11078 }, { "epoch": 0.3056939955921459, "grad_norm": 0.003935270942747593, "learning_rate": 0.001, "loss": 0.3998, "step": 11079 }, { "epoch": 0.30572158779321024, "grad_norm": 0.005806989502161741, "learning_rate": 0.001, "loss": 0.3649, "step": 11080 }, { "epoch": 0.30574917999427464, "grad_norm": 0.004018415231257677, "learning_rate": 0.001, "loss": 0.3918, "step": 11081 }, { "epoch": 0.305776772195339, "grad_norm": 0.0026464585680514574, "learning_rate": 0.001, "loss": 0.3754, "step": 11082 }, { "epoch": 0.30580436439640335, "grad_norm": 0.006566127296537161, "learning_rate": 0.001, "loss": 0.378, "step": 11083 }, { "epoch": 0.3058319565974677, "grad_norm": 0.0035378343891352415, "learning_rate": 0.001, "loss": 0.3585, "step": 11084 }, { "epoch": 0.3058595487985321, "grad_norm": 0.005152801051735878, "learning_rate": 0.001, "loss": 0.4044, "step": 11085 }, { "epoch": 0.30588714099959646, "grad_norm": 0.0025191842578351498, "learning_rate": 0.001, "loss": 0.4112, "step": 11086 }, { "epoch": 0.3059147332006608, "grad_norm": 0.002810958307236433, "learning_rate": 0.001, "loss": 0.3963, "step": 11087 }, { "epoch": 0.3059423254017252, "grad_norm": 0.003942539449781179, "learning_rate": 0.001, "loss": 0.3939, "step": 11088 }, { "epoch": 0.3059699176027896, "grad_norm": 0.0032198168337345123, "learning_rate": 0.001, "loss": 0.4049, "step": 11089 }, { "epoch": 0.30599750980385393, "grad_norm": 0.004471073392778635, "learning_rate": 0.001, "loss": 0.3834, "step": 11090 }, { "epoch": 0.30602510200491834, "grad_norm": 0.0025380223523825407, "learning_rate": 0.001, "loss": 0.4236, "step": 11091 }, { "epoch": 0.3060526942059827, "grad_norm": 0.003936004359275103, "learning_rate": 0.001, "loss": 0.3769, "step": 11092 }, { "epoch": 0.30608028640704704, "grad_norm": 0.0035589123144745827, "learning_rate": 0.001, "loss": 0.3807, "step": 11093 }, { "epoch": 0.3061078786081114, "grad_norm": 0.003829971654340625, "learning_rate": 0.001, "loss": 0.4002, "step": 11094 }, { "epoch": 0.3061354708091758, "grad_norm": 0.0033564860932528973, "learning_rate": 0.001, "loss": 0.3849, "step": 11095 }, { "epoch": 0.30616306301024016, "grad_norm": 0.006199992261826992, "learning_rate": 0.001, "loss": 0.4171, "step": 11096 }, { "epoch": 0.3061906552113045, "grad_norm": 0.0038411279674619436, "learning_rate": 0.001, "loss": 0.4201, "step": 11097 }, { "epoch": 0.3062182474123689, "grad_norm": 0.0028849910013377666, "learning_rate": 0.001, "loss": 0.4105, "step": 11098 }, { "epoch": 0.30624583961343327, "grad_norm": 0.005419904366135597, "learning_rate": 0.001, "loss": 0.3878, "step": 11099 }, { "epoch": 0.3062734318144976, "grad_norm": 0.004621229134500027, "learning_rate": 0.001, "loss": 0.4035, "step": 11100 }, { "epoch": 0.30630102401556203, "grad_norm": 0.004461618140339851, "learning_rate": 0.001, "loss": 0.3947, "step": 11101 }, { "epoch": 0.3063286162166264, "grad_norm": 0.0031210975721478462, "learning_rate": 0.001, "loss": 0.402, "step": 11102 }, { "epoch": 0.30635620841769073, "grad_norm": 0.0036159909795969725, "learning_rate": 0.001, "loss": 0.3989, "step": 11103 }, { "epoch": 0.3063838006187551, "grad_norm": 0.0030616209842264652, "learning_rate": 0.001, "loss": 0.3793, "step": 11104 }, { "epoch": 0.3064113928198195, "grad_norm": 0.004985187668353319, "learning_rate": 0.001, "loss": 0.3857, "step": 11105 }, { "epoch": 0.30643898502088385, "grad_norm": 0.0028018758166581392, "learning_rate": 0.001, "loss": 0.4123, "step": 11106 }, { "epoch": 0.3064665772219482, "grad_norm": 0.004196144640445709, "learning_rate": 0.001, "loss": 0.3706, "step": 11107 }, { "epoch": 0.3064941694230126, "grad_norm": 0.0038290584925562143, "learning_rate": 0.001, "loss": 0.3843, "step": 11108 }, { "epoch": 0.30652176162407696, "grad_norm": 0.0028051917906850576, "learning_rate": 0.001, "loss": 0.4059, "step": 11109 }, { "epoch": 0.3065493538251413, "grad_norm": 0.003199791768565774, "learning_rate": 0.001, "loss": 0.3826, "step": 11110 }, { "epoch": 0.30657694602620567, "grad_norm": 0.0040592108853161335, "learning_rate": 0.001, "loss": 0.4014, "step": 11111 }, { "epoch": 0.3066045382272701, "grad_norm": 0.005883520934730768, "learning_rate": 0.001, "loss": 0.3825, "step": 11112 }, { "epoch": 0.3066321304283344, "grad_norm": 0.0024851111229509115, "learning_rate": 0.001, "loss": 0.4243, "step": 11113 }, { "epoch": 0.3066597226293988, "grad_norm": 0.003925004508346319, "learning_rate": 0.001, "loss": 0.3919, "step": 11114 }, { "epoch": 0.3066873148304632, "grad_norm": 0.0040878294967114925, "learning_rate": 0.001, "loss": 0.3982, "step": 11115 }, { "epoch": 0.30671490703152754, "grad_norm": 0.0032062202226370573, "learning_rate": 0.001, "loss": 0.3999, "step": 11116 }, { "epoch": 0.3067424992325919, "grad_norm": 0.004021904431283474, "learning_rate": 0.001, "loss": 0.4104, "step": 11117 }, { "epoch": 0.3067700914336563, "grad_norm": 0.002459887880831957, "learning_rate": 0.001, "loss": 0.4044, "step": 11118 }, { "epoch": 0.30679768363472065, "grad_norm": 0.0019991539884358644, "learning_rate": 0.001, "loss": 0.4321, "step": 11119 }, { "epoch": 0.306825275835785, "grad_norm": 0.002835212042555213, "learning_rate": 0.001, "loss": 0.3878, "step": 11120 }, { "epoch": 0.30685286803684936, "grad_norm": 0.003514475654810667, "learning_rate": 0.001, "loss": 0.4045, "step": 11121 }, { "epoch": 0.30688046023791377, "grad_norm": 0.0025088752154260874, "learning_rate": 0.001, "loss": 0.3913, "step": 11122 }, { "epoch": 0.3069080524389781, "grad_norm": 0.0025806506164371967, "learning_rate": 0.001, "loss": 0.425, "step": 11123 }, { "epoch": 0.30693564464004247, "grad_norm": 0.0025829877704381943, "learning_rate": 0.001, "loss": 0.3979, "step": 11124 }, { "epoch": 0.3069632368411069, "grad_norm": 0.0033166445791721344, "learning_rate": 0.001, "loss": 0.3884, "step": 11125 }, { "epoch": 0.30699082904217123, "grad_norm": 0.0023280063178390265, "learning_rate": 0.001, "loss": 0.4089, "step": 11126 }, { "epoch": 0.3070184212432356, "grad_norm": 0.004174636676907539, "learning_rate": 0.001, "loss": 0.3766, "step": 11127 }, { "epoch": 0.3070460134443, "grad_norm": 0.002358420053496957, "learning_rate": 0.001, "loss": 0.4037, "step": 11128 }, { "epoch": 0.30707360564536434, "grad_norm": 0.002632340881973505, "learning_rate": 0.001, "loss": 0.4251, "step": 11129 }, { "epoch": 0.3071011978464287, "grad_norm": 0.004848247393965721, "learning_rate": 0.001, "loss": 0.4432, "step": 11130 }, { "epoch": 0.30712879004749305, "grad_norm": 0.002614147262647748, "learning_rate": 0.001, "loss": 0.3682, "step": 11131 }, { "epoch": 0.30715638224855746, "grad_norm": 0.0026096473447978497, "learning_rate": 0.001, "loss": 0.4032, "step": 11132 }, { "epoch": 0.3071839744496218, "grad_norm": 0.002825048053637147, "learning_rate": 0.001, "loss": 0.3376, "step": 11133 }, { "epoch": 0.30721156665068616, "grad_norm": 0.002993338042870164, "learning_rate": 0.001, "loss": 0.3784, "step": 11134 }, { "epoch": 0.30723915885175057, "grad_norm": 0.0025206785649061203, "learning_rate": 0.001, "loss": 0.4025, "step": 11135 }, { "epoch": 0.3072667510528149, "grad_norm": 0.0064323414117097855, "learning_rate": 0.001, "loss": 0.3892, "step": 11136 }, { "epoch": 0.3072943432538793, "grad_norm": 0.006029163021594286, "learning_rate": 0.001, "loss": 0.4013, "step": 11137 }, { "epoch": 0.3073219354549437, "grad_norm": 0.003802549559623003, "learning_rate": 0.001, "loss": 0.4149, "step": 11138 }, { "epoch": 0.30734952765600804, "grad_norm": 0.0036848250310868025, "learning_rate": 0.001, "loss": 0.408, "step": 11139 }, { "epoch": 0.3073771198570724, "grad_norm": 0.002685883082449436, "learning_rate": 0.001, "loss": 0.3675, "step": 11140 }, { "epoch": 0.30740471205813674, "grad_norm": 0.004660370759665966, "learning_rate": 0.001, "loss": 0.4285, "step": 11141 }, { "epoch": 0.30743230425920115, "grad_norm": 0.004408833105117083, "learning_rate": 0.001, "loss": 0.4147, "step": 11142 }, { "epoch": 0.3074598964602655, "grad_norm": 0.003984685987234116, "learning_rate": 0.001, "loss": 0.3991, "step": 11143 }, { "epoch": 0.30748748866132986, "grad_norm": 0.0034449570812284946, "learning_rate": 0.001, "loss": 0.392, "step": 11144 }, { "epoch": 0.30751508086239426, "grad_norm": 0.0050663729198277, "learning_rate": 0.001, "loss": 0.4088, "step": 11145 }, { "epoch": 0.3075426730634586, "grad_norm": 0.0025960092898458242, "learning_rate": 0.001, "loss": 0.3755, "step": 11146 }, { "epoch": 0.30757026526452297, "grad_norm": 0.003925015218555927, "learning_rate": 0.001, "loss": 0.4243, "step": 11147 }, { "epoch": 0.3075978574655874, "grad_norm": 0.003075996646657586, "learning_rate": 0.001, "loss": 0.3816, "step": 11148 }, { "epoch": 0.30762544966665173, "grad_norm": 0.0028860154561698437, "learning_rate": 0.001, "loss": 0.4019, "step": 11149 }, { "epoch": 0.3076530418677161, "grad_norm": 0.0029483395628631115, "learning_rate": 0.001, "loss": 0.3814, "step": 11150 }, { "epoch": 0.30768063406878043, "grad_norm": 0.006685647647827864, "learning_rate": 0.001, "loss": 0.4076, "step": 11151 }, { "epoch": 0.30770822626984484, "grad_norm": 0.0028818738646805286, "learning_rate": 0.001, "loss": 0.3885, "step": 11152 }, { "epoch": 0.3077358184709092, "grad_norm": 0.003373377723619342, "learning_rate": 0.001, "loss": 0.3959, "step": 11153 }, { "epoch": 0.30776341067197355, "grad_norm": 0.005747408606112003, "learning_rate": 0.001, "loss": 0.4192, "step": 11154 }, { "epoch": 0.30779100287303796, "grad_norm": 0.003609336679801345, "learning_rate": 0.001, "loss": 0.4214, "step": 11155 }, { "epoch": 0.3078185950741023, "grad_norm": 0.005563123617321253, "learning_rate": 0.001, "loss": 0.4268, "step": 11156 }, { "epoch": 0.30784618727516666, "grad_norm": 0.003597275586798787, "learning_rate": 0.001, "loss": 0.4104, "step": 11157 }, { "epoch": 0.30787377947623107, "grad_norm": 0.0027966846246272326, "learning_rate": 0.001, "loss": 0.4379, "step": 11158 }, { "epoch": 0.3079013716772954, "grad_norm": 0.002950585214421153, "learning_rate": 0.001, "loss": 0.3762, "step": 11159 }, { "epoch": 0.3079289638783598, "grad_norm": 0.0030036913231015205, "learning_rate": 0.001, "loss": 0.4352, "step": 11160 }, { "epoch": 0.3079565560794241, "grad_norm": 0.004463705699890852, "learning_rate": 0.001, "loss": 0.4056, "step": 11161 }, { "epoch": 0.30798414828048853, "grad_norm": 0.0020041607785969973, "learning_rate": 0.001, "loss": 0.4238, "step": 11162 }, { "epoch": 0.3080117404815529, "grad_norm": 0.003834531642496586, "learning_rate": 0.001, "loss": 0.3812, "step": 11163 }, { "epoch": 0.30803933268261724, "grad_norm": 0.002345136133953929, "learning_rate": 0.001, "loss": 0.4017, "step": 11164 }, { "epoch": 0.30806692488368165, "grad_norm": 0.0028496733866631985, "learning_rate": 0.001, "loss": 0.3947, "step": 11165 }, { "epoch": 0.308094517084746, "grad_norm": 0.003474792465567589, "learning_rate": 0.001, "loss": 0.4264, "step": 11166 }, { "epoch": 0.30812210928581035, "grad_norm": 0.0036203833296895027, "learning_rate": 0.001, "loss": 0.4084, "step": 11167 }, { "epoch": 0.30814970148687476, "grad_norm": 0.004971282556653023, "learning_rate": 0.001, "loss": 0.3704, "step": 11168 }, { "epoch": 0.3081772936879391, "grad_norm": 0.007116068620234728, "learning_rate": 0.001, "loss": 0.4281, "step": 11169 }, { "epoch": 0.30820488588900347, "grad_norm": 0.005289649125188589, "learning_rate": 0.001, "loss": 0.3739, "step": 11170 }, { "epoch": 0.3082324780900678, "grad_norm": 0.0034187568817287683, "learning_rate": 0.001, "loss": 0.3981, "step": 11171 }, { "epoch": 0.3082600702911322, "grad_norm": 0.002938291057944298, "learning_rate": 0.001, "loss": 0.432, "step": 11172 }, { "epoch": 0.3082876624921966, "grad_norm": 0.004787916783243418, "learning_rate": 0.001, "loss": 0.4073, "step": 11173 }, { "epoch": 0.30831525469326093, "grad_norm": 0.0029457362834364176, "learning_rate": 0.001, "loss": 0.4207, "step": 11174 }, { "epoch": 0.30834284689432534, "grad_norm": 0.006731043569743633, "learning_rate": 0.001, "loss": 0.3951, "step": 11175 }, { "epoch": 0.3083704390953897, "grad_norm": 0.005668407306075096, "learning_rate": 0.001, "loss": 0.4199, "step": 11176 }, { "epoch": 0.30839803129645404, "grad_norm": 0.0028704048600047827, "learning_rate": 0.001, "loss": 0.4402, "step": 11177 }, { "epoch": 0.30842562349751845, "grad_norm": 0.00468471460044384, "learning_rate": 0.001, "loss": 0.4205, "step": 11178 }, { "epoch": 0.3084532156985828, "grad_norm": 0.003784140106290579, "learning_rate": 0.001, "loss": 0.42, "step": 11179 }, { "epoch": 0.30848080789964716, "grad_norm": 0.0028519639745354652, "learning_rate": 0.001, "loss": 0.4257, "step": 11180 }, { "epoch": 0.3085084001007115, "grad_norm": 0.004001649562269449, "learning_rate": 0.001, "loss": 0.3933, "step": 11181 }, { "epoch": 0.3085359923017759, "grad_norm": 0.0034179389476776123, "learning_rate": 0.001, "loss": 0.3608, "step": 11182 }, { "epoch": 0.30856358450284027, "grad_norm": 0.004161709453910589, "learning_rate": 0.001, "loss": 0.3561, "step": 11183 }, { "epoch": 0.3085911767039046, "grad_norm": 0.0021271705627441406, "learning_rate": 0.001, "loss": 0.4045, "step": 11184 }, { "epoch": 0.30861876890496903, "grad_norm": 0.004352390766143799, "learning_rate": 0.001, "loss": 0.4272, "step": 11185 }, { "epoch": 0.3086463611060334, "grad_norm": 0.0035197525285184383, "learning_rate": 0.001, "loss": 0.3793, "step": 11186 }, { "epoch": 0.30867395330709774, "grad_norm": 0.003885299200192094, "learning_rate": 0.001, "loss": 0.4038, "step": 11187 }, { "epoch": 0.30870154550816215, "grad_norm": 0.004519937559962273, "learning_rate": 0.001, "loss": 0.4017, "step": 11188 }, { "epoch": 0.3087291377092265, "grad_norm": 0.001987120136618614, "learning_rate": 0.001, "loss": 0.4137, "step": 11189 }, { "epoch": 0.30875672991029085, "grad_norm": 0.003369292477145791, "learning_rate": 0.001, "loss": 0.3709, "step": 11190 }, { "epoch": 0.3087843221113552, "grad_norm": 0.0031297069508582354, "learning_rate": 0.001, "loss": 0.3769, "step": 11191 }, { "epoch": 0.3088119143124196, "grad_norm": 0.0024408060126006603, "learning_rate": 0.001, "loss": 0.4602, "step": 11192 }, { "epoch": 0.30883950651348396, "grad_norm": 0.0031313635408878326, "learning_rate": 0.001, "loss": 0.4325, "step": 11193 }, { "epoch": 0.3088670987145483, "grad_norm": 0.0029580635018646717, "learning_rate": 0.001, "loss": 0.3781, "step": 11194 }, { "epoch": 0.3088946909156127, "grad_norm": 0.0027078099083155394, "learning_rate": 0.001, "loss": 0.3863, "step": 11195 }, { "epoch": 0.3089222831166771, "grad_norm": 0.0038466129917651415, "learning_rate": 0.001, "loss": 0.4153, "step": 11196 }, { "epoch": 0.30894987531774143, "grad_norm": 0.003398042405024171, "learning_rate": 0.001, "loss": 0.3995, "step": 11197 }, { "epoch": 0.30897746751880584, "grad_norm": 0.007685355842113495, "learning_rate": 0.001, "loss": 0.4019, "step": 11198 }, { "epoch": 0.3090050597198702, "grad_norm": 0.005074410233646631, "learning_rate": 0.001, "loss": 0.3998, "step": 11199 }, { "epoch": 0.30903265192093454, "grad_norm": 0.003894890658557415, "learning_rate": 0.001, "loss": 0.4394, "step": 11200 }, { "epoch": 0.3090602441219989, "grad_norm": 0.0026795901358127594, "learning_rate": 0.001, "loss": 0.4315, "step": 11201 }, { "epoch": 0.3090878363230633, "grad_norm": 0.003955294843763113, "learning_rate": 0.001, "loss": 0.3723, "step": 11202 }, { "epoch": 0.30911542852412766, "grad_norm": 0.0025795320980250835, "learning_rate": 0.001, "loss": 0.4317, "step": 11203 }, { "epoch": 0.309143020725192, "grad_norm": 0.004697353113442659, "learning_rate": 0.001, "loss": 0.368, "step": 11204 }, { "epoch": 0.3091706129262564, "grad_norm": 0.0036036588717252016, "learning_rate": 0.001, "loss": 0.376, "step": 11205 }, { "epoch": 0.30919820512732077, "grad_norm": 0.002920630620792508, "learning_rate": 0.001, "loss": 0.4238, "step": 11206 }, { "epoch": 0.3092257973283851, "grad_norm": 0.00252629560418427, "learning_rate": 0.001, "loss": 0.4474, "step": 11207 }, { "epoch": 0.3092533895294495, "grad_norm": 0.002593503100797534, "learning_rate": 0.001, "loss": 0.4349, "step": 11208 }, { "epoch": 0.3092809817305139, "grad_norm": 0.002563952933996916, "learning_rate": 0.001, "loss": 0.3945, "step": 11209 }, { "epoch": 0.30930857393157823, "grad_norm": 0.00308457319624722, "learning_rate": 0.001, "loss": 0.3698, "step": 11210 }, { "epoch": 0.3093361661326426, "grad_norm": 0.004335676319897175, "learning_rate": 0.001, "loss": 0.3702, "step": 11211 }, { "epoch": 0.309363758333707, "grad_norm": 0.004092890303581953, "learning_rate": 0.001, "loss": 0.3871, "step": 11212 }, { "epoch": 0.30939135053477135, "grad_norm": 0.003627408528700471, "learning_rate": 0.001, "loss": 0.4175, "step": 11213 }, { "epoch": 0.3094189427358357, "grad_norm": 0.0031013472471386194, "learning_rate": 0.001, "loss": 0.3755, "step": 11214 }, { "epoch": 0.3094465349369001, "grad_norm": 0.002774468855932355, "learning_rate": 0.001, "loss": 0.434, "step": 11215 }, { "epoch": 0.30947412713796446, "grad_norm": 0.0022229300811886787, "learning_rate": 0.001, "loss": 0.4303, "step": 11216 }, { "epoch": 0.3095017193390288, "grad_norm": 0.004068805370479822, "learning_rate": 0.001, "loss": 0.3823, "step": 11217 }, { "epoch": 0.30952931154009317, "grad_norm": 0.0022410049568861723, "learning_rate": 0.001, "loss": 0.4067, "step": 11218 }, { "epoch": 0.3095569037411576, "grad_norm": 0.002807425567880273, "learning_rate": 0.001, "loss": 0.409, "step": 11219 }, { "epoch": 0.3095844959422219, "grad_norm": 0.008168449625372887, "learning_rate": 0.001, "loss": 0.4257, "step": 11220 }, { "epoch": 0.3096120881432863, "grad_norm": 0.007224308326840401, "learning_rate": 0.001, "loss": 0.3927, "step": 11221 }, { "epoch": 0.3096396803443507, "grad_norm": 0.004867930430918932, "learning_rate": 0.001, "loss": 0.4163, "step": 11222 }, { "epoch": 0.30966727254541504, "grad_norm": 0.004449460655450821, "learning_rate": 0.001, "loss": 0.4065, "step": 11223 }, { "epoch": 0.3096948647464794, "grad_norm": 0.003997356165200472, "learning_rate": 0.001, "loss": 0.3887, "step": 11224 }, { "epoch": 0.3097224569475438, "grad_norm": 0.0036069692578166723, "learning_rate": 0.001, "loss": 0.3726, "step": 11225 }, { "epoch": 0.30975004914860815, "grad_norm": 0.003934454172849655, "learning_rate": 0.001, "loss": 0.4149, "step": 11226 }, { "epoch": 0.3097776413496725, "grad_norm": 0.01457425020635128, "learning_rate": 0.001, "loss": 0.3672, "step": 11227 }, { "epoch": 0.30980523355073686, "grad_norm": 0.004006854724138975, "learning_rate": 0.001, "loss": 0.3849, "step": 11228 }, { "epoch": 0.30983282575180127, "grad_norm": 0.0053675188682973385, "learning_rate": 0.001, "loss": 0.3782, "step": 11229 }, { "epoch": 0.3098604179528656, "grad_norm": 0.0030079265125095844, "learning_rate": 0.001, "loss": 0.3465, "step": 11230 }, { "epoch": 0.30988801015392997, "grad_norm": 0.004947451408952475, "learning_rate": 0.001, "loss": 0.4309, "step": 11231 }, { "epoch": 0.3099156023549944, "grad_norm": 0.0025891209952533245, "learning_rate": 0.001, "loss": 0.3676, "step": 11232 }, { "epoch": 0.30994319455605873, "grad_norm": 0.0022305548191070557, "learning_rate": 0.001, "loss": 0.4611, "step": 11233 }, { "epoch": 0.3099707867571231, "grad_norm": 0.0024289439897984266, "learning_rate": 0.001, "loss": 0.416, "step": 11234 }, { "epoch": 0.3099983789581875, "grad_norm": 0.005738126579672098, "learning_rate": 0.001, "loss": 0.392, "step": 11235 }, { "epoch": 0.31002597115925185, "grad_norm": 0.005195004399865866, "learning_rate": 0.001, "loss": 0.3711, "step": 11236 }, { "epoch": 0.3100535633603162, "grad_norm": 0.003794547636061907, "learning_rate": 0.001, "loss": 0.4293, "step": 11237 }, { "epoch": 0.31008115556138055, "grad_norm": 0.0029551167972385883, "learning_rate": 0.001, "loss": 0.3751, "step": 11238 }, { "epoch": 0.31010874776244496, "grad_norm": 0.002617292571812868, "learning_rate": 0.001, "loss": 0.4004, "step": 11239 }, { "epoch": 0.3101363399635093, "grad_norm": 0.013282334432005882, "learning_rate": 0.001, "loss": 0.3738, "step": 11240 }, { "epoch": 0.31016393216457366, "grad_norm": 0.0025673380587249994, "learning_rate": 0.001, "loss": 0.4433, "step": 11241 }, { "epoch": 0.31019152436563807, "grad_norm": 0.0032431413419544697, "learning_rate": 0.001, "loss": 0.3933, "step": 11242 }, { "epoch": 0.3102191165667024, "grad_norm": 0.006165751256048679, "learning_rate": 0.001, "loss": 0.4224, "step": 11243 }, { "epoch": 0.3102467087677668, "grad_norm": 0.0051593780517578125, "learning_rate": 0.001, "loss": 0.4081, "step": 11244 }, { "epoch": 0.3102743009688312, "grad_norm": 0.004621399566531181, "learning_rate": 0.001, "loss": 0.4194, "step": 11245 }, { "epoch": 0.31030189316989554, "grad_norm": 0.007019545882940292, "learning_rate": 0.001, "loss": 0.3769, "step": 11246 }, { "epoch": 0.3103294853709599, "grad_norm": 0.0030706804245710373, "learning_rate": 0.001, "loss": 0.4098, "step": 11247 }, { "epoch": 0.31035707757202424, "grad_norm": 0.009759355336427689, "learning_rate": 0.001, "loss": 0.4047, "step": 11248 }, { "epoch": 0.31038466977308865, "grad_norm": 0.004515976645052433, "learning_rate": 0.001, "loss": 0.3796, "step": 11249 }, { "epoch": 0.310412261974153, "grad_norm": 0.005143680609762669, "learning_rate": 0.001, "loss": 0.3672, "step": 11250 }, { "epoch": 0.31043985417521736, "grad_norm": 0.004736714996397495, "learning_rate": 0.001, "loss": 0.3894, "step": 11251 }, { "epoch": 0.31046744637628176, "grad_norm": 0.004402066580951214, "learning_rate": 0.001, "loss": 0.3817, "step": 11252 }, { "epoch": 0.3104950385773461, "grad_norm": 0.004078419879078865, "learning_rate": 0.001, "loss": 0.3932, "step": 11253 }, { "epoch": 0.31052263077841047, "grad_norm": 0.005986323114484549, "learning_rate": 0.001, "loss": 0.4077, "step": 11254 }, { "epoch": 0.3105502229794749, "grad_norm": 0.0041157579980790615, "learning_rate": 0.001, "loss": 0.4147, "step": 11255 }, { "epoch": 0.31057781518053923, "grad_norm": 0.005283708218485117, "learning_rate": 0.001, "loss": 0.3949, "step": 11256 }, { "epoch": 0.3106054073816036, "grad_norm": 0.0037576963659375906, "learning_rate": 0.001, "loss": 0.4075, "step": 11257 }, { "epoch": 0.31063299958266793, "grad_norm": 0.004866046831011772, "learning_rate": 0.001, "loss": 0.3901, "step": 11258 }, { "epoch": 0.31066059178373234, "grad_norm": 0.007358568720519543, "learning_rate": 0.001, "loss": 0.4208, "step": 11259 }, { "epoch": 0.3106881839847967, "grad_norm": 0.004052911419421434, "learning_rate": 0.001, "loss": 0.412, "step": 11260 }, { "epoch": 0.31071577618586105, "grad_norm": 0.004355463664978743, "learning_rate": 0.001, "loss": 0.3823, "step": 11261 }, { "epoch": 0.31074336838692546, "grad_norm": 0.006888225674629211, "learning_rate": 0.001, "loss": 0.3888, "step": 11262 }, { "epoch": 0.3107709605879898, "grad_norm": 0.004396567586809397, "learning_rate": 0.001, "loss": 0.3983, "step": 11263 }, { "epoch": 0.31079855278905416, "grad_norm": 0.003929099999368191, "learning_rate": 0.001, "loss": 0.3827, "step": 11264 }, { "epoch": 0.31082614499011857, "grad_norm": 0.0028630662709474564, "learning_rate": 0.001, "loss": 0.4243, "step": 11265 }, { "epoch": 0.3108537371911829, "grad_norm": 0.003565790131688118, "learning_rate": 0.001, "loss": 0.4025, "step": 11266 }, { "epoch": 0.3108813293922473, "grad_norm": 0.004160342272371054, "learning_rate": 0.001, "loss": 0.3897, "step": 11267 }, { "epoch": 0.3109089215933116, "grad_norm": 0.004211884923279285, "learning_rate": 0.001, "loss": 0.4012, "step": 11268 }, { "epoch": 0.31093651379437603, "grad_norm": 0.004373464733362198, "learning_rate": 0.001, "loss": 0.4307, "step": 11269 }, { "epoch": 0.3109641059954404, "grad_norm": 0.0034809240605682135, "learning_rate": 0.001, "loss": 0.3354, "step": 11270 }, { "epoch": 0.31099169819650474, "grad_norm": 0.004237370565533638, "learning_rate": 0.001, "loss": 0.3768, "step": 11271 }, { "epoch": 0.31101929039756915, "grad_norm": 0.003246934851631522, "learning_rate": 0.001, "loss": 0.4406, "step": 11272 }, { "epoch": 0.3110468825986335, "grad_norm": 0.0036773579195141792, "learning_rate": 0.001, "loss": 0.3831, "step": 11273 }, { "epoch": 0.31107447479969785, "grad_norm": 0.005854573100805283, "learning_rate": 0.001, "loss": 0.3478, "step": 11274 }, { "epoch": 0.31110206700076226, "grad_norm": 0.004920937120914459, "learning_rate": 0.001, "loss": 0.4125, "step": 11275 }, { "epoch": 0.3111296592018266, "grad_norm": 0.0030272614676505327, "learning_rate": 0.001, "loss": 0.4373, "step": 11276 }, { "epoch": 0.31115725140289097, "grad_norm": 0.002614455996081233, "learning_rate": 0.001, "loss": 0.3873, "step": 11277 }, { "epoch": 0.3111848436039553, "grad_norm": 0.006632671691477299, "learning_rate": 0.001, "loss": 0.3863, "step": 11278 }, { "epoch": 0.3112124358050197, "grad_norm": 0.005778672639280558, "learning_rate": 0.001, "loss": 0.3869, "step": 11279 }, { "epoch": 0.3112400280060841, "grad_norm": 0.0035203301813453436, "learning_rate": 0.001, "loss": 0.3743, "step": 11280 }, { "epoch": 0.31126762020714843, "grad_norm": 0.002483597956597805, "learning_rate": 0.001, "loss": 0.3692, "step": 11281 }, { "epoch": 0.31129521240821284, "grad_norm": 0.004106397274881601, "learning_rate": 0.001, "loss": 0.3901, "step": 11282 }, { "epoch": 0.3113228046092772, "grad_norm": 0.002346999244764447, "learning_rate": 0.001, "loss": 0.4004, "step": 11283 }, { "epoch": 0.31135039681034155, "grad_norm": 0.005101846065372229, "learning_rate": 0.001, "loss": 0.3911, "step": 11284 }, { "epoch": 0.31137798901140595, "grad_norm": 0.0027893667574971914, "learning_rate": 0.001, "loss": 0.4077, "step": 11285 }, { "epoch": 0.3114055812124703, "grad_norm": 0.0029769500251859426, "learning_rate": 0.001, "loss": 0.4126, "step": 11286 }, { "epoch": 0.31143317341353466, "grad_norm": 0.0033861526753753424, "learning_rate": 0.001, "loss": 0.3951, "step": 11287 }, { "epoch": 0.311460765614599, "grad_norm": 0.006664988584816456, "learning_rate": 0.001, "loss": 0.3681, "step": 11288 }, { "epoch": 0.3114883578156634, "grad_norm": 0.004560802131891251, "learning_rate": 0.001, "loss": 0.4025, "step": 11289 }, { "epoch": 0.31151595001672777, "grad_norm": 0.005946944933384657, "learning_rate": 0.001, "loss": 0.3888, "step": 11290 }, { "epoch": 0.3115435422177921, "grad_norm": 0.0032140284311026335, "learning_rate": 0.001, "loss": 0.3937, "step": 11291 }, { "epoch": 0.31157113441885653, "grad_norm": 0.011185484007000923, "learning_rate": 0.001, "loss": 0.392, "step": 11292 }, { "epoch": 0.3115987266199209, "grad_norm": 0.0024210652336478233, "learning_rate": 0.001, "loss": 0.3568, "step": 11293 }, { "epoch": 0.31162631882098524, "grad_norm": 0.00252347718924284, "learning_rate": 0.001, "loss": 0.404, "step": 11294 }, { "epoch": 0.3116539110220496, "grad_norm": 0.0039216154254972935, "learning_rate": 0.001, "loss": 0.405, "step": 11295 }, { "epoch": 0.311681503223114, "grad_norm": 0.0024138404987752438, "learning_rate": 0.001, "loss": 0.3934, "step": 11296 }, { "epoch": 0.31170909542417835, "grad_norm": 0.0026965364813804626, "learning_rate": 0.001, "loss": 0.4213, "step": 11297 }, { "epoch": 0.3117366876252427, "grad_norm": 0.0034357013646513224, "learning_rate": 0.001, "loss": 0.409, "step": 11298 }, { "epoch": 0.3117642798263071, "grad_norm": 0.0027775561902672052, "learning_rate": 0.001, "loss": 0.4162, "step": 11299 }, { "epoch": 0.31179187202737146, "grad_norm": 0.002832148689776659, "learning_rate": 0.001, "loss": 0.3689, "step": 11300 }, { "epoch": 0.3118194642284358, "grad_norm": 0.002956314478069544, "learning_rate": 0.001, "loss": 0.4185, "step": 11301 }, { "epoch": 0.3118470564295002, "grad_norm": 0.006010833196341991, "learning_rate": 0.001, "loss": 0.3865, "step": 11302 }, { "epoch": 0.3118746486305646, "grad_norm": 0.002660473110154271, "learning_rate": 0.001, "loss": 0.426, "step": 11303 }, { "epoch": 0.31190224083162893, "grad_norm": 0.002595923375338316, "learning_rate": 0.001, "loss": 0.3733, "step": 11304 }, { "epoch": 0.3119298330326933, "grad_norm": 0.004651275463402271, "learning_rate": 0.001, "loss": 0.4042, "step": 11305 }, { "epoch": 0.3119574252337577, "grad_norm": 0.0029690570663660765, "learning_rate": 0.001, "loss": 0.353, "step": 11306 }, { "epoch": 0.31198501743482204, "grad_norm": 0.0036981538869440556, "learning_rate": 0.001, "loss": 0.4236, "step": 11307 }, { "epoch": 0.3120126096358864, "grad_norm": 0.0027398637030273676, "learning_rate": 0.001, "loss": 0.4292, "step": 11308 }, { "epoch": 0.3120402018369508, "grad_norm": 0.003324718214571476, "learning_rate": 0.001, "loss": 0.4538, "step": 11309 }, { "epoch": 0.31206779403801516, "grad_norm": 0.003862992627546191, "learning_rate": 0.001, "loss": 0.3949, "step": 11310 }, { "epoch": 0.3120953862390795, "grad_norm": 0.0027167177759110928, "learning_rate": 0.001, "loss": 0.361, "step": 11311 }, { "epoch": 0.3121229784401439, "grad_norm": 0.005694078281521797, "learning_rate": 0.001, "loss": 0.3341, "step": 11312 }, { "epoch": 0.31215057064120827, "grad_norm": 0.0032801406923681498, "learning_rate": 0.001, "loss": 0.3755, "step": 11313 }, { "epoch": 0.3121781628422726, "grad_norm": 0.0072447690181434155, "learning_rate": 0.001, "loss": 0.3904, "step": 11314 }, { "epoch": 0.312205755043337, "grad_norm": 0.00575765548273921, "learning_rate": 0.001, "loss": 0.3801, "step": 11315 }, { "epoch": 0.3122333472444014, "grad_norm": 0.0030593755654990673, "learning_rate": 0.001, "loss": 0.3693, "step": 11316 }, { "epoch": 0.31226093944546573, "grad_norm": 0.0034721451811492443, "learning_rate": 0.001, "loss": 0.3949, "step": 11317 }, { "epoch": 0.3122885316465301, "grad_norm": 0.0029739809688180685, "learning_rate": 0.001, "loss": 0.4125, "step": 11318 }, { "epoch": 0.3123161238475945, "grad_norm": 0.004178667441010475, "learning_rate": 0.001, "loss": 0.4132, "step": 11319 }, { "epoch": 0.31234371604865885, "grad_norm": 0.00276771723292768, "learning_rate": 0.001, "loss": 0.4108, "step": 11320 }, { "epoch": 0.3123713082497232, "grad_norm": 0.0033821798861026764, "learning_rate": 0.001, "loss": 0.4078, "step": 11321 }, { "epoch": 0.3123989004507876, "grad_norm": 0.003055412322282791, "learning_rate": 0.001, "loss": 0.4071, "step": 11322 }, { "epoch": 0.31242649265185196, "grad_norm": 0.006147374864667654, "learning_rate": 0.001, "loss": 0.4117, "step": 11323 }, { "epoch": 0.3124540848529163, "grad_norm": 0.002903368789702654, "learning_rate": 0.001, "loss": 0.3949, "step": 11324 }, { "epoch": 0.31248167705398067, "grad_norm": 0.002361924620345235, "learning_rate": 0.001, "loss": 0.3739, "step": 11325 }, { "epoch": 0.3125092692550451, "grad_norm": 0.002768756588920951, "learning_rate": 0.001, "loss": 0.3951, "step": 11326 }, { "epoch": 0.3125368614561094, "grad_norm": 0.009059234522283077, "learning_rate": 0.001, "loss": 0.3981, "step": 11327 }, { "epoch": 0.3125644536571738, "grad_norm": 0.0029197093099355698, "learning_rate": 0.001, "loss": 0.4243, "step": 11328 }, { "epoch": 0.3125920458582382, "grad_norm": 0.003418351523578167, "learning_rate": 0.001, "loss": 0.3994, "step": 11329 }, { "epoch": 0.31261963805930254, "grad_norm": 0.004701963625848293, "learning_rate": 0.001, "loss": 0.3557, "step": 11330 }, { "epoch": 0.3126472302603669, "grad_norm": 0.008553891442716122, "learning_rate": 0.001, "loss": 0.3819, "step": 11331 }, { "epoch": 0.3126748224614313, "grad_norm": 0.0062170871533453465, "learning_rate": 0.001, "loss": 0.3824, "step": 11332 }, { "epoch": 0.31270241466249565, "grad_norm": 0.0068610128946602345, "learning_rate": 0.001, "loss": 0.4345, "step": 11333 }, { "epoch": 0.31273000686356, "grad_norm": 0.008722824975848198, "learning_rate": 0.001, "loss": 0.377, "step": 11334 }, { "epoch": 0.31275759906462436, "grad_norm": 0.004026619717478752, "learning_rate": 0.001, "loss": 0.4284, "step": 11335 }, { "epoch": 0.31278519126568877, "grad_norm": 0.006060839165002108, "learning_rate": 0.001, "loss": 0.3568, "step": 11336 }, { "epoch": 0.3128127834667531, "grad_norm": 0.0033903757575899363, "learning_rate": 0.001, "loss": 0.4015, "step": 11337 }, { "epoch": 0.31284037566781747, "grad_norm": 0.004284377209842205, "learning_rate": 0.001, "loss": 0.3676, "step": 11338 }, { "epoch": 0.3128679678688819, "grad_norm": 0.002431008731946349, "learning_rate": 0.001, "loss": 0.4198, "step": 11339 }, { "epoch": 0.31289556006994623, "grad_norm": 0.0026608938351273537, "learning_rate": 0.001, "loss": 0.4473, "step": 11340 }, { "epoch": 0.3129231522710106, "grad_norm": 0.0032837912440299988, "learning_rate": 0.001, "loss": 0.3772, "step": 11341 }, { "epoch": 0.312950744472075, "grad_norm": 0.0023711349349468946, "learning_rate": 0.001, "loss": 0.43, "step": 11342 }, { "epoch": 0.31297833667313935, "grad_norm": 0.003046794096007943, "learning_rate": 0.001, "loss": 0.4083, "step": 11343 }, { "epoch": 0.3130059288742037, "grad_norm": 0.0024662583600729704, "learning_rate": 0.001, "loss": 0.4026, "step": 11344 }, { "epoch": 0.31303352107526805, "grad_norm": 0.002765703946352005, "learning_rate": 0.001, "loss": 0.3953, "step": 11345 }, { "epoch": 0.31306111327633246, "grad_norm": 0.0024844948202371597, "learning_rate": 0.001, "loss": 0.3844, "step": 11346 }, { "epoch": 0.3130887054773968, "grad_norm": 0.0025793337263166904, "learning_rate": 0.001, "loss": 0.3831, "step": 11347 }, { "epoch": 0.31311629767846116, "grad_norm": 0.003900103038176894, "learning_rate": 0.001, "loss": 0.3882, "step": 11348 }, { "epoch": 0.31314388987952557, "grad_norm": 0.0021257405169308186, "learning_rate": 0.001, "loss": 0.4056, "step": 11349 }, { "epoch": 0.3131714820805899, "grad_norm": 0.00406573386862874, "learning_rate": 0.001, "loss": 0.3997, "step": 11350 }, { "epoch": 0.3131990742816543, "grad_norm": 0.003567320527508855, "learning_rate": 0.001, "loss": 0.4156, "step": 11351 }, { "epoch": 0.3132266664827187, "grad_norm": 0.0028111112769693136, "learning_rate": 0.001, "loss": 0.3966, "step": 11352 }, { "epoch": 0.31325425868378304, "grad_norm": 0.004948635585606098, "learning_rate": 0.001, "loss": 0.4094, "step": 11353 }, { "epoch": 0.3132818508848474, "grad_norm": 0.0023954228963702917, "learning_rate": 0.001, "loss": 0.4198, "step": 11354 }, { "epoch": 0.31330944308591174, "grad_norm": 0.0028324832674115896, "learning_rate": 0.001, "loss": 0.3635, "step": 11355 }, { "epoch": 0.31333703528697615, "grad_norm": 0.0026057560462504625, "learning_rate": 0.001, "loss": 0.4259, "step": 11356 }, { "epoch": 0.3133646274880405, "grad_norm": 0.006796353030949831, "learning_rate": 0.001, "loss": 0.4002, "step": 11357 }, { "epoch": 0.31339221968910486, "grad_norm": 0.0028146374970674515, "learning_rate": 0.001, "loss": 0.4088, "step": 11358 }, { "epoch": 0.31341981189016926, "grad_norm": 0.0030071684159338474, "learning_rate": 0.001, "loss": 0.4035, "step": 11359 }, { "epoch": 0.3134474040912336, "grad_norm": 0.0034612752497196198, "learning_rate": 0.001, "loss": 0.423, "step": 11360 }, { "epoch": 0.31347499629229797, "grad_norm": 0.0025981247890740633, "learning_rate": 0.001, "loss": 0.3929, "step": 11361 }, { "epoch": 0.3135025884933624, "grad_norm": 0.0051524159498512745, "learning_rate": 0.001, "loss": 0.3775, "step": 11362 }, { "epoch": 0.31353018069442673, "grad_norm": 0.008304521441459656, "learning_rate": 0.001, "loss": 0.3742, "step": 11363 }, { "epoch": 0.3135577728954911, "grad_norm": 0.0033939266577363014, "learning_rate": 0.001, "loss": 0.409, "step": 11364 }, { "epoch": 0.31358536509655544, "grad_norm": 0.0036061664577573538, "learning_rate": 0.001, "loss": 0.4467, "step": 11365 }, { "epoch": 0.31361295729761984, "grad_norm": 0.005402529612183571, "learning_rate": 0.001, "loss": 0.3777, "step": 11366 }, { "epoch": 0.3136405494986842, "grad_norm": 0.005045123398303986, "learning_rate": 0.001, "loss": 0.3801, "step": 11367 }, { "epoch": 0.31366814169974855, "grad_norm": 0.00310247833840549, "learning_rate": 0.001, "loss": 0.4249, "step": 11368 }, { "epoch": 0.31369573390081296, "grad_norm": 0.002901574596762657, "learning_rate": 0.001, "loss": 0.4057, "step": 11369 }, { "epoch": 0.3137233261018773, "grad_norm": 0.0029007482808083296, "learning_rate": 0.001, "loss": 0.3855, "step": 11370 }, { "epoch": 0.31375091830294166, "grad_norm": 0.0036781311500817537, "learning_rate": 0.001, "loss": 0.4041, "step": 11371 }, { "epoch": 0.31377851050400607, "grad_norm": 0.004679156932979822, "learning_rate": 0.001, "loss": 0.3871, "step": 11372 }, { "epoch": 0.3138061027050704, "grad_norm": 0.002834693994373083, "learning_rate": 0.001, "loss": 0.4189, "step": 11373 }, { "epoch": 0.3138336949061348, "grad_norm": 0.0029626016039401293, "learning_rate": 0.001, "loss": 0.4246, "step": 11374 }, { "epoch": 0.3138612871071991, "grad_norm": 0.0032038032077252865, "learning_rate": 0.001, "loss": 0.3963, "step": 11375 }, { "epoch": 0.31388887930826354, "grad_norm": 0.004681065212935209, "learning_rate": 0.001, "loss": 0.4177, "step": 11376 }, { "epoch": 0.3139164715093279, "grad_norm": 0.004868704825639725, "learning_rate": 0.001, "loss": 0.4064, "step": 11377 }, { "epoch": 0.31394406371039224, "grad_norm": 0.005584596190601587, "learning_rate": 0.001, "loss": 0.3819, "step": 11378 }, { "epoch": 0.31397165591145665, "grad_norm": 0.002676298376172781, "learning_rate": 0.001, "loss": 0.4062, "step": 11379 }, { "epoch": 0.313999248112521, "grad_norm": 0.003069305093958974, "learning_rate": 0.001, "loss": 0.3809, "step": 11380 }, { "epoch": 0.31402684031358535, "grad_norm": 0.002781213726848364, "learning_rate": 0.001, "loss": 0.4287, "step": 11381 }, { "epoch": 0.31405443251464976, "grad_norm": 0.0022207528818398714, "learning_rate": 0.001, "loss": 0.4073, "step": 11382 }, { "epoch": 0.3140820247157141, "grad_norm": 0.0025241211988031864, "learning_rate": 0.001, "loss": 0.3887, "step": 11383 }, { "epoch": 0.31410961691677847, "grad_norm": 0.004623900167644024, "learning_rate": 0.001, "loss": 0.3849, "step": 11384 }, { "epoch": 0.3141372091178428, "grad_norm": 0.0028964534867554903, "learning_rate": 0.001, "loss": 0.4217, "step": 11385 }, { "epoch": 0.3141648013189072, "grad_norm": 0.01115355733782053, "learning_rate": 0.001, "loss": 0.4417, "step": 11386 }, { "epoch": 0.3141923935199716, "grad_norm": 0.003189532784745097, "learning_rate": 0.001, "loss": 0.3955, "step": 11387 }, { "epoch": 0.31421998572103593, "grad_norm": 0.005057721398770809, "learning_rate": 0.001, "loss": 0.4194, "step": 11388 }, { "epoch": 0.31424757792210034, "grad_norm": 0.005494655575603247, "learning_rate": 0.001, "loss": 0.4234, "step": 11389 }, { "epoch": 0.3142751701231647, "grad_norm": 0.0033896707464009523, "learning_rate": 0.001, "loss": 0.3345, "step": 11390 }, { "epoch": 0.31430276232422905, "grad_norm": 0.0024348304141312838, "learning_rate": 0.001, "loss": 0.413, "step": 11391 }, { "epoch": 0.3143303545252934, "grad_norm": 0.002852590288966894, "learning_rate": 0.001, "loss": 0.4514, "step": 11392 }, { "epoch": 0.3143579467263578, "grad_norm": 0.0025470538530498743, "learning_rate": 0.001, "loss": 0.3871, "step": 11393 }, { "epoch": 0.31438553892742216, "grad_norm": 0.003952103201299906, "learning_rate": 0.001, "loss": 0.3673, "step": 11394 }, { "epoch": 0.3144131311284865, "grad_norm": 0.0030062925070524216, "learning_rate": 0.001, "loss": 0.4349, "step": 11395 }, { "epoch": 0.3144407233295509, "grad_norm": 0.008497409522533417, "learning_rate": 0.001, "loss": 0.4031, "step": 11396 }, { "epoch": 0.31446831553061527, "grad_norm": 0.0024450889322906733, "learning_rate": 0.001, "loss": 0.3983, "step": 11397 }, { "epoch": 0.3144959077316796, "grad_norm": 0.002428788226097822, "learning_rate": 0.001, "loss": 0.3698, "step": 11398 }, { "epoch": 0.31452349993274403, "grad_norm": 0.0026488315779715776, "learning_rate": 0.001, "loss": 0.4082, "step": 11399 }, { "epoch": 0.3145510921338084, "grad_norm": 0.003063708543777466, "learning_rate": 0.001, "loss": 0.3891, "step": 11400 }, { "epoch": 0.31457868433487274, "grad_norm": 0.0036700747441500425, "learning_rate": 0.001, "loss": 0.3839, "step": 11401 }, { "epoch": 0.3146062765359371, "grad_norm": 0.0024674683809280396, "learning_rate": 0.001, "loss": 0.3815, "step": 11402 }, { "epoch": 0.3146338687370015, "grad_norm": 0.002942080609500408, "learning_rate": 0.001, "loss": 0.4228, "step": 11403 }, { "epoch": 0.31466146093806585, "grad_norm": 0.0024689743295311928, "learning_rate": 0.001, "loss": 0.3945, "step": 11404 }, { "epoch": 0.3146890531391302, "grad_norm": 0.003294649999588728, "learning_rate": 0.001, "loss": 0.4543, "step": 11405 }, { "epoch": 0.3147166453401946, "grad_norm": 0.0027752909809350967, "learning_rate": 0.001, "loss": 0.3795, "step": 11406 }, { "epoch": 0.31474423754125896, "grad_norm": 0.0031409088987857103, "learning_rate": 0.001, "loss": 0.3476, "step": 11407 }, { "epoch": 0.3147718297423233, "grad_norm": 0.004619597923010588, "learning_rate": 0.001, "loss": 0.3996, "step": 11408 }, { "epoch": 0.3147994219433877, "grad_norm": 0.005096206441521645, "learning_rate": 0.001, "loss": 0.4164, "step": 11409 }, { "epoch": 0.3148270141444521, "grad_norm": 0.003756036050617695, "learning_rate": 0.001, "loss": 0.3912, "step": 11410 }, { "epoch": 0.31485460634551643, "grad_norm": 0.0021125138737261295, "learning_rate": 0.001, "loss": 0.4238, "step": 11411 }, { "epoch": 0.3148821985465808, "grad_norm": 0.002177385613322258, "learning_rate": 0.001, "loss": 0.381, "step": 11412 }, { "epoch": 0.3149097907476452, "grad_norm": 0.0027410865295678377, "learning_rate": 0.001, "loss": 0.4251, "step": 11413 }, { "epoch": 0.31493738294870954, "grad_norm": 0.003590056672692299, "learning_rate": 0.001, "loss": 0.4169, "step": 11414 }, { "epoch": 0.3149649751497739, "grad_norm": 0.002279781037941575, "learning_rate": 0.001, "loss": 0.3946, "step": 11415 }, { "epoch": 0.3149925673508383, "grad_norm": 0.0023333400022238493, "learning_rate": 0.001, "loss": 0.4085, "step": 11416 }, { "epoch": 0.31502015955190266, "grad_norm": 0.004322835244238377, "learning_rate": 0.001, "loss": 0.3835, "step": 11417 }, { "epoch": 0.315047751752967, "grad_norm": 0.0034005579072982073, "learning_rate": 0.001, "loss": 0.4159, "step": 11418 }, { "epoch": 0.3150753439540314, "grad_norm": 0.007161610759794712, "learning_rate": 0.001, "loss": 0.4044, "step": 11419 }, { "epoch": 0.31510293615509577, "grad_norm": 0.007838692516088486, "learning_rate": 0.001, "loss": 0.4317, "step": 11420 }, { "epoch": 0.3151305283561601, "grad_norm": 0.004573942627757788, "learning_rate": 0.001, "loss": 0.3715, "step": 11421 }, { "epoch": 0.3151581205572245, "grad_norm": 0.005383932497352362, "learning_rate": 0.001, "loss": 0.4437, "step": 11422 }, { "epoch": 0.3151857127582889, "grad_norm": 0.004969790577888489, "learning_rate": 0.001, "loss": 0.3845, "step": 11423 }, { "epoch": 0.31521330495935324, "grad_norm": 0.0022800557781010866, "learning_rate": 0.001, "loss": 0.3946, "step": 11424 }, { "epoch": 0.3152408971604176, "grad_norm": 0.0025320611894130707, "learning_rate": 0.001, "loss": 0.4653, "step": 11425 }, { "epoch": 0.315268489361482, "grad_norm": 0.002388893160969019, "learning_rate": 0.001, "loss": 0.4077, "step": 11426 }, { "epoch": 0.31529608156254635, "grad_norm": 0.008318680338561535, "learning_rate": 0.001, "loss": 0.397, "step": 11427 }, { "epoch": 0.3153236737636107, "grad_norm": 0.007277218624949455, "learning_rate": 0.001, "loss": 0.3586, "step": 11428 }, { "epoch": 0.3153512659646751, "grad_norm": 0.0038948303554207087, "learning_rate": 0.001, "loss": 0.3747, "step": 11429 }, { "epoch": 0.31537885816573946, "grad_norm": 0.003630647901445627, "learning_rate": 0.001, "loss": 0.4183, "step": 11430 }, { "epoch": 0.3154064503668038, "grad_norm": 0.002882918808609247, "learning_rate": 0.001, "loss": 0.4038, "step": 11431 }, { "epoch": 0.31543404256786817, "grad_norm": 0.0030421330593526363, "learning_rate": 0.001, "loss": 0.367, "step": 11432 }, { "epoch": 0.3154616347689326, "grad_norm": 0.00291895167902112, "learning_rate": 0.001, "loss": 0.427, "step": 11433 }, { "epoch": 0.3154892269699969, "grad_norm": 0.003857139963656664, "learning_rate": 0.001, "loss": 0.4236, "step": 11434 }, { "epoch": 0.3155168191710613, "grad_norm": 0.0034651218447834253, "learning_rate": 0.001, "loss": 0.4063, "step": 11435 }, { "epoch": 0.3155444113721257, "grad_norm": 0.0023657598067075014, "learning_rate": 0.001, "loss": 0.4227, "step": 11436 }, { "epoch": 0.31557200357319004, "grad_norm": 0.002382135484367609, "learning_rate": 0.001, "loss": 0.3864, "step": 11437 }, { "epoch": 0.3155995957742544, "grad_norm": 0.0034222460817545652, "learning_rate": 0.001, "loss": 0.418, "step": 11438 }, { "epoch": 0.3156271879753188, "grad_norm": 0.004517734050750732, "learning_rate": 0.001, "loss": 0.3899, "step": 11439 }, { "epoch": 0.31565478017638315, "grad_norm": 0.005458935163915157, "learning_rate": 0.001, "loss": 0.4125, "step": 11440 }, { "epoch": 0.3156823723774475, "grad_norm": 0.00377734680660069, "learning_rate": 0.001, "loss": 0.3623, "step": 11441 }, { "epoch": 0.31570996457851186, "grad_norm": 0.006429413799196482, "learning_rate": 0.001, "loss": 0.3875, "step": 11442 }, { "epoch": 0.31573755677957627, "grad_norm": 0.002111859619617462, "learning_rate": 0.001, "loss": 0.3918, "step": 11443 }, { "epoch": 0.3157651489806406, "grad_norm": 0.005765847861766815, "learning_rate": 0.001, "loss": 0.4058, "step": 11444 }, { "epoch": 0.315792741181705, "grad_norm": 0.0038511583115905523, "learning_rate": 0.001, "loss": 0.3508, "step": 11445 }, { "epoch": 0.3158203333827694, "grad_norm": 0.006009151693433523, "learning_rate": 0.001, "loss": 0.4063, "step": 11446 }, { "epoch": 0.31584792558383373, "grad_norm": 0.0027272317092865705, "learning_rate": 0.001, "loss": 0.4205, "step": 11447 }, { "epoch": 0.3158755177848981, "grad_norm": 0.0022494541481137276, "learning_rate": 0.001, "loss": 0.4016, "step": 11448 }, { "epoch": 0.3159031099859625, "grad_norm": 0.0025459870230406523, "learning_rate": 0.001, "loss": 0.3924, "step": 11449 }, { "epoch": 0.31593070218702685, "grad_norm": 0.008022737689316273, "learning_rate": 0.001, "loss": 0.3628, "step": 11450 }, { "epoch": 0.3159582943880912, "grad_norm": 0.0043347179889678955, "learning_rate": 0.001, "loss": 0.3537, "step": 11451 }, { "epoch": 0.31598588658915555, "grad_norm": 0.003317405702546239, "learning_rate": 0.001, "loss": 0.4351, "step": 11452 }, { "epoch": 0.31601347879021996, "grad_norm": 0.003171857912093401, "learning_rate": 0.001, "loss": 0.3757, "step": 11453 }, { "epoch": 0.3160410709912843, "grad_norm": 0.0032373506110161543, "learning_rate": 0.001, "loss": 0.4016, "step": 11454 }, { "epoch": 0.31606866319234866, "grad_norm": 0.003428233554586768, "learning_rate": 0.001, "loss": 0.3685, "step": 11455 }, { "epoch": 0.3160962553934131, "grad_norm": 0.0027770015876740217, "learning_rate": 0.001, "loss": 0.4151, "step": 11456 }, { "epoch": 0.3161238475944774, "grad_norm": 0.002452113199979067, "learning_rate": 0.001, "loss": 0.4296, "step": 11457 }, { "epoch": 0.3161514397955418, "grad_norm": 0.0024971971288323402, "learning_rate": 0.001, "loss": 0.413, "step": 11458 }, { "epoch": 0.3161790319966062, "grad_norm": 0.002314744982868433, "learning_rate": 0.001, "loss": 0.4278, "step": 11459 }, { "epoch": 0.31620662419767054, "grad_norm": 0.004431735258549452, "learning_rate": 0.001, "loss": 0.3828, "step": 11460 }, { "epoch": 0.3162342163987349, "grad_norm": 0.007284036837518215, "learning_rate": 0.001, "loss": 0.3353, "step": 11461 }, { "epoch": 0.31626180859979924, "grad_norm": 0.004431730601936579, "learning_rate": 0.001, "loss": 0.3701, "step": 11462 }, { "epoch": 0.31628940080086365, "grad_norm": 0.003810502588748932, "learning_rate": 0.001, "loss": 0.3924, "step": 11463 }, { "epoch": 0.316316993001928, "grad_norm": 0.004132286179810762, "learning_rate": 0.001, "loss": 0.379, "step": 11464 }, { "epoch": 0.31634458520299236, "grad_norm": 0.0026423779781907797, "learning_rate": 0.001, "loss": 0.3951, "step": 11465 }, { "epoch": 0.31637217740405676, "grad_norm": 0.0030175880528986454, "learning_rate": 0.001, "loss": 0.3806, "step": 11466 }, { "epoch": 0.3163997696051211, "grad_norm": 0.0023432145826518536, "learning_rate": 0.001, "loss": 0.422, "step": 11467 }, { "epoch": 0.31642736180618547, "grad_norm": 0.004060344770550728, "learning_rate": 0.001, "loss": 0.3897, "step": 11468 }, { "epoch": 0.3164549540072499, "grad_norm": 0.00273421430028975, "learning_rate": 0.001, "loss": 0.3729, "step": 11469 }, { "epoch": 0.31648254620831423, "grad_norm": 0.0024940611328929663, "learning_rate": 0.001, "loss": 0.3923, "step": 11470 }, { "epoch": 0.3165101384093786, "grad_norm": 0.005460789427161217, "learning_rate": 0.001, "loss": 0.4153, "step": 11471 }, { "epoch": 0.31653773061044294, "grad_norm": 0.0026238500140607357, "learning_rate": 0.001, "loss": 0.4299, "step": 11472 }, { "epoch": 0.31656532281150734, "grad_norm": 0.005485900212079287, "learning_rate": 0.001, "loss": 0.4071, "step": 11473 }, { "epoch": 0.3165929150125717, "grad_norm": 0.0072882408276200294, "learning_rate": 0.001, "loss": 0.4056, "step": 11474 }, { "epoch": 0.31662050721363605, "grad_norm": 0.002355532720685005, "learning_rate": 0.001, "loss": 0.4526, "step": 11475 }, { "epoch": 0.31664809941470046, "grad_norm": 0.0029103090055286884, "learning_rate": 0.001, "loss": 0.4019, "step": 11476 }, { "epoch": 0.3166756916157648, "grad_norm": 0.005752061028033495, "learning_rate": 0.001, "loss": 0.4086, "step": 11477 }, { "epoch": 0.31670328381682916, "grad_norm": 0.0035546228755265474, "learning_rate": 0.001, "loss": 0.4, "step": 11478 }, { "epoch": 0.31673087601789357, "grad_norm": 0.007723371963948011, "learning_rate": 0.001, "loss": 0.4098, "step": 11479 }, { "epoch": 0.3167584682189579, "grad_norm": 0.005489727947860956, "learning_rate": 0.001, "loss": 0.3787, "step": 11480 }, { "epoch": 0.3167860604200223, "grad_norm": 0.0035039815120399, "learning_rate": 0.001, "loss": 0.4038, "step": 11481 }, { "epoch": 0.3168136526210866, "grad_norm": 0.00970650278031826, "learning_rate": 0.001, "loss": 0.4075, "step": 11482 }, { "epoch": 0.31684124482215104, "grad_norm": 0.002773486776277423, "learning_rate": 0.001, "loss": 0.4455, "step": 11483 }, { "epoch": 0.3168688370232154, "grad_norm": 0.002964557381346822, "learning_rate": 0.001, "loss": 0.389, "step": 11484 }, { "epoch": 0.31689642922427974, "grad_norm": 0.002768107457086444, "learning_rate": 0.001, "loss": 0.4399, "step": 11485 }, { "epoch": 0.31692402142534415, "grad_norm": 0.003310150234028697, "learning_rate": 0.001, "loss": 0.3921, "step": 11486 }, { "epoch": 0.3169516136264085, "grad_norm": 0.0038422702345997095, "learning_rate": 0.001, "loss": 0.3947, "step": 11487 }, { "epoch": 0.31697920582747285, "grad_norm": 0.0059617673978209496, "learning_rate": 0.001, "loss": 0.3932, "step": 11488 }, { "epoch": 0.3170067980285372, "grad_norm": 0.004942035768181086, "learning_rate": 0.001, "loss": 0.4235, "step": 11489 }, { "epoch": 0.3170343902296016, "grad_norm": 0.005482214502990246, "learning_rate": 0.001, "loss": 0.3917, "step": 11490 }, { "epoch": 0.31706198243066597, "grad_norm": 0.004851524252444506, "learning_rate": 0.001, "loss": 0.3555, "step": 11491 }, { "epoch": 0.3170895746317303, "grad_norm": 0.0029763453640043736, "learning_rate": 0.001, "loss": 0.4055, "step": 11492 }, { "epoch": 0.31711716683279473, "grad_norm": 0.0036718971095979214, "learning_rate": 0.001, "loss": 0.3886, "step": 11493 }, { "epoch": 0.3171447590338591, "grad_norm": 0.003948654048144817, "learning_rate": 0.001, "loss": 0.4239, "step": 11494 }, { "epoch": 0.31717235123492343, "grad_norm": 0.011904267594218254, "learning_rate": 0.001, "loss": 0.3985, "step": 11495 }, { "epoch": 0.31719994343598784, "grad_norm": 0.003118073334917426, "learning_rate": 0.001, "loss": 0.383, "step": 11496 }, { "epoch": 0.3172275356370522, "grad_norm": 0.0038996157236397266, "learning_rate": 0.001, "loss": 0.3929, "step": 11497 }, { "epoch": 0.31725512783811655, "grad_norm": 0.003080842550843954, "learning_rate": 0.001, "loss": 0.379, "step": 11498 }, { "epoch": 0.3172827200391809, "grad_norm": 0.0027819147799164057, "learning_rate": 0.001, "loss": 0.3748, "step": 11499 }, { "epoch": 0.3173103122402453, "grad_norm": 0.0029706538189202547, "learning_rate": 0.001, "loss": 0.3841, "step": 11500 }, { "epoch": 0.3173103122402453, "eval_runtime": 23.5592, "eval_samples_per_second": 1.358, "eval_steps_per_second": 0.17, "step": 11500 }, { "epoch": 0.31733790444130966, "grad_norm": 0.002240256406366825, "learning_rate": 0.001, "loss": 0.4069, "step": 11501 }, { "epoch": 0.317365496642374, "grad_norm": 0.0034430986270308495, "learning_rate": 0.001, "loss": 0.3758, "step": 11502 }, { "epoch": 0.3173930888434384, "grad_norm": 0.01547628827393055, "learning_rate": 0.001, "loss": 0.3948, "step": 11503 }, { "epoch": 0.3174206810445028, "grad_norm": 0.003593276022002101, "learning_rate": 0.001, "loss": 0.41, "step": 11504 }, { "epoch": 0.3174482732455671, "grad_norm": 0.00244096084497869, "learning_rate": 0.001, "loss": 0.4129, "step": 11505 }, { "epoch": 0.31747586544663153, "grad_norm": 0.002778639318421483, "learning_rate": 0.001, "loss": 0.3911, "step": 11506 }, { "epoch": 0.3175034576476959, "grad_norm": 0.018177034333348274, "learning_rate": 0.001, "loss": 0.3928, "step": 11507 }, { "epoch": 0.31753104984876024, "grad_norm": 0.005340322386473417, "learning_rate": 0.001, "loss": 0.4106, "step": 11508 }, { "epoch": 0.3175586420498246, "grad_norm": 0.0037258395459502935, "learning_rate": 0.001, "loss": 0.4093, "step": 11509 }, { "epoch": 0.317586234250889, "grad_norm": 0.004941198974847794, "learning_rate": 0.001, "loss": 0.3956, "step": 11510 }, { "epoch": 0.31761382645195335, "grad_norm": 0.0029092850163578987, "learning_rate": 0.001, "loss": 0.3928, "step": 11511 }, { "epoch": 0.3176414186530177, "grad_norm": 0.0023247981444001198, "learning_rate": 0.001, "loss": 0.3985, "step": 11512 }, { "epoch": 0.3176690108540821, "grad_norm": 0.0024109010118991137, "learning_rate": 0.001, "loss": 0.3925, "step": 11513 }, { "epoch": 0.31769660305514646, "grad_norm": 0.004136643372476101, "learning_rate": 0.001, "loss": 0.4308, "step": 11514 }, { "epoch": 0.3177241952562108, "grad_norm": 0.002965535270050168, "learning_rate": 0.001, "loss": 0.3983, "step": 11515 }, { "epoch": 0.3177517874572752, "grad_norm": 0.0028476836159825325, "learning_rate": 0.001, "loss": 0.4078, "step": 11516 }, { "epoch": 0.3177793796583396, "grad_norm": 0.002511124825105071, "learning_rate": 0.001, "loss": 0.3867, "step": 11517 }, { "epoch": 0.31780697185940393, "grad_norm": 0.002977583557367325, "learning_rate": 0.001, "loss": 0.403, "step": 11518 }, { "epoch": 0.3178345640604683, "grad_norm": 0.0023715696297585964, "learning_rate": 0.001, "loss": 0.3795, "step": 11519 }, { "epoch": 0.3178621562615327, "grad_norm": 0.007563414517790079, "learning_rate": 0.001, "loss": 0.388, "step": 11520 }, { "epoch": 0.31788974846259704, "grad_norm": 0.004035325720906258, "learning_rate": 0.001, "loss": 0.379, "step": 11521 }, { "epoch": 0.3179173406636614, "grad_norm": 0.00366159132681787, "learning_rate": 0.001, "loss": 0.4218, "step": 11522 }, { "epoch": 0.3179449328647258, "grad_norm": 0.0030379300005733967, "learning_rate": 0.001, "loss": 0.4373, "step": 11523 }, { "epoch": 0.31797252506579016, "grad_norm": 0.002500406000763178, "learning_rate": 0.001, "loss": 0.4019, "step": 11524 }, { "epoch": 0.3180001172668545, "grad_norm": 0.0029437800403684378, "learning_rate": 0.001, "loss": 0.4073, "step": 11525 }, { "epoch": 0.3180277094679189, "grad_norm": 0.003492504358291626, "learning_rate": 0.001, "loss": 0.3619, "step": 11526 }, { "epoch": 0.31805530166898327, "grad_norm": 0.002901204163208604, "learning_rate": 0.001, "loss": 0.3956, "step": 11527 }, { "epoch": 0.3180828938700476, "grad_norm": 0.003703712485730648, "learning_rate": 0.001, "loss": 0.4045, "step": 11528 }, { "epoch": 0.318110486071112, "grad_norm": 0.003048625076189637, "learning_rate": 0.001, "loss": 0.3834, "step": 11529 }, { "epoch": 0.3181380782721764, "grad_norm": 0.0022534143645316362, "learning_rate": 0.001, "loss": 0.4213, "step": 11530 }, { "epoch": 0.31816567047324074, "grad_norm": 0.0026876600459218025, "learning_rate": 0.001, "loss": 0.3785, "step": 11531 }, { "epoch": 0.3181932626743051, "grad_norm": 0.00236628670245409, "learning_rate": 0.001, "loss": 0.4376, "step": 11532 }, { "epoch": 0.3182208548753695, "grad_norm": 0.004099288955330849, "learning_rate": 0.001, "loss": 0.3916, "step": 11533 }, { "epoch": 0.31824844707643385, "grad_norm": 0.0041442555375397205, "learning_rate": 0.001, "loss": 0.358, "step": 11534 }, { "epoch": 0.3182760392774982, "grad_norm": 0.003338357200846076, "learning_rate": 0.001, "loss": 0.4307, "step": 11535 }, { "epoch": 0.3183036314785626, "grad_norm": 0.002358140889555216, "learning_rate": 0.001, "loss": 0.4438, "step": 11536 }, { "epoch": 0.31833122367962696, "grad_norm": 0.007097299210727215, "learning_rate": 0.001, "loss": 0.3917, "step": 11537 }, { "epoch": 0.3183588158806913, "grad_norm": 0.003606355981901288, "learning_rate": 0.001, "loss": 0.4283, "step": 11538 }, { "epoch": 0.31838640808175567, "grad_norm": 0.0027023248840123415, "learning_rate": 0.001, "loss": 0.379, "step": 11539 }, { "epoch": 0.3184140002828201, "grad_norm": 0.0069968136958777905, "learning_rate": 0.001, "loss": 0.3748, "step": 11540 }, { "epoch": 0.31844159248388443, "grad_norm": 0.0028446821961551905, "learning_rate": 0.001, "loss": 0.4025, "step": 11541 }, { "epoch": 0.3184691846849488, "grad_norm": 0.003783198306336999, "learning_rate": 0.001, "loss": 0.3773, "step": 11542 }, { "epoch": 0.3184967768860132, "grad_norm": 0.0042613414116203785, "learning_rate": 0.001, "loss": 0.4125, "step": 11543 }, { "epoch": 0.31852436908707754, "grad_norm": 0.023959210142493248, "learning_rate": 0.001, "loss": 0.4045, "step": 11544 }, { "epoch": 0.3185519612881419, "grad_norm": 0.02399979531764984, "learning_rate": 0.001, "loss": 0.3862, "step": 11545 }, { "epoch": 0.3185795534892063, "grad_norm": 0.003035551868379116, "learning_rate": 0.001, "loss": 0.427, "step": 11546 }, { "epoch": 0.31860714569027065, "grad_norm": 0.0028156766202300787, "learning_rate": 0.001, "loss": 0.394, "step": 11547 }, { "epoch": 0.318634737891335, "grad_norm": 0.0029186957981437445, "learning_rate": 0.001, "loss": 0.4204, "step": 11548 }, { "epoch": 0.31866233009239936, "grad_norm": 0.0037356752436608076, "learning_rate": 0.001, "loss": 0.4317, "step": 11549 }, { "epoch": 0.31868992229346377, "grad_norm": 0.0047831544652581215, "learning_rate": 0.001, "loss": 0.3895, "step": 11550 }, { "epoch": 0.3187175144945281, "grad_norm": 0.0023849967401474714, "learning_rate": 0.001, "loss": 0.3931, "step": 11551 }, { "epoch": 0.3187451066955925, "grad_norm": 0.004315240308642387, "learning_rate": 0.001, "loss": 0.3765, "step": 11552 }, { "epoch": 0.3187726988966569, "grad_norm": 0.003803263884037733, "learning_rate": 0.001, "loss": 0.393, "step": 11553 }, { "epoch": 0.31880029109772123, "grad_norm": 0.004018161911517382, "learning_rate": 0.001, "loss": 0.3923, "step": 11554 }, { "epoch": 0.3188278832987856, "grad_norm": 0.0036311408039182425, "learning_rate": 0.001, "loss": 0.3995, "step": 11555 }, { "epoch": 0.31885547549985, "grad_norm": 0.004630459472537041, "learning_rate": 0.001, "loss": 0.4245, "step": 11556 }, { "epoch": 0.31888306770091435, "grad_norm": 0.0048345946706831455, "learning_rate": 0.001, "loss": 0.3803, "step": 11557 }, { "epoch": 0.3189106599019787, "grad_norm": 0.005161342676728964, "learning_rate": 0.001, "loss": 0.3893, "step": 11558 }, { "epoch": 0.31893825210304305, "grad_norm": 0.0023576943203806877, "learning_rate": 0.001, "loss": 0.4402, "step": 11559 }, { "epoch": 0.31896584430410746, "grad_norm": 0.004130503162741661, "learning_rate": 0.001, "loss": 0.3545, "step": 11560 }, { "epoch": 0.3189934365051718, "grad_norm": 0.005308313295245171, "learning_rate": 0.001, "loss": 0.3788, "step": 11561 }, { "epoch": 0.31902102870623616, "grad_norm": 0.0037142925430089235, "learning_rate": 0.001, "loss": 0.3961, "step": 11562 }, { "epoch": 0.3190486209073006, "grad_norm": 0.0037768762558698654, "learning_rate": 0.001, "loss": 0.3944, "step": 11563 }, { "epoch": 0.3190762131083649, "grad_norm": 0.004423100501298904, "learning_rate": 0.001, "loss": 0.3734, "step": 11564 }, { "epoch": 0.3191038053094293, "grad_norm": 0.0066397832706570625, "learning_rate": 0.001, "loss": 0.4044, "step": 11565 }, { "epoch": 0.3191313975104937, "grad_norm": 0.003907995298504829, "learning_rate": 0.001, "loss": 0.3999, "step": 11566 }, { "epoch": 0.31915898971155804, "grad_norm": 0.003112133825197816, "learning_rate": 0.001, "loss": 0.4219, "step": 11567 }, { "epoch": 0.3191865819126224, "grad_norm": 0.002988254651427269, "learning_rate": 0.001, "loss": 0.3865, "step": 11568 }, { "epoch": 0.31921417411368674, "grad_norm": 0.023827673867344856, "learning_rate": 0.001, "loss": 0.4147, "step": 11569 }, { "epoch": 0.31924176631475115, "grad_norm": 0.0068152598105371, "learning_rate": 0.001, "loss": 0.433, "step": 11570 }, { "epoch": 0.3192693585158155, "grad_norm": 0.004217714536935091, "learning_rate": 0.001, "loss": 0.3661, "step": 11571 }, { "epoch": 0.31929695071687986, "grad_norm": 0.002245552372187376, "learning_rate": 0.001, "loss": 0.3537, "step": 11572 }, { "epoch": 0.31932454291794427, "grad_norm": 0.0026237708516418934, "learning_rate": 0.001, "loss": 0.3694, "step": 11573 }, { "epoch": 0.3193521351190086, "grad_norm": 0.0031141149811446667, "learning_rate": 0.001, "loss": 0.4304, "step": 11574 }, { "epoch": 0.31937972732007297, "grad_norm": 0.004059195052832365, "learning_rate": 0.001, "loss": 0.4158, "step": 11575 }, { "epoch": 0.3194073195211373, "grad_norm": 0.002976267132908106, "learning_rate": 0.001, "loss": 0.4212, "step": 11576 }, { "epoch": 0.31943491172220173, "grad_norm": 0.0048133903183043, "learning_rate": 0.001, "loss": 0.3741, "step": 11577 }, { "epoch": 0.3194625039232661, "grad_norm": 0.002898376202210784, "learning_rate": 0.001, "loss": 0.3809, "step": 11578 }, { "epoch": 0.31949009612433044, "grad_norm": 0.0033135826233774424, "learning_rate": 0.001, "loss": 0.3935, "step": 11579 }, { "epoch": 0.31951768832539484, "grad_norm": 0.005857154726982117, "learning_rate": 0.001, "loss": 0.3859, "step": 11580 }, { "epoch": 0.3195452805264592, "grad_norm": 0.0028846007771790028, "learning_rate": 0.001, "loss": 0.3835, "step": 11581 }, { "epoch": 0.31957287272752355, "grad_norm": 0.002914070850238204, "learning_rate": 0.001, "loss": 0.3684, "step": 11582 }, { "epoch": 0.31960046492858796, "grad_norm": 0.002994397422298789, "learning_rate": 0.001, "loss": 0.3825, "step": 11583 }, { "epoch": 0.3196280571296523, "grad_norm": 0.0033264674711972475, "learning_rate": 0.001, "loss": 0.4124, "step": 11584 }, { "epoch": 0.31965564933071666, "grad_norm": 0.002203144831582904, "learning_rate": 0.001, "loss": 0.4649, "step": 11585 }, { "epoch": 0.319683241531781, "grad_norm": 0.004528568591922522, "learning_rate": 0.001, "loss": 0.3861, "step": 11586 }, { "epoch": 0.3197108337328454, "grad_norm": 0.003971675876528025, "learning_rate": 0.001, "loss": 0.387, "step": 11587 }, { "epoch": 0.3197384259339098, "grad_norm": 0.003969928715378046, "learning_rate": 0.001, "loss": 0.3971, "step": 11588 }, { "epoch": 0.31976601813497413, "grad_norm": 0.0036615943536162376, "learning_rate": 0.001, "loss": 0.3822, "step": 11589 }, { "epoch": 0.31979361033603854, "grad_norm": 0.0023915197234600782, "learning_rate": 0.001, "loss": 0.4089, "step": 11590 }, { "epoch": 0.3198212025371029, "grad_norm": 0.0024923242162913084, "learning_rate": 0.001, "loss": 0.4314, "step": 11591 }, { "epoch": 0.31984879473816724, "grad_norm": 0.002602427499368787, "learning_rate": 0.001, "loss": 0.3861, "step": 11592 }, { "epoch": 0.31987638693923165, "grad_norm": 0.005880540236830711, "learning_rate": 0.001, "loss": 0.3874, "step": 11593 }, { "epoch": 0.319903979140296, "grad_norm": 0.00876854732632637, "learning_rate": 0.001, "loss": 0.3895, "step": 11594 }, { "epoch": 0.31993157134136035, "grad_norm": 0.006111782975494862, "learning_rate": 0.001, "loss": 0.4055, "step": 11595 }, { "epoch": 0.3199591635424247, "grad_norm": 0.006039989646524191, "learning_rate": 0.001, "loss": 0.4061, "step": 11596 }, { "epoch": 0.3199867557434891, "grad_norm": 0.008590095676481724, "learning_rate": 0.001, "loss": 0.3448, "step": 11597 }, { "epoch": 0.32001434794455347, "grad_norm": 0.015925157815217972, "learning_rate": 0.001, "loss": 0.409, "step": 11598 }, { "epoch": 0.3200419401456178, "grad_norm": 0.003673270810395479, "learning_rate": 0.001, "loss": 0.3928, "step": 11599 }, { "epoch": 0.32006953234668223, "grad_norm": 0.0026890907902270555, "learning_rate": 0.001, "loss": 0.4251, "step": 11600 }, { "epoch": 0.3200971245477466, "grad_norm": 0.0037518092431128025, "learning_rate": 0.001, "loss": 0.4057, "step": 11601 }, { "epoch": 0.32012471674881093, "grad_norm": 0.003528765868395567, "learning_rate": 0.001, "loss": 0.4144, "step": 11602 }, { "epoch": 0.32015230894987534, "grad_norm": 0.0036971168592572212, "learning_rate": 0.001, "loss": 0.3964, "step": 11603 }, { "epoch": 0.3201799011509397, "grad_norm": 0.003124906914308667, "learning_rate": 0.001, "loss": 0.3936, "step": 11604 }, { "epoch": 0.32020749335200405, "grad_norm": 0.0033891245257109404, "learning_rate": 0.001, "loss": 0.4146, "step": 11605 }, { "epoch": 0.3202350855530684, "grad_norm": 0.002698018215596676, "learning_rate": 0.001, "loss": 0.4173, "step": 11606 }, { "epoch": 0.3202626777541328, "grad_norm": 0.0034322156570851803, "learning_rate": 0.001, "loss": 0.3921, "step": 11607 }, { "epoch": 0.32029026995519716, "grad_norm": 0.0033172587864100933, "learning_rate": 0.001, "loss": 0.3796, "step": 11608 }, { "epoch": 0.3203178621562615, "grad_norm": 0.0030292505398392677, "learning_rate": 0.001, "loss": 0.3811, "step": 11609 }, { "epoch": 0.3203454543573259, "grad_norm": 0.0033198799937963486, "learning_rate": 0.001, "loss": 0.4154, "step": 11610 }, { "epoch": 0.3203730465583903, "grad_norm": 0.003341773757711053, "learning_rate": 0.001, "loss": 0.3965, "step": 11611 }, { "epoch": 0.3204006387594546, "grad_norm": 0.004839817062020302, "learning_rate": 0.001, "loss": 0.3685, "step": 11612 }, { "epoch": 0.32042823096051903, "grad_norm": 0.0031633905600756407, "learning_rate": 0.001, "loss": 0.3595, "step": 11613 }, { "epoch": 0.3204558231615834, "grad_norm": 0.011446718126535416, "learning_rate": 0.001, "loss": 0.4126, "step": 11614 }, { "epoch": 0.32048341536264774, "grad_norm": 0.011381502263247967, "learning_rate": 0.001, "loss": 0.3986, "step": 11615 }, { "epoch": 0.3205110075637121, "grad_norm": 0.013346023857593536, "learning_rate": 0.001, "loss": 0.4054, "step": 11616 }, { "epoch": 0.3205385997647765, "grad_norm": 0.004878256935626268, "learning_rate": 0.001, "loss": 0.4247, "step": 11617 }, { "epoch": 0.32056619196584085, "grad_norm": 0.005075674969702959, "learning_rate": 0.001, "loss": 0.406, "step": 11618 }, { "epoch": 0.3205937841669052, "grad_norm": 0.005384589079767466, "learning_rate": 0.001, "loss": 0.4004, "step": 11619 }, { "epoch": 0.3206213763679696, "grad_norm": 0.004261939786374569, "learning_rate": 0.001, "loss": 0.3931, "step": 11620 }, { "epoch": 0.32064896856903397, "grad_norm": 0.008900157175958157, "learning_rate": 0.001, "loss": 0.4182, "step": 11621 }, { "epoch": 0.3206765607700983, "grad_norm": 0.0024166591465473175, "learning_rate": 0.001, "loss": 0.4527, "step": 11622 }, { "epoch": 0.3207041529711627, "grad_norm": 0.0027629456017166376, "learning_rate": 0.001, "loss": 0.3968, "step": 11623 }, { "epoch": 0.3207317451722271, "grad_norm": 0.002562351291999221, "learning_rate": 0.001, "loss": 0.3683, "step": 11624 }, { "epoch": 0.32075933737329143, "grad_norm": 0.0035787278320640326, "learning_rate": 0.001, "loss": 0.388, "step": 11625 }, { "epoch": 0.3207869295743558, "grad_norm": 0.0025681303814053535, "learning_rate": 0.001, "loss": 0.414, "step": 11626 }, { "epoch": 0.3208145217754202, "grad_norm": 0.003248112043365836, "learning_rate": 0.001, "loss": 0.4581, "step": 11627 }, { "epoch": 0.32084211397648454, "grad_norm": 0.001800362253561616, "learning_rate": 0.001, "loss": 0.4295, "step": 11628 }, { "epoch": 0.3208697061775489, "grad_norm": 0.002697241958230734, "learning_rate": 0.001, "loss": 0.3712, "step": 11629 }, { "epoch": 0.3208972983786133, "grad_norm": 0.0029813311994075775, "learning_rate": 0.001, "loss": 0.3826, "step": 11630 }, { "epoch": 0.32092489057967766, "grad_norm": 0.006515763234347105, "learning_rate": 0.001, "loss": 0.3937, "step": 11631 }, { "epoch": 0.320952482780742, "grad_norm": 0.002813748549669981, "learning_rate": 0.001, "loss": 0.3896, "step": 11632 }, { "epoch": 0.3209800749818064, "grad_norm": 0.00456245755776763, "learning_rate": 0.001, "loss": 0.4173, "step": 11633 }, { "epoch": 0.32100766718287077, "grad_norm": 0.0027176521252840757, "learning_rate": 0.001, "loss": 0.4196, "step": 11634 }, { "epoch": 0.3210352593839351, "grad_norm": 0.003955816384404898, "learning_rate": 0.001, "loss": 0.4418, "step": 11635 }, { "epoch": 0.3210628515849995, "grad_norm": 0.005285539198666811, "learning_rate": 0.001, "loss": 0.3808, "step": 11636 }, { "epoch": 0.3210904437860639, "grad_norm": 0.0025900506880134344, "learning_rate": 0.001, "loss": 0.4138, "step": 11637 }, { "epoch": 0.32111803598712824, "grad_norm": 0.006391063332557678, "learning_rate": 0.001, "loss": 0.4104, "step": 11638 }, { "epoch": 0.3211456281881926, "grad_norm": 0.0032550417818129063, "learning_rate": 0.001, "loss": 0.3613, "step": 11639 }, { "epoch": 0.321173220389257, "grad_norm": 0.0030542064923793077, "learning_rate": 0.001, "loss": 0.4094, "step": 11640 }, { "epoch": 0.32120081259032135, "grad_norm": 0.0037448366638273, "learning_rate": 0.001, "loss": 0.3858, "step": 11641 }, { "epoch": 0.3212284047913857, "grad_norm": 0.003444848582148552, "learning_rate": 0.001, "loss": 0.4203, "step": 11642 }, { "epoch": 0.3212559969924501, "grad_norm": 0.004159749951213598, "learning_rate": 0.001, "loss": 0.4023, "step": 11643 }, { "epoch": 0.32128358919351446, "grad_norm": 0.002784531796351075, "learning_rate": 0.001, "loss": 0.4541, "step": 11644 }, { "epoch": 0.3213111813945788, "grad_norm": 0.004061343614012003, "learning_rate": 0.001, "loss": 0.377, "step": 11645 }, { "epoch": 0.32133877359564317, "grad_norm": 0.003336769063025713, "learning_rate": 0.001, "loss": 0.3717, "step": 11646 }, { "epoch": 0.3213663657967076, "grad_norm": 0.002995783928781748, "learning_rate": 0.001, "loss": 0.3825, "step": 11647 }, { "epoch": 0.32139395799777193, "grad_norm": 0.0039530228823423386, "learning_rate": 0.001, "loss": 0.3861, "step": 11648 }, { "epoch": 0.3214215501988363, "grad_norm": 0.0023765622172504663, "learning_rate": 0.001, "loss": 0.3891, "step": 11649 }, { "epoch": 0.3214491423999007, "grad_norm": 0.006312607321888208, "learning_rate": 0.001, "loss": 0.3821, "step": 11650 }, { "epoch": 0.32147673460096504, "grad_norm": 0.013422048650681973, "learning_rate": 0.001, "loss": 0.3958, "step": 11651 }, { "epoch": 0.3215043268020294, "grad_norm": 0.0030129451770335436, "learning_rate": 0.001, "loss": 0.4134, "step": 11652 }, { "epoch": 0.3215319190030938, "grad_norm": 0.003410701872780919, "learning_rate": 0.001, "loss": 0.3984, "step": 11653 }, { "epoch": 0.32155951120415815, "grad_norm": 0.004033817909657955, "learning_rate": 0.001, "loss": 0.4192, "step": 11654 }, { "epoch": 0.3215871034052225, "grad_norm": 0.01334238052368164, "learning_rate": 0.001, "loss": 0.4287, "step": 11655 }, { "epoch": 0.32161469560628686, "grad_norm": 0.0067011150531470776, "learning_rate": 0.001, "loss": 0.3896, "step": 11656 }, { "epoch": 0.32164228780735127, "grad_norm": 0.022604364901781082, "learning_rate": 0.001, "loss": 0.3968, "step": 11657 }, { "epoch": 0.3216698800084156, "grad_norm": 0.00880459789186716, "learning_rate": 0.001, "loss": 0.4127, "step": 11658 }, { "epoch": 0.32169747220948, "grad_norm": 0.00856684148311615, "learning_rate": 0.001, "loss": 0.4005, "step": 11659 }, { "epoch": 0.3217250644105444, "grad_norm": 0.009844634681940079, "learning_rate": 0.001, "loss": 0.4195, "step": 11660 }, { "epoch": 0.32175265661160873, "grad_norm": 0.002705506980419159, "learning_rate": 0.001, "loss": 0.4083, "step": 11661 }, { "epoch": 0.3217802488126731, "grad_norm": 0.004265344236046076, "learning_rate": 0.001, "loss": 0.3886, "step": 11662 }, { "epoch": 0.3218078410137375, "grad_norm": 0.0042647975496947765, "learning_rate": 0.001, "loss": 0.3847, "step": 11663 }, { "epoch": 0.32183543321480185, "grad_norm": 0.0029369243420660496, "learning_rate": 0.001, "loss": 0.4092, "step": 11664 }, { "epoch": 0.3218630254158662, "grad_norm": 0.00970220472663641, "learning_rate": 0.001, "loss": 0.4123, "step": 11665 }, { "epoch": 0.32189061761693055, "grad_norm": 0.006463578902184963, "learning_rate": 0.001, "loss": 0.4051, "step": 11666 }, { "epoch": 0.32191820981799496, "grad_norm": 0.0018640676280483603, "learning_rate": 0.001, "loss": 0.4162, "step": 11667 }, { "epoch": 0.3219458020190593, "grad_norm": 0.002777427202090621, "learning_rate": 0.001, "loss": 0.4171, "step": 11668 }, { "epoch": 0.32197339422012367, "grad_norm": 0.004610841162502766, "learning_rate": 0.001, "loss": 0.3625, "step": 11669 }, { "epoch": 0.3220009864211881, "grad_norm": 0.006540005095303059, "learning_rate": 0.001, "loss": 0.4204, "step": 11670 }, { "epoch": 0.3220285786222524, "grad_norm": 0.005402629263699055, "learning_rate": 0.001, "loss": 0.4004, "step": 11671 }, { "epoch": 0.3220561708233168, "grad_norm": 0.00336782680824399, "learning_rate": 0.001, "loss": 0.4244, "step": 11672 }, { "epoch": 0.32208376302438113, "grad_norm": 0.005407973658293486, "learning_rate": 0.001, "loss": 0.388, "step": 11673 }, { "epoch": 0.32211135522544554, "grad_norm": 0.002696128562092781, "learning_rate": 0.001, "loss": 0.3825, "step": 11674 }, { "epoch": 0.3221389474265099, "grad_norm": 0.0032647987827658653, "learning_rate": 0.001, "loss": 0.3783, "step": 11675 }, { "epoch": 0.32216653962757424, "grad_norm": 0.005423584952950478, "learning_rate": 0.001, "loss": 0.3873, "step": 11676 }, { "epoch": 0.32219413182863865, "grad_norm": 0.003348333528265357, "learning_rate": 0.001, "loss": 0.3715, "step": 11677 }, { "epoch": 0.322221724029703, "grad_norm": 0.002469028811901808, "learning_rate": 0.001, "loss": 0.3968, "step": 11678 }, { "epoch": 0.32224931623076736, "grad_norm": 0.0023266442585736513, "learning_rate": 0.001, "loss": 0.398, "step": 11679 }, { "epoch": 0.32227690843183177, "grad_norm": 0.003974389284849167, "learning_rate": 0.001, "loss": 0.379, "step": 11680 }, { "epoch": 0.3223045006328961, "grad_norm": 0.002209985861554742, "learning_rate": 0.001, "loss": 0.4158, "step": 11681 }, { "epoch": 0.32233209283396047, "grad_norm": 0.002755221212282777, "learning_rate": 0.001, "loss": 0.4107, "step": 11682 }, { "epoch": 0.3223596850350248, "grad_norm": 0.003273082198575139, "learning_rate": 0.001, "loss": 0.3741, "step": 11683 }, { "epoch": 0.32238727723608923, "grad_norm": 0.003454297548159957, "learning_rate": 0.001, "loss": 0.4128, "step": 11684 }, { "epoch": 0.3224148694371536, "grad_norm": 0.0033135886769741774, "learning_rate": 0.001, "loss": 0.4422, "step": 11685 }, { "epoch": 0.32244246163821794, "grad_norm": 0.00300540286116302, "learning_rate": 0.001, "loss": 0.3943, "step": 11686 }, { "epoch": 0.32247005383928234, "grad_norm": 0.0025454771239310503, "learning_rate": 0.001, "loss": 0.4337, "step": 11687 }, { "epoch": 0.3224976460403467, "grad_norm": 0.0030585303902626038, "learning_rate": 0.001, "loss": 0.4475, "step": 11688 }, { "epoch": 0.32252523824141105, "grad_norm": 0.0024806379806250334, "learning_rate": 0.001, "loss": 0.3628, "step": 11689 }, { "epoch": 0.32255283044247546, "grad_norm": 0.002650330774486065, "learning_rate": 0.001, "loss": 0.4029, "step": 11690 }, { "epoch": 0.3225804226435398, "grad_norm": 0.003308363724499941, "learning_rate": 0.001, "loss": 0.4157, "step": 11691 }, { "epoch": 0.32260801484460416, "grad_norm": 0.0027979854494333267, "learning_rate": 0.001, "loss": 0.424, "step": 11692 }, { "epoch": 0.3226356070456685, "grad_norm": 0.0035791087429970503, "learning_rate": 0.001, "loss": 0.3991, "step": 11693 }, { "epoch": 0.3226631992467329, "grad_norm": 0.004163493402302265, "learning_rate": 0.001, "loss": 0.3944, "step": 11694 }, { "epoch": 0.3226907914477973, "grad_norm": 0.0028729864861816168, "learning_rate": 0.001, "loss": 0.4305, "step": 11695 }, { "epoch": 0.32271838364886163, "grad_norm": 0.018418997526168823, "learning_rate": 0.001, "loss": 0.4063, "step": 11696 }, { "epoch": 0.32274597584992604, "grad_norm": 0.0038654280360788107, "learning_rate": 0.001, "loss": 0.3831, "step": 11697 }, { "epoch": 0.3227735680509904, "grad_norm": 0.002807804848998785, "learning_rate": 0.001, "loss": 0.4042, "step": 11698 }, { "epoch": 0.32280116025205474, "grad_norm": 0.003091311315074563, "learning_rate": 0.001, "loss": 0.3997, "step": 11699 }, { "epoch": 0.32282875245311915, "grad_norm": 0.002582644810900092, "learning_rate": 0.001, "loss": 0.3888, "step": 11700 }, { "epoch": 0.3228563446541835, "grad_norm": 0.0029077474027872086, "learning_rate": 0.001, "loss": 0.3997, "step": 11701 }, { "epoch": 0.32288393685524786, "grad_norm": 0.004372213501483202, "learning_rate": 0.001, "loss": 0.4026, "step": 11702 }, { "epoch": 0.3229115290563122, "grad_norm": 0.005009527318179607, "learning_rate": 0.001, "loss": 0.4202, "step": 11703 }, { "epoch": 0.3229391212573766, "grad_norm": 0.002494605490937829, "learning_rate": 0.001, "loss": 0.4044, "step": 11704 }, { "epoch": 0.32296671345844097, "grad_norm": 0.0030128166545182467, "learning_rate": 0.001, "loss": 0.3958, "step": 11705 }, { "epoch": 0.3229943056595053, "grad_norm": 0.005615463946014643, "learning_rate": 0.001, "loss": 0.3868, "step": 11706 }, { "epoch": 0.32302189786056973, "grad_norm": 0.0026556167285889387, "learning_rate": 0.001, "loss": 0.4277, "step": 11707 }, { "epoch": 0.3230494900616341, "grad_norm": 0.004147225525230169, "learning_rate": 0.001, "loss": 0.4191, "step": 11708 }, { "epoch": 0.32307708226269843, "grad_norm": 0.004111787304282188, "learning_rate": 0.001, "loss": 0.3782, "step": 11709 }, { "epoch": 0.32310467446376284, "grad_norm": 0.0030196192674338818, "learning_rate": 0.001, "loss": 0.4388, "step": 11710 }, { "epoch": 0.3231322666648272, "grad_norm": 0.0029588905163109303, "learning_rate": 0.001, "loss": 0.4076, "step": 11711 }, { "epoch": 0.32315985886589155, "grad_norm": 0.0030484474264085293, "learning_rate": 0.001, "loss": 0.4257, "step": 11712 }, { "epoch": 0.3231874510669559, "grad_norm": 0.0031253311317414045, "learning_rate": 0.001, "loss": 0.3743, "step": 11713 }, { "epoch": 0.3232150432680203, "grad_norm": 0.004489220213145018, "learning_rate": 0.001, "loss": 0.3902, "step": 11714 }, { "epoch": 0.32324263546908466, "grad_norm": 0.0034049181267619133, "learning_rate": 0.001, "loss": 0.4251, "step": 11715 }, { "epoch": 0.323270227670149, "grad_norm": 0.0045754867605865, "learning_rate": 0.001, "loss": 0.3577, "step": 11716 }, { "epoch": 0.3232978198712134, "grad_norm": 0.0035644634626805782, "learning_rate": 0.001, "loss": 0.4277, "step": 11717 }, { "epoch": 0.3233254120722778, "grad_norm": 0.0033633566927164793, "learning_rate": 0.001, "loss": 0.3759, "step": 11718 }, { "epoch": 0.3233530042733421, "grad_norm": 0.003968440927565098, "learning_rate": 0.001, "loss": 0.3516, "step": 11719 }, { "epoch": 0.32338059647440653, "grad_norm": 0.003708978882059455, "learning_rate": 0.001, "loss": 0.3837, "step": 11720 }, { "epoch": 0.3234081886754709, "grad_norm": 0.002603841945528984, "learning_rate": 0.001, "loss": 0.4278, "step": 11721 }, { "epoch": 0.32343578087653524, "grad_norm": 0.0024553535040467978, "learning_rate": 0.001, "loss": 0.3641, "step": 11722 }, { "epoch": 0.3234633730775996, "grad_norm": 0.0033427192829549313, "learning_rate": 0.001, "loss": 0.3833, "step": 11723 }, { "epoch": 0.323490965278664, "grad_norm": 0.008662039414048195, "learning_rate": 0.001, "loss": 0.4312, "step": 11724 }, { "epoch": 0.32351855747972835, "grad_norm": 0.002312082564458251, "learning_rate": 0.001, "loss": 0.4505, "step": 11725 }, { "epoch": 0.3235461496807927, "grad_norm": 0.0026776569429785013, "learning_rate": 0.001, "loss": 0.3886, "step": 11726 }, { "epoch": 0.3235737418818571, "grad_norm": 0.003623252734541893, "learning_rate": 0.001, "loss": 0.4441, "step": 11727 }, { "epoch": 0.32360133408292147, "grad_norm": 0.004855128470808268, "learning_rate": 0.001, "loss": 0.4372, "step": 11728 }, { "epoch": 0.3236289262839858, "grad_norm": 0.0022675027139484882, "learning_rate": 0.001, "loss": 0.4128, "step": 11729 }, { "epoch": 0.3236565184850502, "grad_norm": 0.003087605582550168, "learning_rate": 0.001, "loss": 0.3768, "step": 11730 }, { "epoch": 0.3236841106861146, "grad_norm": 0.003196093952283263, "learning_rate": 0.001, "loss": 0.411, "step": 11731 }, { "epoch": 0.32371170288717893, "grad_norm": 0.002098439959809184, "learning_rate": 0.001, "loss": 0.4549, "step": 11732 }, { "epoch": 0.3237392950882433, "grad_norm": 0.0027286570984870195, "learning_rate": 0.001, "loss": 0.4001, "step": 11733 }, { "epoch": 0.3237668872893077, "grad_norm": 0.0031044287607073784, "learning_rate": 0.001, "loss": 0.4143, "step": 11734 }, { "epoch": 0.32379447949037204, "grad_norm": 0.002601184183731675, "learning_rate": 0.001, "loss": 0.3938, "step": 11735 }, { "epoch": 0.3238220716914364, "grad_norm": 0.003185172798112035, "learning_rate": 0.001, "loss": 0.37, "step": 11736 }, { "epoch": 0.3238496638925008, "grad_norm": 0.0032646963372826576, "learning_rate": 0.001, "loss": 0.4073, "step": 11737 }, { "epoch": 0.32387725609356516, "grad_norm": 0.004382995422929525, "learning_rate": 0.001, "loss": 0.3969, "step": 11738 }, { "epoch": 0.3239048482946295, "grad_norm": 0.004330406431108713, "learning_rate": 0.001, "loss": 0.3801, "step": 11739 }, { "epoch": 0.3239324404956939, "grad_norm": 0.01158521044999361, "learning_rate": 0.001, "loss": 0.3747, "step": 11740 }, { "epoch": 0.32396003269675827, "grad_norm": 0.002962679835036397, "learning_rate": 0.001, "loss": 0.4056, "step": 11741 }, { "epoch": 0.3239876248978226, "grad_norm": 0.0029328917153179646, "learning_rate": 0.001, "loss": 0.4242, "step": 11742 }, { "epoch": 0.324015217098887, "grad_norm": 0.004508418962359428, "learning_rate": 0.001, "loss": 0.4069, "step": 11743 }, { "epoch": 0.3240428092999514, "grad_norm": 0.0027705549728125334, "learning_rate": 0.001, "loss": 0.4, "step": 11744 }, { "epoch": 0.32407040150101574, "grad_norm": 0.0022474832367151976, "learning_rate": 0.001, "loss": 0.4098, "step": 11745 }, { "epoch": 0.3240979937020801, "grad_norm": 0.003890387015417218, "learning_rate": 0.001, "loss": 0.3878, "step": 11746 }, { "epoch": 0.3241255859031445, "grad_norm": 0.0027710788417607546, "learning_rate": 0.001, "loss": 0.3838, "step": 11747 }, { "epoch": 0.32415317810420885, "grad_norm": 0.002632656367495656, "learning_rate": 0.001, "loss": 0.3931, "step": 11748 }, { "epoch": 0.3241807703052732, "grad_norm": 0.0026942237745970488, "learning_rate": 0.001, "loss": 0.3633, "step": 11749 }, { "epoch": 0.3242083625063376, "grad_norm": 0.0025526436511427164, "learning_rate": 0.001, "loss": 0.404, "step": 11750 }, { "epoch": 0.32423595470740196, "grad_norm": 0.002429589629173279, "learning_rate": 0.001, "loss": 0.3975, "step": 11751 }, { "epoch": 0.3242635469084663, "grad_norm": 0.003830046160146594, "learning_rate": 0.001, "loss": 0.3961, "step": 11752 }, { "epoch": 0.32429113910953067, "grad_norm": 0.003458908060565591, "learning_rate": 0.001, "loss": 0.4292, "step": 11753 }, { "epoch": 0.3243187313105951, "grad_norm": 0.002460696967318654, "learning_rate": 0.001, "loss": 0.3917, "step": 11754 }, { "epoch": 0.32434632351165943, "grad_norm": 0.004534050356596708, "learning_rate": 0.001, "loss": 0.4216, "step": 11755 }, { "epoch": 0.3243739157127238, "grad_norm": 0.0023591818753629923, "learning_rate": 0.001, "loss": 0.3782, "step": 11756 }, { "epoch": 0.3244015079137882, "grad_norm": 0.0029876313637942076, "learning_rate": 0.001, "loss": 0.3675, "step": 11757 }, { "epoch": 0.32442910011485254, "grad_norm": 0.003414266975596547, "learning_rate": 0.001, "loss": 0.3941, "step": 11758 }, { "epoch": 0.3244566923159169, "grad_norm": 0.004012831952422857, "learning_rate": 0.001, "loss": 0.3652, "step": 11759 }, { "epoch": 0.3244842845169813, "grad_norm": 0.0034918745514005423, "learning_rate": 0.001, "loss": 0.419, "step": 11760 }, { "epoch": 0.32451187671804566, "grad_norm": 0.002764191012829542, "learning_rate": 0.001, "loss": 0.3974, "step": 11761 }, { "epoch": 0.32453946891911, "grad_norm": 0.003841676749289036, "learning_rate": 0.001, "loss": 0.3758, "step": 11762 }, { "epoch": 0.32456706112017436, "grad_norm": 0.004220995120704174, "learning_rate": 0.001, "loss": 0.422, "step": 11763 }, { "epoch": 0.32459465332123877, "grad_norm": 0.003612626576796174, "learning_rate": 0.001, "loss": 0.4017, "step": 11764 }, { "epoch": 0.3246222455223031, "grad_norm": 0.0025354884564876556, "learning_rate": 0.001, "loss": 0.3927, "step": 11765 }, { "epoch": 0.3246498377233675, "grad_norm": 0.003477931022644043, "learning_rate": 0.001, "loss": 0.3919, "step": 11766 }, { "epoch": 0.3246774299244319, "grad_norm": 0.004952993709594011, "learning_rate": 0.001, "loss": 0.4138, "step": 11767 }, { "epoch": 0.32470502212549623, "grad_norm": 0.0031629884615540504, "learning_rate": 0.001, "loss": 0.358, "step": 11768 }, { "epoch": 0.3247326143265606, "grad_norm": 0.002113747876137495, "learning_rate": 0.001, "loss": 0.4527, "step": 11769 }, { "epoch": 0.32476020652762494, "grad_norm": 0.002285837195813656, "learning_rate": 0.001, "loss": 0.4368, "step": 11770 }, { "epoch": 0.32478779872868935, "grad_norm": 0.003418036038056016, "learning_rate": 0.001, "loss": 0.4313, "step": 11771 }, { "epoch": 0.3248153909297537, "grad_norm": 0.00575110362842679, "learning_rate": 0.001, "loss": 0.399, "step": 11772 }, { "epoch": 0.32484298313081805, "grad_norm": 0.0031514798756688833, "learning_rate": 0.001, "loss": 0.4403, "step": 11773 }, { "epoch": 0.32487057533188246, "grad_norm": 0.0029524280689656734, "learning_rate": 0.001, "loss": 0.3897, "step": 11774 }, { "epoch": 0.3248981675329468, "grad_norm": 0.0038776015862822533, "learning_rate": 0.001, "loss": 0.4326, "step": 11775 }, { "epoch": 0.32492575973401117, "grad_norm": 0.006274119019508362, "learning_rate": 0.001, "loss": 0.3959, "step": 11776 }, { "epoch": 0.3249533519350756, "grad_norm": 0.00265169283375144, "learning_rate": 0.001, "loss": 0.395, "step": 11777 }, { "epoch": 0.3249809441361399, "grad_norm": 0.0040013487450778484, "learning_rate": 0.001, "loss": 0.3982, "step": 11778 }, { "epoch": 0.3250085363372043, "grad_norm": 0.004476572852581739, "learning_rate": 0.001, "loss": 0.426, "step": 11779 }, { "epoch": 0.32503612853826863, "grad_norm": 0.00346595561131835, "learning_rate": 0.001, "loss": 0.3974, "step": 11780 }, { "epoch": 0.32506372073933304, "grad_norm": 0.004196068271994591, "learning_rate": 0.001, "loss": 0.3683, "step": 11781 }, { "epoch": 0.3250913129403974, "grad_norm": 0.0036759015638381243, "learning_rate": 0.001, "loss": 0.3842, "step": 11782 }, { "epoch": 0.32511890514146174, "grad_norm": 0.0028389294166117907, "learning_rate": 0.001, "loss": 0.3608, "step": 11783 }, { "epoch": 0.32514649734252615, "grad_norm": 0.00459603127092123, "learning_rate": 0.001, "loss": 0.4131, "step": 11784 }, { "epoch": 0.3251740895435905, "grad_norm": 0.005619920324534178, "learning_rate": 0.001, "loss": 0.3996, "step": 11785 }, { "epoch": 0.32520168174465486, "grad_norm": 0.009859694167971611, "learning_rate": 0.001, "loss": 0.3829, "step": 11786 }, { "epoch": 0.32522927394571927, "grad_norm": 0.006388423964381218, "learning_rate": 0.001, "loss": 0.4, "step": 11787 }, { "epoch": 0.3252568661467836, "grad_norm": 0.006722534541040659, "learning_rate": 0.001, "loss": 0.3912, "step": 11788 }, { "epoch": 0.32528445834784797, "grad_norm": 0.008275543339550495, "learning_rate": 0.001, "loss": 0.3877, "step": 11789 }, { "epoch": 0.3253120505489123, "grad_norm": 0.004540057387202978, "learning_rate": 0.001, "loss": 0.43, "step": 11790 }, { "epoch": 0.32533964274997673, "grad_norm": 0.0027223837096244097, "learning_rate": 0.001, "loss": 0.427, "step": 11791 }, { "epoch": 0.3253672349510411, "grad_norm": 0.003166953567415476, "learning_rate": 0.001, "loss": 0.4142, "step": 11792 }, { "epoch": 0.32539482715210544, "grad_norm": 0.004159968346357346, "learning_rate": 0.001, "loss": 0.3904, "step": 11793 }, { "epoch": 0.32542241935316985, "grad_norm": 0.002168782986700535, "learning_rate": 0.001, "loss": 0.4455, "step": 11794 }, { "epoch": 0.3254500115542342, "grad_norm": 0.0049192942678928375, "learning_rate": 0.001, "loss": 0.3977, "step": 11795 }, { "epoch": 0.32547760375529855, "grad_norm": 0.0027885152958333492, "learning_rate": 0.001, "loss": 0.3801, "step": 11796 }, { "epoch": 0.32550519595636296, "grad_norm": 0.005841918755322695, "learning_rate": 0.001, "loss": 0.3976, "step": 11797 }, { "epoch": 0.3255327881574273, "grad_norm": 0.00430692546069622, "learning_rate": 0.001, "loss": 0.4142, "step": 11798 }, { "epoch": 0.32556038035849166, "grad_norm": 0.003325339872390032, "learning_rate": 0.001, "loss": 0.3958, "step": 11799 }, { "epoch": 0.325587972559556, "grad_norm": 0.0032596606761217117, "learning_rate": 0.001, "loss": 0.3914, "step": 11800 }, { "epoch": 0.3256155647606204, "grad_norm": 0.004303377121686935, "learning_rate": 0.001, "loss": 0.3912, "step": 11801 }, { "epoch": 0.3256431569616848, "grad_norm": 0.002758364425972104, "learning_rate": 0.001, "loss": 0.4049, "step": 11802 }, { "epoch": 0.32567074916274913, "grad_norm": 0.00543051864951849, "learning_rate": 0.001, "loss": 0.3508, "step": 11803 }, { "epoch": 0.32569834136381354, "grad_norm": 0.003291371511295438, "learning_rate": 0.001, "loss": 0.4341, "step": 11804 }, { "epoch": 0.3257259335648779, "grad_norm": 0.0028754028026014566, "learning_rate": 0.001, "loss": 0.3812, "step": 11805 }, { "epoch": 0.32575352576594224, "grad_norm": 0.002680160803720355, "learning_rate": 0.001, "loss": 0.3901, "step": 11806 }, { "epoch": 0.32578111796700665, "grad_norm": 0.0026779999025166035, "learning_rate": 0.001, "loss": 0.3638, "step": 11807 }, { "epoch": 0.325808710168071, "grad_norm": 0.0029692722018808126, "learning_rate": 0.001, "loss": 0.3997, "step": 11808 }, { "epoch": 0.32583630236913536, "grad_norm": 0.00280319363810122, "learning_rate": 0.001, "loss": 0.4106, "step": 11809 }, { "epoch": 0.3258638945701997, "grad_norm": 0.002986667212098837, "learning_rate": 0.001, "loss": 0.4094, "step": 11810 }, { "epoch": 0.3258914867712641, "grad_norm": 0.003013553563505411, "learning_rate": 0.001, "loss": 0.4467, "step": 11811 }, { "epoch": 0.32591907897232847, "grad_norm": 0.002967800246551633, "learning_rate": 0.001, "loss": 0.4084, "step": 11812 }, { "epoch": 0.3259466711733928, "grad_norm": 0.0031372224912047386, "learning_rate": 0.001, "loss": 0.3709, "step": 11813 }, { "epoch": 0.32597426337445723, "grad_norm": 0.0031039847526699305, "learning_rate": 0.001, "loss": 0.3604, "step": 11814 }, { "epoch": 0.3260018555755216, "grad_norm": 0.003049139864742756, "learning_rate": 0.001, "loss": 0.377, "step": 11815 }, { "epoch": 0.32602944777658593, "grad_norm": 0.0029599741101264954, "learning_rate": 0.001, "loss": 0.4062, "step": 11816 }, { "epoch": 0.32605703997765034, "grad_norm": 0.003724336624145508, "learning_rate": 0.001, "loss": 0.407, "step": 11817 }, { "epoch": 0.3260846321787147, "grad_norm": 0.0028906487859785557, "learning_rate": 0.001, "loss": 0.414, "step": 11818 }, { "epoch": 0.32611222437977905, "grad_norm": 0.003816339885815978, "learning_rate": 0.001, "loss": 0.3857, "step": 11819 }, { "epoch": 0.3261398165808434, "grad_norm": 0.005886279512196779, "learning_rate": 0.001, "loss": 0.4197, "step": 11820 }, { "epoch": 0.3261674087819078, "grad_norm": 0.0029236190021038055, "learning_rate": 0.001, "loss": 0.4003, "step": 11821 }, { "epoch": 0.32619500098297216, "grad_norm": 0.005116250831633806, "learning_rate": 0.001, "loss": 0.3837, "step": 11822 }, { "epoch": 0.3262225931840365, "grad_norm": 0.0027175480499863625, "learning_rate": 0.001, "loss": 0.3772, "step": 11823 }, { "epoch": 0.3262501853851009, "grad_norm": 0.005795364733785391, "learning_rate": 0.001, "loss": 0.3854, "step": 11824 }, { "epoch": 0.3262777775861653, "grad_norm": 0.0028781124856323004, "learning_rate": 0.001, "loss": 0.4071, "step": 11825 }, { "epoch": 0.3263053697872296, "grad_norm": 0.008585303090512753, "learning_rate": 0.001, "loss": 0.3939, "step": 11826 }, { "epoch": 0.32633296198829403, "grad_norm": 0.0027091645170003176, "learning_rate": 0.001, "loss": 0.3958, "step": 11827 }, { "epoch": 0.3263605541893584, "grad_norm": 0.0028620229568332434, "learning_rate": 0.001, "loss": 0.4246, "step": 11828 }, { "epoch": 0.32638814639042274, "grad_norm": 0.0028989813290536404, "learning_rate": 0.001, "loss": 0.3798, "step": 11829 }, { "epoch": 0.3264157385914871, "grad_norm": 0.005880692508071661, "learning_rate": 0.001, "loss": 0.3887, "step": 11830 }, { "epoch": 0.3264433307925515, "grad_norm": 0.005375871900469065, "learning_rate": 0.001, "loss": 0.3917, "step": 11831 }, { "epoch": 0.32647092299361585, "grad_norm": 0.00993763655424118, "learning_rate": 0.001, "loss": 0.3602, "step": 11832 }, { "epoch": 0.3264985151946802, "grad_norm": 0.004657833371311426, "learning_rate": 0.001, "loss": 0.408, "step": 11833 }, { "epoch": 0.3265261073957446, "grad_norm": 0.0023800579365342855, "learning_rate": 0.001, "loss": 0.3682, "step": 11834 }, { "epoch": 0.32655369959680897, "grad_norm": 0.003295092610642314, "learning_rate": 0.001, "loss": 0.405, "step": 11835 }, { "epoch": 0.3265812917978733, "grad_norm": 0.0033699970226734877, "learning_rate": 0.001, "loss": 0.4249, "step": 11836 }, { "epoch": 0.3266088839989377, "grad_norm": 0.0023069006856530905, "learning_rate": 0.001, "loss": 0.381, "step": 11837 }, { "epoch": 0.3266364762000021, "grad_norm": 0.002622458152472973, "learning_rate": 0.001, "loss": 0.4079, "step": 11838 }, { "epoch": 0.32666406840106643, "grad_norm": 0.0072345067746937275, "learning_rate": 0.001, "loss": 0.3669, "step": 11839 }, { "epoch": 0.3266916606021308, "grad_norm": 0.002343923319131136, "learning_rate": 0.001, "loss": 0.4473, "step": 11840 }, { "epoch": 0.3267192528031952, "grad_norm": 0.003922601230442524, "learning_rate": 0.001, "loss": 0.3833, "step": 11841 }, { "epoch": 0.32674684500425955, "grad_norm": 0.002412164816632867, "learning_rate": 0.001, "loss": 0.43, "step": 11842 }, { "epoch": 0.3267744372053239, "grad_norm": 0.004316416569054127, "learning_rate": 0.001, "loss": 0.4081, "step": 11843 }, { "epoch": 0.3268020294063883, "grad_norm": 0.0025885479990392923, "learning_rate": 0.001, "loss": 0.3644, "step": 11844 }, { "epoch": 0.32682962160745266, "grad_norm": 0.008287443779408932, "learning_rate": 0.001, "loss": 0.3932, "step": 11845 }, { "epoch": 0.326857213808517, "grad_norm": 0.0027159906458109617, "learning_rate": 0.001, "loss": 0.4268, "step": 11846 }, { "epoch": 0.3268848060095814, "grad_norm": 0.002340937964618206, "learning_rate": 0.001, "loss": 0.3868, "step": 11847 }, { "epoch": 0.32691239821064577, "grad_norm": 0.0027317411731928587, "learning_rate": 0.001, "loss": 0.3732, "step": 11848 }, { "epoch": 0.3269399904117101, "grad_norm": 0.0028033903799951077, "learning_rate": 0.001, "loss": 0.3836, "step": 11849 }, { "epoch": 0.3269675826127745, "grad_norm": 0.004709617234766483, "learning_rate": 0.001, "loss": 0.3827, "step": 11850 }, { "epoch": 0.3269951748138389, "grad_norm": 0.005631160456687212, "learning_rate": 0.001, "loss": 0.4108, "step": 11851 }, { "epoch": 0.32702276701490324, "grad_norm": 0.016221268102526665, "learning_rate": 0.001, "loss": 0.4047, "step": 11852 }, { "epoch": 0.3270503592159676, "grad_norm": 0.0028514659497886896, "learning_rate": 0.001, "loss": 0.4052, "step": 11853 }, { "epoch": 0.327077951417032, "grad_norm": 0.00452115572988987, "learning_rate": 0.001, "loss": 0.3871, "step": 11854 }, { "epoch": 0.32710554361809635, "grad_norm": 0.00457549886777997, "learning_rate": 0.001, "loss": 0.4025, "step": 11855 }, { "epoch": 0.3271331358191607, "grad_norm": 0.0027922464068979025, "learning_rate": 0.001, "loss": 0.3936, "step": 11856 }, { "epoch": 0.3271607280202251, "grad_norm": 0.005448077339679003, "learning_rate": 0.001, "loss": 0.3526, "step": 11857 }, { "epoch": 0.32718832022128946, "grad_norm": 0.007846580818295479, "learning_rate": 0.001, "loss": 0.4052, "step": 11858 }, { "epoch": 0.3272159124223538, "grad_norm": 0.0035153308417648077, "learning_rate": 0.001, "loss": 0.3719, "step": 11859 }, { "epoch": 0.32724350462341817, "grad_norm": 0.0028227800503373146, "learning_rate": 0.001, "loss": 0.3836, "step": 11860 }, { "epoch": 0.3272710968244826, "grad_norm": 0.004079067148268223, "learning_rate": 0.001, "loss": 0.3951, "step": 11861 }, { "epoch": 0.32729868902554693, "grad_norm": 0.007391565479338169, "learning_rate": 0.001, "loss": 0.3974, "step": 11862 }, { "epoch": 0.3273262812266113, "grad_norm": 0.0028963603544980288, "learning_rate": 0.001, "loss": 0.4337, "step": 11863 }, { "epoch": 0.3273538734276757, "grad_norm": 0.002462986158207059, "learning_rate": 0.001, "loss": 0.3778, "step": 11864 }, { "epoch": 0.32738146562874004, "grad_norm": 0.0026601895224303007, "learning_rate": 0.001, "loss": 0.4071, "step": 11865 }, { "epoch": 0.3274090578298044, "grad_norm": 0.0028002276085317135, "learning_rate": 0.001, "loss": 0.3891, "step": 11866 }, { "epoch": 0.32743665003086875, "grad_norm": 0.0025977008044719696, "learning_rate": 0.001, "loss": 0.4384, "step": 11867 }, { "epoch": 0.32746424223193316, "grad_norm": 0.0031972804572433233, "learning_rate": 0.001, "loss": 0.4012, "step": 11868 }, { "epoch": 0.3274918344329975, "grad_norm": 0.0031719047110527754, "learning_rate": 0.001, "loss": 0.386, "step": 11869 }, { "epoch": 0.32751942663406186, "grad_norm": 0.021741317585110664, "learning_rate": 0.001, "loss": 0.3954, "step": 11870 }, { "epoch": 0.32754701883512627, "grad_norm": 0.005185617599636316, "learning_rate": 0.001, "loss": 0.3913, "step": 11871 }, { "epoch": 0.3275746110361906, "grad_norm": 0.004073983523994684, "learning_rate": 0.001, "loss": 0.3319, "step": 11872 }, { "epoch": 0.327602203237255, "grad_norm": 0.003672317136079073, "learning_rate": 0.001, "loss": 0.3986, "step": 11873 }, { "epoch": 0.3276297954383194, "grad_norm": 0.003211831208318472, "learning_rate": 0.001, "loss": 0.3866, "step": 11874 }, { "epoch": 0.32765738763938373, "grad_norm": 0.005349930375814438, "learning_rate": 0.001, "loss": 0.3879, "step": 11875 }, { "epoch": 0.3276849798404481, "grad_norm": 0.001996625680476427, "learning_rate": 0.001, "loss": 0.4215, "step": 11876 }, { "epoch": 0.32771257204151244, "grad_norm": 0.0034636626951396465, "learning_rate": 0.001, "loss": 0.3961, "step": 11877 }, { "epoch": 0.32774016424257685, "grad_norm": 0.006974721793085337, "learning_rate": 0.001, "loss": 0.3961, "step": 11878 }, { "epoch": 0.3277677564436412, "grad_norm": 0.004640425555408001, "learning_rate": 0.001, "loss": 0.4197, "step": 11879 }, { "epoch": 0.32779534864470555, "grad_norm": 0.0025745509192347527, "learning_rate": 0.001, "loss": 0.3604, "step": 11880 }, { "epoch": 0.32782294084576996, "grad_norm": 0.0033829696476459503, "learning_rate": 0.001, "loss": 0.3901, "step": 11881 }, { "epoch": 0.3278505330468343, "grad_norm": 0.003960746806114912, "learning_rate": 0.001, "loss": 0.3708, "step": 11882 }, { "epoch": 0.32787812524789867, "grad_norm": 0.006613335572183132, "learning_rate": 0.001, "loss": 0.3652, "step": 11883 }, { "epoch": 0.3279057174489631, "grad_norm": 0.003319015959277749, "learning_rate": 0.001, "loss": 0.389, "step": 11884 }, { "epoch": 0.3279333096500274, "grad_norm": 0.0034291057381778955, "learning_rate": 0.001, "loss": 0.393, "step": 11885 }, { "epoch": 0.3279609018510918, "grad_norm": 0.0027653754223138094, "learning_rate": 0.001, "loss": 0.3873, "step": 11886 }, { "epoch": 0.32798849405215613, "grad_norm": 0.00384811544790864, "learning_rate": 0.001, "loss": 0.3987, "step": 11887 }, { "epoch": 0.32801608625322054, "grad_norm": 0.0028639482334256172, "learning_rate": 0.001, "loss": 0.3981, "step": 11888 }, { "epoch": 0.3280436784542849, "grad_norm": 0.0026863133534789085, "learning_rate": 0.001, "loss": 0.4167, "step": 11889 }, { "epoch": 0.32807127065534925, "grad_norm": 0.0028144351672381163, "learning_rate": 0.001, "loss": 0.3815, "step": 11890 }, { "epoch": 0.32809886285641365, "grad_norm": 0.010337802581489086, "learning_rate": 0.001, "loss": 0.3988, "step": 11891 }, { "epoch": 0.328126455057478, "grad_norm": 0.01729881763458252, "learning_rate": 0.001, "loss": 0.4123, "step": 11892 }, { "epoch": 0.32815404725854236, "grad_norm": 0.01979982666671276, "learning_rate": 0.001, "loss": 0.3943, "step": 11893 }, { "epoch": 0.32818163945960677, "grad_norm": 0.003908053506165743, "learning_rate": 0.001, "loss": 0.3937, "step": 11894 }, { "epoch": 0.3282092316606711, "grad_norm": 0.0044098771177232265, "learning_rate": 0.001, "loss": 0.4198, "step": 11895 }, { "epoch": 0.32823682386173547, "grad_norm": 0.0028675422072410583, "learning_rate": 0.001, "loss": 0.4231, "step": 11896 }, { "epoch": 0.3282644160627998, "grad_norm": 0.006255595479160547, "learning_rate": 0.001, "loss": 0.3649, "step": 11897 }, { "epoch": 0.32829200826386423, "grad_norm": 0.0025888034142553806, "learning_rate": 0.001, "loss": 0.4051, "step": 11898 }, { "epoch": 0.3283196004649286, "grad_norm": 0.0021037342958152294, "learning_rate": 0.001, "loss": 0.4037, "step": 11899 }, { "epoch": 0.32834719266599294, "grad_norm": 0.003093705978244543, "learning_rate": 0.001, "loss": 0.3568, "step": 11900 }, { "epoch": 0.32837478486705735, "grad_norm": 0.0024948539212346077, "learning_rate": 0.001, "loss": 0.4112, "step": 11901 }, { "epoch": 0.3284023770681217, "grad_norm": 0.0028005533386021852, "learning_rate": 0.001, "loss": 0.3761, "step": 11902 }, { "epoch": 0.32842996926918605, "grad_norm": 0.006543453317135572, "learning_rate": 0.001, "loss": 0.3957, "step": 11903 }, { "epoch": 0.32845756147025046, "grad_norm": 0.0032540878746658564, "learning_rate": 0.001, "loss": 0.4036, "step": 11904 }, { "epoch": 0.3284851536713148, "grad_norm": 0.017330944538116455, "learning_rate": 0.001, "loss": 0.392, "step": 11905 }, { "epoch": 0.32851274587237916, "grad_norm": 0.015614539384841919, "learning_rate": 0.001, "loss": 0.3827, "step": 11906 }, { "epoch": 0.3285403380734435, "grad_norm": 0.04733549430966377, "learning_rate": 0.001, "loss": 0.4134, "step": 11907 }, { "epoch": 0.3285679302745079, "grad_norm": 0.062019579112529755, "learning_rate": 0.001, "loss": 0.371, "step": 11908 }, { "epoch": 0.3285955224755723, "grad_norm": 0.008806095458567142, "learning_rate": 0.001, "loss": 0.4204, "step": 11909 }, { "epoch": 0.32862311467663663, "grad_norm": 0.052441034466028214, "learning_rate": 0.001, "loss": 0.3895, "step": 11910 }, { "epoch": 0.32865070687770104, "grad_norm": 0.004011763259768486, "learning_rate": 0.001, "loss": 0.3924, "step": 11911 }, { "epoch": 0.3286782990787654, "grad_norm": 0.004210361745208502, "learning_rate": 0.001, "loss": 0.3974, "step": 11912 }, { "epoch": 0.32870589127982974, "grad_norm": 0.009359506890177727, "learning_rate": 0.001, "loss": 0.352, "step": 11913 }, { "epoch": 0.32873348348089415, "grad_norm": 0.007341593038290739, "learning_rate": 0.001, "loss": 0.3809, "step": 11914 }, { "epoch": 0.3287610756819585, "grad_norm": 0.005077636335045099, "learning_rate": 0.001, "loss": 0.3935, "step": 11915 }, { "epoch": 0.32878866788302286, "grad_norm": 0.01000258419662714, "learning_rate": 0.001, "loss": 0.407, "step": 11916 }, { "epoch": 0.3288162600840872, "grad_norm": 0.030418379232287407, "learning_rate": 0.001, "loss": 0.3816, "step": 11917 }, { "epoch": 0.3288438522851516, "grad_norm": 0.003933771047741175, "learning_rate": 0.001, "loss": 0.4188, "step": 11918 }, { "epoch": 0.32887144448621597, "grad_norm": 0.002575295278802514, "learning_rate": 0.001, "loss": 0.4108, "step": 11919 }, { "epoch": 0.3288990366872803, "grad_norm": 0.002374958945438266, "learning_rate": 0.001, "loss": 0.429, "step": 11920 }, { "epoch": 0.32892662888834473, "grad_norm": 0.003249475732445717, "learning_rate": 0.001, "loss": 0.3692, "step": 11921 }, { "epoch": 0.3289542210894091, "grad_norm": 0.002772397128865123, "learning_rate": 0.001, "loss": 0.3916, "step": 11922 }, { "epoch": 0.32898181329047343, "grad_norm": 0.0042383186519145966, "learning_rate": 0.001, "loss": 0.3551, "step": 11923 }, { "epoch": 0.32900940549153784, "grad_norm": 0.003252743510529399, "learning_rate": 0.001, "loss": 0.4146, "step": 11924 }, { "epoch": 0.3290369976926022, "grad_norm": 0.0025390556547790766, "learning_rate": 0.001, "loss": 0.3918, "step": 11925 }, { "epoch": 0.32906458989366655, "grad_norm": 0.002343092579394579, "learning_rate": 0.001, "loss": 0.4287, "step": 11926 }, { "epoch": 0.3290921820947309, "grad_norm": 0.0033002211712300777, "learning_rate": 0.001, "loss": 0.3726, "step": 11927 }, { "epoch": 0.3291197742957953, "grad_norm": 0.003043625270947814, "learning_rate": 0.001, "loss": 0.4038, "step": 11928 }, { "epoch": 0.32914736649685966, "grad_norm": 0.006221500225365162, "learning_rate": 0.001, "loss": 0.4217, "step": 11929 }, { "epoch": 0.329174958697924, "grad_norm": 0.004655203316360712, "learning_rate": 0.001, "loss": 0.4154, "step": 11930 }, { "epoch": 0.3292025508989884, "grad_norm": 0.0021311945747584105, "learning_rate": 0.001, "loss": 0.3988, "step": 11931 }, { "epoch": 0.3292301431000528, "grad_norm": 0.0027006971649825573, "learning_rate": 0.001, "loss": 0.4346, "step": 11932 }, { "epoch": 0.3292577353011171, "grad_norm": 0.002889286493882537, "learning_rate": 0.001, "loss": 0.3541, "step": 11933 }, { "epoch": 0.32928532750218154, "grad_norm": 0.007051974534988403, "learning_rate": 0.001, "loss": 0.4284, "step": 11934 }, { "epoch": 0.3293129197032459, "grad_norm": 0.002629111986607313, "learning_rate": 0.001, "loss": 0.409, "step": 11935 }, { "epoch": 0.32934051190431024, "grad_norm": 0.004683708306401968, "learning_rate": 0.001, "loss": 0.3419, "step": 11936 }, { "epoch": 0.3293681041053746, "grad_norm": 0.003245966276153922, "learning_rate": 0.001, "loss": 0.394, "step": 11937 }, { "epoch": 0.329395696306439, "grad_norm": 0.0032094584312289953, "learning_rate": 0.001, "loss": 0.3986, "step": 11938 }, { "epoch": 0.32942328850750335, "grad_norm": 0.0030683856457471848, "learning_rate": 0.001, "loss": 0.3863, "step": 11939 }, { "epoch": 0.3294508807085677, "grad_norm": 0.0035426607355475426, "learning_rate": 0.001, "loss": 0.4418, "step": 11940 }, { "epoch": 0.3294784729096321, "grad_norm": 0.003129177028313279, "learning_rate": 0.001, "loss": 0.3972, "step": 11941 }, { "epoch": 0.32950606511069647, "grad_norm": 0.003153977682814002, "learning_rate": 0.001, "loss": 0.3795, "step": 11942 }, { "epoch": 0.3295336573117608, "grad_norm": 0.0056450688280165195, "learning_rate": 0.001, "loss": 0.3759, "step": 11943 }, { "epoch": 0.3295612495128252, "grad_norm": 0.0025312695652246475, "learning_rate": 0.001, "loss": 0.3729, "step": 11944 }, { "epoch": 0.3295888417138896, "grad_norm": 0.0025766692124307156, "learning_rate": 0.001, "loss": 0.4299, "step": 11945 }, { "epoch": 0.32961643391495393, "grad_norm": 0.005080906208604574, "learning_rate": 0.001, "loss": 0.4023, "step": 11946 }, { "epoch": 0.3296440261160183, "grad_norm": 0.0058040316216647625, "learning_rate": 0.001, "loss": 0.3528, "step": 11947 }, { "epoch": 0.3296716183170827, "grad_norm": 0.0035558012314140797, "learning_rate": 0.001, "loss": 0.3893, "step": 11948 }, { "epoch": 0.32969921051814705, "grad_norm": 0.003042223397642374, "learning_rate": 0.001, "loss": 0.3936, "step": 11949 }, { "epoch": 0.3297268027192114, "grad_norm": 0.0033221363555639982, "learning_rate": 0.001, "loss": 0.419, "step": 11950 }, { "epoch": 0.3297543949202758, "grad_norm": 0.0032754812855273485, "learning_rate": 0.001, "loss": 0.3948, "step": 11951 }, { "epoch": 0.32978198712134016, "grad_norm": 0.002569601172581315, "learning_rate": 0.001, "loss": 0.3701, "step": 11952 }, { "epoch": 0.3298095793224045, "grad_norm": 0.003200158243998885, "learning_rate": 0.001, "loss": 0.4138, "step": 11953 }, { "epoch": 0.32983717152346886, "grad_norm": 0.003082757582888007, "learning_rate": 0.001, "loss": 0.369, "step": 11954 }, { "epoch": 0.32986476372453327, "grad_norm": 0.002875220263376832, "learning_rate": 0.001, "loss": 0.3803, "step": 11955 }, { "epoch": 0.3298923559255976, "grad_norm": 0.002740328898653388, "learning_rate": 0.001, "loss": 0.3595, "step": 11956 }, { "epoch": 0.329919948126662, "grad_norm": 0.004781671334058046, "learning_rate": 0.001, "loss": 0.4139, "step": 11957 }, { "epoch": 0.3299475403277264, "grad_norm": 0.002997961826622486, "learning_rate": 0.001, "loss": 0.401, "step": 11958 }, { "epoch": 0.32997513252879074, "grad_norm": 0.002518662018701434, "learning_rate": 0.001, "loss": 0.3957, "step": 11959 }, { "epoch": 0.3300027247298551, "grad_norm": 0.0048488592728972435, "learning_rate": 0.001, "loss": 0.4122, "step": 11960 }, { "epoch": 0.3300303169309195, "grad_norm": 0.005046727601438761, "learning_rate": 0.001, "loss": 0.388, "step": 11961 }, { "epoch": 0.33005790913198385, "grad_norm": 0.0023717847652733326, "learning_rate": 0.001, "loss": 0.4206, "step": 11962 }, { "epoch": 0.3300855013330482, "grad_norm": 0.002914504613727331, "learning_rate": 0.001, "loss": 0.4109, "step": 11963 }, { "epoch": 0.33011309353411256, "grad_norm": 0.006090945564210415, "learning_rate": 0.001, "loss": 0.3835, "step": 11964 }, { "epoch": 0.33014068573517696, "grad_norm": 0.0030452022328972816, "learning_rate": 0.001, "loss": 0.3956, "step": 11965 }, { "epoch": 0.3301682779362413, "grad_norm": 0.0026935841888189316, "learning_rate": 0.001, "loss": 0.414, "step": 11966 }, { "epoch": 0.33019587013730567, "grad_norm": 0.003028253326192498, "learning_rate": 0.001, "loss": 0.3754, "step": 11967 }, { "epoch": 0.3302234623383701, "grad_norm": 0.004835574887692928, "learning_rate": 0.001, "loss": 0.3862, "step": 11968 }, { "epoch": 0.33025105453943443, "grad_norm": 0.0029693404212594032, "learning_rate": 0.001, "loss": 0.403, "step": 11969 }, { "epoch": 0.3302786467404988, "grad_norm": 0.0037660168018192053, "learning_rate": 0.001, "loss": 0.4376, "step": 11970 }, { "epoch": 0.3303062389415632, "grad_norm": 0.0026343679055571556, "learning_rate": 0.001, "loss": 0.4401, "step": 11971 }, { "epoch": 0.33033383114262754, "grad_norm": 0.002500650705769658, "learning_rate": 0.001, "loss": 0.4046, "step": 11972 }, { "epoch": 0.3303614233436919, "grad_norm": 0.0031541388016194105, "learning_rate": 0.001, "loss": 0.4058, "step": 11973 }, { "epoch": 0.33038901554475625, "grad_norm": 0.0035852715373039246, "learning_rate": 0.001, "loss": 0.3842, "step": 11974 }, { "epoch": 0.33041660774582066, "grad_norm": 0.0040337685495615005, "learning_rate": 0.001, "loss": 0.4281, "step": 11975 }, { "epoch": 0.330444199946885, "grad_norm": 0.0031562617514282465, "learning_rate": 0.001, "loss": 0.3547, "step": 11976 }, { "epoch": 0.33047179214794936, "grad_norm": 0.004418304655700922, "learning_rate": 0.001, "loss": 0.3938, "step": 11977 }, { "epoch": 0.33049938434901377, "grad_norm": 0.0028235798235982656, "learning_rate": 0.001, "loss": 0.3749, "step": 11978 }, { "epoch": 0.3305269765500781, "grad_norm": 0.0024365021381527185, "learning_rate": 0.001, "loss": 0.4025, "step": 11979 }, { "epoch": 0.3305545687511425, "grad_norm": 0.003930834122002125, "learning_rate": 0.001, "loss": 0.3923, "step": 11980 }, { "epoch": 0.3305821609522069, "grad_norm": 0.002820041496306658, "learning_rate": 0.001, "loss": 0.4124, "step": 11981 }, { "epoch": 0.33060975315327124, "grad_norm": 0.0039013477507978678, "learning_rate": 0.001, "loss": 0.4256, "step": 11982 }, { "epoch": 0.3306373453543356, "grad_norm": 0.002920904429629445, "learning_rate": 0.001, "loss": 0.389, "step": 11983 }, { "epoch": 0.33066493755539994, "grad_norm": 0.0036882515996694565, "learning_rate": 0.001, "loss": 0.3485, "step": 11984 }, { "epoch": 0.33069252975646435, "grad_norm": 0.0034662606194615364, "learning_rate": 0.001, "loss": 0.4022, "step": 11985 }, { "epoch": 0.3307201219575287, "grad_norm": 0.003996389918029308, "learning_rate": 0.001, "loss": 0.4232, "step": 11986 }, { "epoch": 0.33074771415859305, "grad_norm": 0.002916965400800109, "learning_rate": 0.001, "loss": 0.4265, "step": 11987 }, { "epoch": 0.33077530635965746, "grad_norm": 0.0024744670372456312, "learning_rate": 0.001, "loss": 0.4377, "step": 11988 }, { "epoch": 0.3308028985607218, "grad_norm": 0.005105322692543268, "learning_rate": 0.001, "loss": 0.412, "step": 11989 }, { "epoch": 0.33083049076178617, "grad_norm": 0.0023484451230615377, "learning_rate": 0.001, "loss": 0.4235, "step": 11990 }, { "epoch": 0.3308580829628506, "grad_norm": 0.0028626341372728348, "learning_rate": 0.001, "loss": 0.3979, "step": 11991 }, { "epoch": 0.3308856751639149, "grad_norm": 0.0024867048487067223, "learning_rate": 0.001, "loss": 0.4314, "step": 11992 }, { "epoch": 0.3309132673649793, "grad_norm": 0.0038186991587281227, "learning_rate": 0.001, "loss": 0.3815, "step": 11993 }, { "epoch": 0.33094085956604363, "grad_norm": 0.002811636310070753, "learning_rate": 0.001, "loss": 0.4059, "step": 11994 }, { "epoch": 0.33096845176710804, "grad_norm": 0.0019408464431762695, "learning_rate": 0.001, "loss": 0.4154, "step": 11995 }, { "epoch": 0.3309960439681724, "grad_norm": 0.0032817174214869738, "learning_rate": 0.001, "loss": 0.39, "step": 11996 }, { "epoch": 0.33102363616923675, "grad_norm": 0.005393838509917259, "learning_rate": 0.001, "loss": 0.3443, "step": 11997 }, { "epoch": 0.33105122837030115, "grad_norm": 0.002987619722262025, "learning_rate": 0.001, "loss": 0.4412, "step": 11998 }, { "epoch": 0.3310788205713655, "grad_norm": 0.004077446181327105, "learning_rate": 0.001, "loss": 0.3921, "step": 11999 }, { "epoch": 0.33110641277242986, "grad_norm": 0.0028092057909816504, "learning_rate": 0.001, "loss": 0.3888, "step": 12000 }, { "epoch": 0.33110641277242986, "eval_runtime": 23.4032, "eval_samples_per_second": 1.367, "eval_steps_per_second": 0.171, "step": 12000 } ], "logging_steps": 1, "max_steps": 36242, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.647879841911721e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }