{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 76.46176911544228, "eval_steps": 500, "global_step": 51000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014992503748125937, "grad_norm": 12.352495193481445, "learning_rate": 1.8e-07, "loss": 1.3395, "step": 10 }, { "epoch": 0.029985007496251874, "grad_norm": 12.565645217895508, "learning_rate": 3.8e-07, "loss": 1.4386, "step": 20 }, { "epoch": 0.044977511244377814, "grad_norm": 11.404107093811035, "learning_rate": 5.8e-07, "loss": 1.4002, "step": 30 }, { "epoch": 0.05997001499250375, "grad_norm": 11.824190139770508, "learning_rate": 7.8e-07, "loss": 1.2836, "step": 40 }, { "epoch": 0.07496251874062969, "grad_norm": 8.45156478881836, "learning_rate": 9.8e-07, "loss": 1.0406, "step": 50 }, { "epoch": 0.08995502248875563, "grad_norm": 4.005470275878906, "learning_rate": 1.18e-06, "loss": 0.8608, "step": 60 }, { "epoch": 0.10494752623688156, "grad_norm": 2.6651763916015625, "learning_rate": 1.3800000000000001e-06, "loss": 0.5767, "step": 70 }, { "epoch": 0.1199400299850075, "grad_norm": 1.5758684873580933, "learning_rate": 1.5800000000000003e-06, "loss": 0.5178, "step": 80 }, { "epoch": 0.13493253373313344, "grad_norm": 1.8937158584594727, "learning_rate": 1.7800000000000001e-06, "loss": 0.3893, "step": 90 }, { "epoch": 0.14992503748125938, "grad_norm": 1.6169902086257935, "learning_rate": 1.98e-06, "loss": 0.2966, "step": 100 }, { "epoch": 0.16491754122938532, "grad_norm": 1.6782653331756592, "learning_rate": 2.1800000000000003e-06, "loss": 0.2676, "step": 110 }, { "epoch": 0.17991004497751126, "grad_norm": 1.6396105289459229, "learning_rate": 2.38e-06, "loss": 0.2342, "step": 120 }, { "epoch": 0.19490254872563717, "grad_norm": 1.7966804504394531, "learning_rate": 2.5800000000000003e-06, "loss": 0.2083, "step": 130 }, { "epoch": 0.2098950524737631, "grad_norm": 1.0188837051391602, "learning_rate": 2.78e-06, "loss": 0.1703, "step": 140 }, { "epoch": 0.22488755622188905, "grad_norm": 1.0878279209136963, "learning_rate": 2.9800000000000003e-06, "loss": 0.1498, "step": 150 }, { "epoch": 0.239880059970015, "grad_norm": 1.0933284759521484, "learning_rate": 3.1800000000000005e-06, "loss": 0.1352, "step": 160 }, { "epoch": 0.25487256371814093, "grad_norm": 0.9707562923431396, "learning_rate": 3.38e-06, "loss": 0.1234, "step": 170 }, { "epoch": 0.2698650674662669, "grad_norm": 1.0383799076080322, "learning_rate": 3.58e-06, "loss": 0.0984, "step": 180 }, { "epoch": 0.2848575712143928, "grad_norm": 1.6284152269363403, "learning_rate": 3.7800000000000002e-06, "loss": 0.0916, "step": 190 }, { "epoch": 0.29985007496251875, "grad_norm": 1.2715067863464355, "learning_rate": 3.98e-06, "loss": 0.0899, "step": 200 }, { "epoch": 0.3148425787106447, "grad_norm": 1.0102485418319702, "learning_rate": 4.18e-06, "loss": 0.0922, "step": 210 }, { "epoch": 0.32983508245877063, "grad_norm": 1.253182291984558, "learning_rate": 4.38e-06, "loss": 0.0935, "step": 220 }, { "epoch": 0.3448275862068966, "grad_norm": 0.9783605933189392, "learning_rate": 4.58e-06, "loss": 0.0795, "step": 230 }, { "epoch": 0.3598200899550225, "grad_norm": 1.1638376712799072, "learning_rate": 4.780000000000001e-06, "loss": 0.082, "step": 240 }, { "epoch": 0.3748125937031484, "grad_norm": 1.4428989887237549, "learning_rate": 4.98e-06, "loss": 0.0738, "step": 250 }, { "epoch": 0.38980509745127434, "grad_norm": 0.9923336505889893, "learning_rate": 5.18e-06, "loss": 0.0694, "step": 260 }, { "epoch": 0.4047976011994003, "grad_norm": 0.864434003829956, "learning_rate": 5.38e-06, "loss": 0.0748, "step": 270 }, { "epoch": 0.4197901049475262, "grad_norm": 1.3046795129776, "learning_rate": 5.580000000000001e-06, "loss": 0.0764, "step": 280 }, { "epoch": 0.43478260869565216, "grad_norm": 1.2339755296707153, "learning_rate": 5.78e-06, "loss": 0.0762, "step": 290 }, { "epoch": 0.4497751124437781, "grad_norm": 1.7014669179916382, "learning_rate": 5.98e-06, "loss": 0.0694, "step": 300 }, { "epoch": 0.46476761619190404, "grad_norm": 1.3890693187713623, "learning_rate": 6.18e-06, "loss": 0.0697, "step": 310 }, { "epoch": 0.47976011994003, "grad_norm": 1.4853062629699707, "learning_rate": 6.38e-06, "loss": 0.0668, "step": 320 }, { "epoch": 0.4947526236881559, "grad_norm": 1.0670429468154907, "learning_rate": 6.58e-06, "loss": 0.0604, "step": 330 }, { "epoch": 0.5097451274362819, "grad_norm": 0.9075015187263489, "learning_rate": 6.78e-06, "loss": 0.0622, "step": 340 }, { "epoch": 0.5247376311844077, "grad_norm": 0.8022803664207458, "learning_rate": 6.98e-06, "loss": 0.0581, "step": 350 }, { "epoch": 0.5397301349325337, "grad_norm": 0.9733083844184875, "learning_rate": 7.180000000000001e-06, "loss": 0.0614, "step": 360 }, { "epoch": 0.5547226386806596, "grad_norm": 1.1611813306808472, "learning_rate": 7.3800000000000005e-06, "loss": 0.0647, "step": 370 }, { "epoch": 0.5697151424287856, "grad_norm": 0.8988940715789795, "learning_rate": 7.580000000000001e-06, "loss": 0.053, "step": 380 }, { "epoch": 0.5847076461769115, "grad_norm": 0.7930591702461243, "learning_rate": 7.78e-06, "loss": 0.0577, "step": 390 }, { "epoch": 0.5997001499250375, "grad_norm": 0.6655431985855103, "learning_rate": 7.98e-06, "loss": 0.054, "step": 400 }, { "epoch": 0.6146926536731634, "grad_norm": 0.7863489985466003, "learning_rate": 8.18e-06, "loss": 0.0568, "step": 410 }, { "epoch": 0.6296851574212894, "grad_norm": 1.0557104349136353, "learning_rate": 8.380000000000001e-06, "loss": 0.0539, "step": 420 }, { "epoch": 0.6446776611694153, "grad_norm": 1.5306823253631592, "learning_rate": 8.580000000000001e-06, "loss": 0.0653, "step": 430 }, { "epoch": 0.6596701649175413, "grad_norm": 0.949705183506012, "learning_rate": 8.78e-06, "loss": 0.0552, "step": 440 }, { "epoch": 0.6746626686656672, "grad_norm": 0.8784258961677551, "learning_rate": 8.98e-06, "loss": 0.0577, "step": 450 }, { "epoch": 0.6896551724137931, "grad_norm": 0.6771582961082458, "learning_rate": 9.180000000000002e-06, "loss": 0.0561, "step": 460 }, { "epoch": 0.704647676161919, "grad_norm": 0.9580478668212891, "learning_rate": 9.38e-06, "loss": 0.0583, "step": 470 }, { "epoch": 0.719640179910045, "grad_norm": 1.4079790115356445, "learning_rate": 9.58e-06, "loss": 0.0578, "step": 480 }, { "epoch": 0.7346326836581709, "grad_norm": 0.9963777661323547, "learning_rate": 9.78e-06, "loss": 0.0556, "step": 490 }, { "epoch": 0.7496251874062968, "grad_norm": 0.9055677652359009, "learning_rate": 9.980000000000001e-06, "loss": 0.0497, "step": 500 }, { "epoch": 0.7646176911544228, "grad_norm": 0.6979916095733643, "learning_rate": 1.018e-05, "loss": 0.0473, "step": 510 }, { "epoch": 0.7796101949025487, "grad_norm": 0.7936225533485413, "learning_rate": 1.038e-05, "loss": 0.0465, "step": 520 }, { "epoch": 0.7946026986506747, "grad_norm": 1.111262321472168, "learning_rate": 1.058e-05, "loss": 0.0447, "step": 530 }, { "epoch": 0.8095952023988006, "grad_norm": 0.6862292885780334, "learning_rate": 1.0780000000000002e-05, "loss": 0.0429, "step": 540 }, { "epoch": 0.8245877061469266, "grad_norm": 0.7422735691070557, "learning_rate": 1.098e-05, "loss": 0.0433, "step": 550 }, { "epoch": 0.8395802098950524, "grad_norm": 0.865845799446106, "learning_rate": 1.118e-05, "loss": 0.0397, "step": 560 }, { "epoch": 0.8545727136431784, "grad_norm": 0.7049374580383301, "learning_rate": 1.1380000000000001e-05, "loss": 0.0409, "step": 570 }, { "epoch": 0.8695652173913043, "grad_norm": 0.7126078009605408, "learning_rate": 1.1580000000000001e-05, "loss": 0.0465, "step": 580 }, { "epoch": 0.8845577211394303, "grad_norm": 0.8451477885246277, "learning_rate": 1.178e-05, "loss": 0.0447, "step": 590 }, { "epoch": 0.8995502248875562, "grad_norm": 0.7146437764167786, "learning_rate": 1.198e-05, "loss": 0.0459, "step": 600 }, { "epoch": 0.9145427286356822, "grad_norm": 0.9032754302024841, "learning_rate": 1.2180000000000002e-05, "loss": 0.0463, "step": 610 }, { "epoch": 0.9295352323838081, "grad_norm": 0.5720278024673462, "learning_rate": 1.238e-05, "loss": 0.0419, "step": 620 }, { "epoch": 0.9445277361319341, "grad_norm": 1.0492711067199707, "learning_rate": 1.258e-05, "loss": 0.0387, "step": 630 }, { "epoch": 0.95952023988006, "grad_norm": 0.6968193054199219, "learning_rate": 1.278e-05, "loss": 0.0398, "step": 640 }, { "epoch": 0.974512743628186, "grad_norm": 0.7909852862358093, "learning_rate": 1.2980000000000001e-05, "loss": 0.0385, "step": 650 }, { "epoch": 0.9895052473763118, "grad_norm": 0.6276836395263672, "learning_rate": 1.3180000000000001e-05, "loss": 0.0407, "step": 660 }, { "epoch": 1.0044977511244377, "grad_norm": 0.6238771080970764, "learning_rate": 1.338e-05, "loss": 0.0511, "step": 670 }, { "epoch": 1.0194902548725637, "grad_norm": 1.1283817291259766, "learning_rate": 1.358e-05, "loss": 0.0434, "step": 680 }, { "epoch": 1.0344827586206897, "grad_norm": 0.9786622524261475, "learning_rate": 1.3780000000000002e-05, "loss": 0.0377, "step": 690 }, { "epoch": 1.0494752623688155, "grad_norm": 0.8502009510993958, "learning_rate": 1.3980000000000002e-05, "loss": 0.0386, "step": 700 }, { "epoch": 1.0644677661169415, "grad_norm": 0.882853627204895, "learning_rate": 1.4180000000000001e-05, "loss": 0.0456, "step": 710 }, { "epoch": 1.0794602698650675, "grad_norm": 0.7679175734519958, "learning_rate": 1.4380000000000001e-05, "loss": 0.0403, "step": 720 }, { "epoch": 1.0944527736131935, "grad_norm": 0.4715464115142822, "learning_rate": 1.4580000000000003e-05, "loss": 0.0447, "step": 730 }, { "epoch": 1.1094452773613193, "grad_norm": 0.689719021320343, "learning_rate": 1.4779999999999999e-05, "loss": 0.0332, "step": 740 }, { "epoch": 1.1244377811094453, "grad_norm": 0.6541781425476074, "learning_rate": 1.4979999999999999e-05, "loss": 0.0408, "step": 750 }, { "epoch": 1.1394302848575713, "grad_norm": 0.6383222937583923, "learning_rate": 1.518e-05, "loss": 0.0379, "step": 760 }, { "epoch": 1.1544227886056972, "grad_norm": 0.6642134785652161, "learning_rate": 1.538e-05, "loss": 0.0377, "step": 770 }, { "epoch": 1.169415292353823, "grad_norm": 0.5306979417800903, "learning_rate": 1.558e-05, "loss": 0.0379, "step": 780 }, { "epoch": 1.184407796101949, "grad_norm": 0.5780134797096252, "learning_rate": 1.578e-05, "loss": 0.0367, "step": 790 }, { "epoch": 1.199400299850075, "grad_norm": 0.697882354259491, "learning_rate": 1.598e-05, "loss": 0.0403, "step": 800 }, { "epoch": 1.214392803598201, "grad_norm": 0.4227500855922699, "learning_rate": 1.618e-05, "loss": 0.0403, "step": 810 }, { "epoch": 1.2293853073463268, "grad_norm": 0.6536192297935486, "learning_rate": 1.6380000000000002e-05, "loss": 0.0427, "step": 820 }, { "epoch": 1.2443778110944528, "grad_norm": 0.6655498743057251, "learning_rate": 1.658e-05, "loss": 0.0403, "step": 830 }, { "epoch": 1.2593703148425788, "grad_norm": 0.6575920581817627, "learning_rate": 1.6780000000000002e-05, "loss": 0.0348, "step": 840 }, { "epoch": 1.2743628185907045, "grad_norm": 0.6371225118637085, "learning_rate": 1.698e-05, "loss": 0.045, "step": 850 }, { "epoch": 1.2893553223388305, "grad_norm": 0.7604775428771973, "learning_rate": 1.718e-05, "loss": 0.0368, "step": 860 }, { "epoch": 1.3043478260869565, "grad_norm": 0.9015881419181824, "learning_rate": 1.7380000000000003e-05, "loss": 0.0382, "step": 870 }, { "epoch": 1.3193403298350825, "grad_norm": 0.7450079321861267, "learning_rate": 1.758e-05, "loss": 0.0326, "step": 880 }, { "epoch": 1.3343328335832085, "grad_norm": 0.7840608358383179, "learning_rate": 1.7780000000000003e-05, "loss": 0.0323, "step": 890 }, { "epoch": 1.3493253373313343, "grad_norm": 0.9546213746070862, "learning_rate": 1.798e-05, "loss": 0.0421, "step": 900 }, { "epoch": 1.3643178410794603, "grad_norm": 0.8993039131164551, "learning_rate": 1.818e-05, "loss": 0.0344, "step": 910 }, { "epoch": 1.3793103448275863, "grad_norm": 0.6652002334594727, "learning_rate": 1.838e-05, "loss": 0.0396, "step": 920 }, { "epoch": 1.394302848575712, "grad_norm": 0.6655739545822144, "learning_rate": 1.858e-05, "loss": 0.0325, "step": 930 }, { "epoch": 1.409295352323838, "grad_norm": 0.9760321378707886, "learning_rate": 1.878e-05, "loss": 0.0337, "step": 940 }, { "epoch": 1.424287856071964, "grad_norm": 1.037689447402954, "learning_rate": 1.898e-05, "loss": 0.0394, "step": 950 }, { "epoch": 1.4392803598200898, "grad_norm": 0.8125677108764648, "learning_rate": 1.918e-05, "loss": 0.0307, "step": 960 }, { "epoch": 1.4542728635682158, "grad_norm": 0.4719066917896271, "learning_rate": 1.938e-05, "loss": 0.0335, "step": 970 }, { "epoch": 1.4692653673163418, "grad_norm": 0.654403567314148, "learning_rate": 1.9580000000000002e-05, "loss": 0.0355, "step": 980 }, { "epoch": 1.4842578710644678, "grad_norm": 0.476906955242157, "learning_rate": 1.978e-05, "loss": 0.0396, "step": 990 }, { "epoch": 1.4992503748125938, "grad_norm": 0.65264892578125, "learning_rate": 1.9980000000000002e-05, "loss": 0.033, "step": 1000 }, { "epoch": 1.5142428785607196, "grad_norm": 0.3506099581718445, "learning_rate": 2.0180000000000003e-05, "loss": 0.0325, "step": 1010 }, { "epoch": 1.5292353823088456, "grad_norm": 0.9508102536201477, "learning_rate": 2.038e-05, "loss": 0.0334, "step": 1020 }, { "epoch": 1.5442278860569716, "grad_norm": 0.4943068027496338, "learning_rate": 2.0580000000000003e-05, "loss": 0.0307, "step": 1030 }, { "epoch": 1.5592203898050974, "grad_norm": 0.6410338282585144, "learning_rate": 2.078e-05, "loss": 0.0383, "step": 1040 }, { "epoch": 1.5742128935532234, "grad_norm": 0.7681779861450195, "learning_rate": 2.098e-05, "loss": 0.0319, "step": 1050 }, { "epoch": 1.5892053973013494, "grad_norm": 0.8546875715255737, "learning_rate": 2.118e-05, "loss": 0.0485, "step": 1060 }, { "epoch": 1.6041979010494751, "grad_norm": 0.9468808770179749, "learning_rate": 2.138e-05, "loss": 0.0432, "step": 1070 }, { "epoch": 1.6191904047976013, "grad_norm": 0.9696308374404907, "learning_rate": 2.158e-05, "loss": 0.0363, "step": 1080 }, { "epoch": 1.6341829085457271, "grad_norm": 0.7556995749473572, "learning_rate": 2.178e-05, "loss": 0.0292, "step": 1090 }, { "epoch": 1.6491754122938531, "grad_norm": 0.5595796704292297, "learning_rate": 2.198e-05, "loss": 0.0359, "step": 1100 }, { "epoch": 1.6641679160419791, "grad_norm": 0.57950758934021, "learning_rate": 2.218e-05, "loss": 0.0334, "step": 1110 }, { "epoch": 1.6791604197901049, "grad_norm": 0.8545596599578857, "learning_rate": 2.2380000000000003e-05, "loss": 0.039, "step": 1120 }, { "epoch": 1.6941529235382309, "grad_norm": 0.5169274806976318, "learning_rate": 2.258e-05, "loss": 0.0394, "step": 1130 }, { "epoch": 1.7091454272863569, "grad_norm": 0.8701108694076538, "learning_rate": 2.2780000000000002e-05, "loss": 0.0359, "step": 1140 }, { "epoch": 1.7241379310344827, "grad_norm": 0.6969452500343323, "learning_rate": 2.298e-05, "loss": 0.0412, "step": 1150 }, { "epoch": 1.7391304347826086, "grad_norm": 0.5088011026382446, "learning_rate": 2.318e-05, "loss": 0.0313, "step": 1160 }, { "epoch": 1.7541229385307346, "grad_norm": 1.013283610343933, "learning_rate": 2.3380000000000003e-05, "loss": 0.0334, "step": 1170 }, { "epoch": 1.7691154422788604, "grad_norm": 0.8000363707542419, "learning_rate": 2.358e-05, "loss": 0.0376, "step": 1180 }, { "epoch": 1.7841079460269866, "grad_norm": 0.7613083124160767, "learning_rate": 2.3780000000000003e-05, "loss": 0.0341, "step": 1190 }, { "epoch": 1.7991004497751124, "grad_norm": 0.8471761345863342, "learning_rate": 2.398e-05, "loss": 0.0366, "step": 1200 }, { "epoch": 1.8140929535232384, "grad_norm": 0.5258248448371887, "learning_rate": 2.418e-05, "loss": 0.0349, "step": 1210 }, { "epoch": 1.8290854572713644, "grad_norm": 0.6498744487762451, "learning_rate": 2.438e-05, "loss": 0.0279, "step": 1220 }, { "epoch": 1.8440779610194902, "grad_norm": 0.43778473138809204, "learning_rate": 2.4580000000000002e-05, "loss": 0.0338, "step": 1230 }, { "epoch": 1.8590704647676162, "grad_norm": 0.3634156286716461, "learning_rate": 2.478e-05, "loss": 0.032, "step": 1240 }, { "epoch": 1.8740629685157422, "grad_norm": 0.5765547156333923, "learning_rate": 2.498e-05, "loss": 0.0335, "step": 1250 }, { "epoch": 1.889055472263868, "grad_norm": 0.8275583386421204, "learning_rate": 2.5180000000000003e-05, "loss": 0.0321, "step": 1260 }, { "epoch": 1.9040479760119942, "grad_norm": 0.8409108519554138, "learning_rate": 2.5380000000000004e-05, "loss": 0.0337, "step": 1270 }, { "epoch": 1.91904047976012, "grad_norm": 0.5717505812644958, "learning_rate": 2.5580000000000002e-05, "loss": 0.0326, "step": 1280 }, { "epoch": 1.934032983508246, "grad_norm": 0.5822479128837585, "learning_rate": 2.5779999999999997e-05, "loss": 0.0353, "step": 1290 }, { "epoch": 1.949025487256372, "grad_norm": 0.5897538661956787, "learning_rate": 2.598e-05, "loss": 0.031, "step": 1300 }, { "epoch": 1.9640179910044977, "grad_norm": 0.3169296979904175, "learning_rate": 2.618e-05, "loss": 0.031, "step": 1310 }, { "epoch": 1.9790104947526237, "grad_norm": 0.6235405206680298, "learning_rate": 2.6379999999999998e-05, "loss": 0.034, "step": 1320 }, { "epoch": 1.9940029985007497, "grad_norm": 0.6218909621238708, "learning_rate": 2.658e-05, "loss": 0.0324, "step": 1330 }, { "epoch": 2.0089955022488755, "grad_norm": 0.6903409957885742, "learning_rate": 2.678e-05, "loss": 0.0362, "step": 1340 }, { "epoch": 2.0239880059970017, "grad_norm": 0.3880787789821625, "learning_rate": 2.698e-05, "loss": 0.0319, "step": 1350 }, { "epoch": 2.0389805097451275, "grad_norm": 0.6971299648284912, "learning_rate": 2.718e-05, "loss": 0.031, "step": 1360 }, { "epoch": 2.0539730134932532, "grad_norm": 0.5031431317329407, "learning_rate": 2.738e-05, "loss": 0.0233, "step": 1370 }, { "epoch": 2.0689655172413794, "grad_norm": 0.8690423965454102, "learning_rate": 2.758e-05, "loss": 0.031, "step": 1380 }, { "epoch": 2.0839580209895052, "grad_norm": 0.7149677872657776, "learning_rate": 2.778e-05, "loss": 0.028, "step": 1390 }, { "epoch": 2.098950524737631, "grad_norm": 0.6184076070785522, "learning_rate": 2.798e-05, "loss": 0.0341, "step": 1400 }, { "epoch": 2.113943028485757, "grad_norm": 0.6456753611564636, "learning_rate": 2.818e-05, "loss": 0.0263, "step": 1410 }, { "epoch": 2.128935532233883, "grad_norm": 0.8995676636695862, "learning_rate": 2.8380000000000003e-05, "loss": 0.0316, "step": 1420 }, { "epoch": 2.143928035982009, "grad_norm": 0.5750619769096375, "learning_rate": 2.858e-05, "loss": 0.0316, "step": 1430 }, { "epoch": 2.158920539730135, "grad_norm": 0.6220912933349609, "learning_rate": 2.8780000000000002e-05, "loss": 0.031, "step": 1440 }, { "epoch": 2.1739130434782608, "grad_norm": 0.6296874284744263, "learning_rate": 2.898e-05, "loss": 0.0324, "step": 1450 }, { "epoch": 2.188905547226387, "grad_norm": 0.43397459387779236, "learning_rate": 2.9180000000000002e-05, "loss": 0.0276, "step": 1460 }, { "epoch": 2.2038980509745127, "grad_norm": 0.8279281258583069, "learning_rate": 2.9380000000000003e-05, "loss": 0.032, "step": 1470 }, { "epoch": 2.2188905547226385, "grad_norm": 0.7848488688468933, "learning_rate": 2.958e-05, "loss": 0.0311, "step": 1480 }, { "epoch": 2.2338830584707647, "grad_norm": 0.6944928765296936, "learning_rate": 2.9780000000000003e-05, "loss": 0.0267, "step": 1490 }, { "epoch": 2.2488755622188905, "grad_norm": 0.9375112652778625, "learning_rate": 2.998e-05, "loss": 0.0341, "step": 1500 }, { "epoch": 2.2638680659670163, "grad_norm": 0.8271047472953796, "learning_rate": 3.0180000000000002e-05, "loss": 0.0319, "step": 1510 }, { "epoch": 2.2788605697151425, "grad_norm": 0.712743878364563, "learning_rate": 3.0380000000000004e-05, "loss": 0.0302, "step": 1520 }, { "epoch": 2.2938530734632683, "grad_norm": 0.9209683537483215, "learning_rate": 3.058e-05, "loss": 0.0354, "step": 1530 }, { "epoch": 2.3088455772113945, "grad_norm": 0.509310245513916, "learning_rate": 3.078e-05, "loss": 0.0345, "step": 1540 }, { "epoch": 2.3238380809595203, "grad_norm": 0.3942641615867615, "learning_rate": 3.0980000000000005e-05, "loss": 0.0329, "step": 1550 }, { "epoch": 2.338830584707646, "grad_norm": 0.698744535446167, "learning_rate": 3.118e-05, "loss": 0.0321, "step": 1560 }, { "epoch": 2.3538230884557723, "grad_norm": 0.5828759074211121, "learning_rate": 3.138e-05, "loss": 0.0308, "step": 1570 }, { "epoch": 2.368815592203898, "grad_norm": 0.4720453917980194, "learning_rate": 3.1580000000000006e-05, "loss": 0.0298, "step": 1580 }, { "epoch": 2.383808095952024, "grad_norm": 0.6018348932266235, "learning_rate": 3.1780000000000004e-05, "loss": 0.0291, "step": 1590 }, { "epoch": 2.39880059970015, "grad_norm": 0.7781568765640259, "learning_rate": 3.198e-05, "loss": 0.0292, "step": 1600 }, { "epoch": 2.413793103448276, "grad_norm": 0.5917528867721558, "learning_rate": 3.218e-05, "loss": 0.0287, "step": 1610 }, { "epoch": 2.428785607196402, "grad_norm": 0.7644325494766235, "learning_rate": 3.238e-05, "loss": 0.026, "step": 1620 }, { "epoch": 2.443778110944528, "grad_norm": 0.5203229784965515, "learning_rate": 3.2579999999999996e-05, "loss": 0.0285, "step": 1630 }, { "epoch": 2.4587706146926536, "grad_norm": 0.41256290674209595, "learning_rate": 3.278e-05, "loss": 0.0347, "step": 1640 }, { "epoch": 2.47376311844078, "grad_norm": 0.6890048384666443, "learning_rate": 3.298e-05, "loss": 0.037, "step": 1650 }, { "epoch": 2.4887556221889056, "grad_norm": 0.6746430993080139, "learning_rate": 3.318e-05, "loss": 0.0253, "step": 1660 }, { "epoch": 2.5037481259370313, "grad_norm": 0.5123350024223328, "learning_rate": 3.338e-05, "loss": 0.0264, "step": 1670 }, { "epoch": 2.5187406296851576, "grad_norm": 0.41564497351646423, "learning_rate": 3.358e-05, "loss": 0.0289, "step": 1680 }, { "epoch": 2.5337331334332833, "grad_norm": 0.4995998442173004, "learning_rate": 3.378e-05, "loss": 0.0274, "step": 1690 }, { "epoch": 2.548725637181409, "grad_norm": 0.6151142120361328, "learning_rate": 3.398e-05, "loss": 0.026, "step": 1700 }, { "epoch": 2.5637181409295353, "grad_norm": 0.5302304029464722, "learning_rate": 3.418e-05, "loss": 0.0283, "step": 1710 }, { "epoch": 2.578710644677661, "grad_norm": 0.8876344561576843, "learning_rate": 3.438e-05, "loss": 0.0375, "step": 1720 }, { "epoch": 2.593703148425787, "grad_norm": 0.5906609296798706, "learning_rate": 3.4580000000000004e-05, "loss": 0.0347, "step": 1730 }, { "epoch": 2.608695652173913, "grad_norm": 0.8332546949386597, "learning_rate": 3.478e-05, "loss": 0.0295, "step": 1740 }, { "epoch": 2.623688155922039, "grad_norm": 0.7891860604286194, "learning_rate": 3.498e-05, "loss": 0.0342, "step": 1750 }, { "epoch": 2.638680659670165, "grad_norm": 0.6615274548530579, "learning_rate": 3.518e-05, "loss": 0.0321, "step": 1760 }, { "epoch": 2.653673163418291, "grad_norm": 0.8668050169944763, "learning_rate": 3.5380000000000003e-05, "loss": 0.0322, "step": 1770 }, { "epoch": 2.668665667166417, "grad_norm": 0.4949815571308136, "learning_rate": 3.558e-05, "loss": 0.0297, "step": 1780 }, { "epoch": 2.683658170914543, "grad_norm": 0.5212180018424988, "learning_rate": 3.578e-05, "loss": 0.0315, "step": 1790 }, { "epoch": 2.6986506746626686, "grad_norm": 0.6065674424171448, "learning_rate": 3.5980000000000004e-05, "loss": 0.0308, "step": 1800 }, { "epoch": 2.713643178410795, "grad_norm": 0.47689691185951233, "learning_rate": 3.618e-05, "loss": 0.0307, "step": 1810 }, { "epoch": 2.7286356821589206, "grad_norm": 0.552668571472168, "learning_rate": 3.638e-05, "loss": 0.0271, "step": 1820 }, { "epoch": 2.7436281859070464, "grad_norm": 0.7516595721244812, "learning_rate": 3.6580000000000006e-05, "loss": 0.0285, "step": 1830 }, { "epoch": 2.7586206896551726, "grad_norm": 0.4314671754837036, "learning_rate": 3.6780000000000004e-05, "loss": 0.0282, "step": 1840 }, { "epoch": 2.7736131934032984, "grad_norm": 0.4675828814506531, "learning_rate": 3.698e-05, "loss": 0.027, "step": 1850 }, { "epoch": 2.788605697151424, "grad_norm": 0.4131605327129364, "learning_rate": 3.7180000000000007e-05, "loss": 0.0338, "step": 1860 }, { "epoch": 2.8035982008995504, "grad_norm": 0.41675710678100586, "learning_rate": 3.7380000000000005e-05, "loss": 0.0293, "step": 1870 }, { "epoch": 2.818590704647676, "grad_norm": 0.4977822005748749, "learning_rate": 3.758e-05, "loss": 0.0291, "step": 1880 }, { "epoch": 2.833583208395802, "grad_norm": 0.6113176941871643, "learning_rate": 3.778000000000001e-05, "loss": 0.0224, "step": 1890 }, { "epoch": 2.848575712143928, "grad_norm": 0.6715021729469299, "learning_rate": 3.7980000000000006e-05, "loss": 0.0332, "step": 1900 }, { "epoch": 2.863568215892054, "grad_norm": 0.5996584892272949, "learning_rate": 3.818e-05, "loss": 0.0294, "step": 1910 }, { "epoch": 2.8785607196401797, "grad_norm": 0.43237099051475525, "learning_rate": 3.838e-05, "loss": 0.0256, "step": 1920 }, { "epoch": 2.893553223388306, "grad_norm": 0.3959678113460541, "learning_rate": 3.858e-05, "loss": 0.0278, "step": 1930 }, { "epoch": 2.9085457271364317, "grad_norm": 0.7138267159461975, "learning_rate": 3.878e-05, "loss": 0.0289, "step": 1940 }, { "epoch": 2.923538230884558, "grad_norm": 0.6589713096618652, "learning_rate": 3.898e-05, "loss": 0.0278, "step": 1950 }, { "epoch": 2.9385307346326837, "grad_norm": 0.5856595635414124, "learning_rate": 3.918e-05, "loss": 0.0287, "step": 1960 }, { "epoch": 2.9535232383808094, "grad_norm": 0.35172680020332336, "learning_rate": 3.938e-05, "loss": 0.0318, "step": 1970 }, { "epoch": 2.9685157421289357, "grad_norm": 0.5157833099365234, "learning_rate": 3.958e-05, "loss": 0.035, "step": 1980 }, { "epoch": 2.9835082458770614, "grad_norm": 0.555951714515686, "learning_rate": 3.978e-05, "loss": 0.0272, "step": 1990 }, { "epoch": 2.9985007496251876, "grad_norm": 0.43196767568588257, "learning_rate": 3.998e-05, "loss": 0.0245, "step": 2000 }, { "epoch": 3.0134932533733134, "grad_norm": 0.47135859727859497, "learning_rate": 4.018e-05, "loss": 0.025, "step": 2010 }, { "epoch": 3.028485757121439, "grad_norm": 0.46779969334602356, "learning_rate": 4.038e-05, "loss": 0.0259, "step": 2020 }, { "epoch": 3.0434782608695654, "grad_norm": 0.5308200120925903, "learning_rate": 4.058e-05, "loss": 0.0323, "step": 2030 }, { "epoch": 3.058470764617691, "grad_norm": 0.3834700584411621, "learning_rate": 4.078e-05, "loss": 0.032, "step": 2040 }, { "epoch": 3.073463268365817, "grad_norm": 0.45419391989707947, "learning_rate": 4.0980000000000004e-05, "loss": 0.0258, "step": 2050 }, { "epoch": 3.088455772113943, "grad_norm": 0.48124781250953674, "learning_rate": 4.118e-05, "loss": 0.0279, "step": 2060 }, { "epoch": 3.103448275862069, "grad_norm": 0.5233514308929443, "learning_rate": 4.138e-05, "loss": 0.0282, "step": 2070 }, { "epoch": 3.1184407796101947, "grad_norm": 0.26653027534484863, "learning_rate": 4.1580000000000005e-05, "loss": 0.0308, "step": 2080 }, { "epoch": 3.133433283358321, "grad_norm": 0.6718916296958923, "learning_rate": 4.178e-05, "loss": 0.0239, "step": 2090 }, { "epoch": 3.1484257871064467, "grad_norm": 0.674885630607605, "learning_rate": 4.198e-05, "loss": 0.0223, "step": 2100 }, { "epoch": 3.1634182908545725, "grad_norm": 0.4396958649158478, "learning_rate": 4.2180000000000006e-05, "loss": 0.0288, "step": 2110 }, { "epoch": 3.1784107946026987, "grad_norm": 0.45161083340644836, "learning_rate": 4.2380000000000004e-05, "loss": 0.026, "step": 2120 }, { "epoch": 3.1934032983508245, "grad_norm": 0.49589312076568604, "learning_rate": 4.258e-05, "loss": 0.0294, "step": 2130 }, { "epoch": 3.2083958020989507, "grad_norm": 0.34406957030296326, "learning_rate": 4.278e-05, "loss": 0.0272, "step": 2140 }, { "epoch": 3.2233883058470765, "grad_norm": 0.3701714277267456, "learning_rate": 4.2980000000000005e-05, "loss": 0.0301, "step": 2150 }, { "epoch": 3.2383808095952022, "grad_norm": 0.39216747879981995, "learning_rate": 4.318e-05, "loss": 0.0238, "step": 2160 }, { "epoch": 3.2533733133433285, "grad_norm": 0.5808532238006592, "learning_rate": 4.338e-05, "loss": 0.0261, "step": 2170 }, { "epoch": 3.2683658170914542, "grad_norm": 0.5334476232528687, "learning_rate": 4.3580000000000006e-05, "loss": 0.028, "step": 2180 }, { "epoch": 3.2833583208395805, "grad_norm": 0.4779290556907654, "learning_rate": 4.3780000000000004e-05, "loss": 0.0272, "step": 2190 }, { "epoch": 3.2983508245877062, "grad_norm": 0.7335325479507446, "learning_rate": 4.398e-05, "loss": 0.0299, "step": 2200 }, { "epoch": 3.313343328335832, "grad_norm": 0.5953013896942139, "learning_rate": 4.418000000000001e-05, "loss": 0.021, "step": 2210 }, { "epoch": 3.3283358320839582, "grad_norm": 0.5419385433197021, "learning_rate": 4.438e-05, "loss": 0.0265, "step": 2220 }, { "epoch": 3.343328335832084, "grad_norm": 0.5099077820777893, "learning_rate": 4.458e-05, "loss": 0.0264, "step": 2230 }, { "epoch": 3.3583208395802098, "grad_norm": 0.5408259630203247, "learning_rate": 4.478e-05, "loss": 0.0245, "step": 2240 }, { "epoch": 3.373313343328336, "grad_norm": 0.3827457129955292, "learning_rate": 4.498e-05, "loss": 0.0301, "step": 2250 }, { "epoch": 3.3883058470764618, "grad_norm": 0.506267249584198, "learning_rate": 4.518e-05, "loss": 0.0258, "step": 2260 }, { "epoch": 3.4032983508245875, "grad_norm": 0.4838460385799408, "learning_rate": 4.538e-05, "loss": 0.0245, "step": 2270 }, { "epoch": 3.4182908545727138, "grad_norm": 0.5581598281860352, "learning_rate": 4.558e-05, "loss": 0.0305, "step": 2280 }, { "epoch": 3.4332833583208395, "grad_norm": 0.39448389410972595, "learning_rate": 4.578e-05, "loss": 0.028, "step": 2290 }, { "epoch": 3.4482758620689653, "grad_norm": 0.5152983665466309, "learning_rate": 4.5980000000000004e-05, "loss": 0.027, "step": 2300 }, { "epoch": 3.4632683658170915, "grad_norm": 0.4579412639141083, "learning_rate": 4.618e-05, "loss": 0.0331, "step": 2310 }, { "epoch": 3.4782608695652173, "grad_norm": 0.5351470708847046, "learning_rate": 4.638e-05, "loss": 0.0236, "step": 2320 }, { "epoch": 3.4932533733133435, "grad_norm": 0.6208102703094482, "learning_rate": 4.6580000000000005e-05, "loss": 0.0301, "step": 2330 }, { "epoch": 3.5082458770614693, "grad_norm": 0.7647486925125122, "learning_rate": 4.678e-05, "loss": 0.0316, "step": 2340 }, { "epoch": 3.523238380809595, "grad_norm": 0.4687452018260956, "learning_rate": 4.698e-05, "loss": 0.0256, "step": 2350 }, { "epoch": 3.5382308845577213, "grad_norm": 0.3696313500404358, "learning_rate": 4.718e-05, "loss": 0.0225, "step": 2360 }, { "epoch": 3.553223388305847, "grad_norm": 0.465334415435791, "learning_rate": 4.7380000000000004e-05, "loss": 0.0232, "step": 2370 }, { "epoch": 3.5682158920539733, "grad_norm": 0.43046215176582336, "learning_rate": 4.758e-05, "loss": 0.0248, "step": 2380 }, { "epoch": 3.583208395802099, "grad_norm": 0.4796692728996277, "learning_rate": 4.778e-05, "loss": 0.0276, "step": 2390 }, { "epoch": 3.598200899550225, "grad_norm": 0.42800068855285645, "learning_rate": 4.7980000000000005e-05, "loss": 0.031, "step": 2400 }, { "epoch": 3.613193403298351, "grad_norm": 0.6262326240539551, "learning_rate": 4.818e-05, "loss": 0.0237, "step": 2410 }, { "epoch": 3.628185907046477, "grad_norm": 0.49391791224479675, "learning_rate": 4.838e-05, "loss": 0.0264, "step": 2420 }, { "epoch": 3.6431784107946026, "grad_norm": 0.3888670802116394, "learning_rate": 4.8580000000000006e-05, "loss": 0.0254, "step": 2430 }, { "epoch": 3.658170914542729, "grad_norm": 0.4104800522327423, "learning_rate": 4.8780000000000004e-05, "loss": 0.0289, "step": 2440 }, { "epoch": 3.6731634182908546, "grad_norm": 0.2679605484008789, "learning_rate": 4.898e-05, "loss": 0.0233, "step": 2450 }, { "epoch": 3.6881559220389803, "grad_norm": 0.47446519136428833, "learning_rate": 4.918000000000001e-05, "loss": 0.0263, "step": 2460 }, { "epoch": 3.7031484257871066, "grad_norm": 0.3639093339443207, "learning_rate": 4.9380000000000005e-05, "loss": 0.0255, "step": 2470 }, { "epoch": 3.7181409295352323, "grad_norm": 0.583349883556366, "learning_rate": 4.958e-05, "loss": 0.0261, "step": 2480 }, { "epoch": 3.733133433283358, "grad_norm": 0.4730679988861084, "learning_rate": 4.978e-05, "loss": 0.0244, "step": 2490 }, { "epoch": 3.7481259370314843, "grad_norm": 0.39493924379348755, "learning_rate": 4.9980000000000006e-05, "loss": 0.0249, "step": 2500 }, { "epoch": 3.76311844077961, "grad_norm": 0.35968178510665894, "learning_rate": 5.0180000000000004e-05, "loss": 0.026, "step": 2510 }, { "epoch": 3.778110944527736, "grad_norm": 0.5602163076400757, "learning_rate": 5.038e-05, "loss": 0.0275, "step": 2520 }, { "epoch": 3.793103448275862, "grad_norm": 0.6101713180541992, "learning_rate": 5.058000000000001e-05, "loss": 0.0219, "step": 2530 }, { "epoch": 3.808095952023988, "grad_norm": 0.5835831761360168, "learning_rate": 5.0780000000000005e-05, "loss": 0.0243, "step": 2540 }, { "epoch": 3.823088455772114, "grad_norm": 0.5334992408752441, "learning_rate": 5.098e-05, "loss": 0.0294, "step": 2550 }, { "epoch": 3.83808095952024, "grad_norm": 0.3274346888065338, "learning_rate": 5.118000000000001e-05, "loss": 0.0267, "step": 2560 }, { "epoch": 3.853073463268366, "grad_norm": 0.5520491003990173, "learning_rate": 5.1380000000000006e-05, "loss": 0.0253, "step": 2570 }, { "epoch": 3.868065967016492, "grad_norm": 0.8906029462814331, "learning_rate": 5.1580000000000004e-05, "loss": 0.0325, "step": 2580 }, { "epoch": 3.8830584707646176, "grad_norm": 0.6473612785339355, "learning_rate": 5.178000000000001e-05, "loss": 0.0309, "step": 2590 }, { "epoch": 3.898050974512744, "grad_norm": 0.5719380378723145, "learning_rate": 5.198000000000001e-05, "loss": 0.0303, "step": 2600 }, { "epoch": 3.9130434782608696, "grad_norm": 0.6766910552978516, "learning_rate": 5.2180000000000005e-05, "loss": 0.0271, "step": 2610 }, { "epoch": 3.9280359820089954, "grad_norm": 0.4628795385360718, "learning_rate": 5.238000000000001e-05, "loss": 0.0276, "step": 2620 }, { "epoch": 3.9430284857571216, "grad_norm": 0.7910708785057068, "learning_rate": 5.258000000000001e-05, "loss": 0.0266, "step": 2630 }, { "epoch": 3.9580209895052474, "grad_norm": 0.6692346334457397, "learning_rate": 5.2780000000000006e-05, "loss": 0.0256, "step": 2640 }, { "epoch": 3.973013493253373, "grad_norm": 0.47468385100364685, "learning_rate": 5.2980000000000004e-05, "loss": 0.0312, "step": 2650 }, { "epoch": 3.9880059970014994, "grad_norm": 0.5814277529716492, "learning_rate": 5.318000000000001e-05, "loss": 0.0307, "step": 2660 }, { "epoch": 4.002998500749626, "grad_norm": 0.4091750383377075, "learning_rate": 5.338000000000001e-05, "loss": 0.0276, "step": 2670 }, { "epoch": 4.017991004497751, "grad_norm": 0.6854994297027588, "learning_rate": 5.3580000000000005e-05, "loss": 0.0298, "step": 2680 }, { "epoch": 4.032983508245877, "grad_norm": 0.33739712834358215, "learning_rate": 5.378e-05, "loss": 0.0268, "step": 2690 }, { "epoch": 4.047976011994003, "grad_norm": 0.3424092233181, "learning_rate": 5.3979999999999995e-05, "loss": 0.0275, "step": 2700 }, { "epoch": 4.062968515742129, "grad_norm": 0.4331528842449188, "learning_rate": 5.418e-05, "loss": 0.0298, "step": 2710 }, { "epoch": 4.077961019490255, "grad_norm": 0.7632088661193848, "learning_rate": 5.438e-05, "loss": 0.0276, "step": 2720 }, { "epoch": 4.092953523238381, "grad_norm": 0.5150647759437561, "learning_rate": 5.4579999999999996e-05, "loss": 0.0255, "step": 2730 }, { "epoch": 4.1079460269865065, "grad_norm": 0.41598230600357056, "learning_rate": 5.478e-05, "loss": 0.0255, "step": 2740 }, { "epoch": 4.122938530734633, "grad_norm": 0.4762808382511139, "learning_rate": 5.498e-05, "loss": 0.0221, "step": 2750 }, { "epoch": 4.137931034482759, "grad_norm": 0.7220245003700256, "learning_rate": 5.518e-05, "loss": 0.0225, "step": 2760 }, { "epoch": 4.152923538230884, "grad_norm": 0.4292546510696411, "learning_rate": 5.538e-05, "loss": 0.0245, "step": 2770 }, { "epoch": 4.1679160419790104, "grad_norm": 0.5606144666671753, "learning_rate": 5.558e-05, "loss": 0.0268, "step": 2780 }, { "epoch": 4.182908545727137, "grad_norm": 0.40479356050491333, "learning_rate": 5.578e-05, "loss": 0.0249, "step": 2790 }, { "epoch": 4.197901049475262, "grad_norm": 0.3985999524593353, "learning_rate": 5.5979999999999996e-05, "loss": 0.0278, "step": 2800 }, { "epoch": 4.212893553223388, "grad_norm": 0.3953864872455597, "learning_rate": 5.618e-05, "loss": 0.0253, "step": 2810 }, { "epoch": 4.227886056971514, "grad_norm": 0.5113441944122314, "learning_rate": 5.638e-05, "loss": 0.0239, "step": 2820 }, { "epoch": 4.24287856071964, "grad_norm": 0.24408751726150513, "learning_rate": 5.658e-05, "loss": 0.0254, "step": 2830 }, { "epoch": 4.257871064467766, "grad_norm": 0.4022555947303772, "learning_rate": 5.678e-05, "loss": 0.0266, "step": 2840 }, { "epoch": 4.272863568215892, "grad_norm": 0.28332898020744324, "learning_rate": 5.698e-05, "loss": 0.0225, "step": 2850 }, { "epoch": 4.287856071964018, "grad_norm": 0.7654395699501038, "learning_rate": 5.718e-05, "loss": 0.0255, "step": 2860 }, { "epoch": 4.302848575712144, "grad_norm": 0.4928147494792938, "learning_rate": 5.738e-05, "loss": 0.0216, "step": 2870 }, { "epoch": 4.31784107946027, "grad_norm": 0.44574853777885437, "learning_rate": 5.758e-05, "loss": 0.0242, "step": 2880 }, { "epoch": 4.332833583208396, "grad_norm": 0.5018242597579956, "learning_rate": 5.778e-05, "loss": 0.0238, "step": 2890 }, { "epoch": 4.3478260869565215, "grad_norm": 0.4572640359401703, "learning_rate": 5.7980000000000004e-05, "loss": 0.024, "step": 2900 }, { "epoch": 4.362818590704648, "grad_norm": 0.47001832723617554, "learning_rate": 5.818e-05, "loss": 0.0317, "step": 2910 }, { "epoch": 4.377811094452774, "grad_norm": 0.45874908566474915, "learning_rate": 5.838e-05, "loss": 0.0264, "step": 2920 }, { "epoch": 4.392803598200899, "grad_norm": 0.4370318353176117, "learning_rate": 5.858e-05, "loss": 0.0245, "step": 2930 }, { "epoch": 4.4077961019490255, "grad_norm": 0.31742143630981445, "learning_rate": 5.878e-05, "loss": 0.0237, "step": 2940 }, { "epoch": 4.422788605697152, "grad_norm": 0.284474641084671, "learning_rate": 5.898e-05, "loss": 0.0216, "step": 2950 }, { "epoch": 4.437781109445277, "grad_norm": 0.4091624319553375, "learning_rate": 5.918e-05, "loss": 0.0289, "step": 2960 }, { "epoch": 4.452773613193403, "grad_norm": 0.508106529712677, "learning_rate": 5.9380000000000004e-05, "loss": 0.0265, "step": 2970 }, { "epoch": 4.4677661169415295, "grad_norm": 0.41576313972473145, "learning_rate": 5.958e-05, "loss": 0.0224, "step": 2980 }, { "epoch": 4.482758620689655, "grad_norm": 0.49214938282966614, "learning_rate": 5.978e-05, "loss": 0.0255, "step": 2990 }, { "epoch": 4.497751124437781, "grad_norm": 0.4267028570175171, "learning_rate": 5.9980000000000005e-05, "loss": 0.031, "step": 3000 }, { "epoch": 4.512743628185907, "grad_norm": 0.35308316349983215, "learning_rate": 6.018e-05, "loss": 0.0239, "step": 3010 }, { "epoch": 4.527736131934033, "grad_norm": 0.44482308626174927, "learning_rate": 6.038e-05, "loss": 0.0328, "step": 3020 }, { "epoch": 4.542728635682159, "grad_norm": 0.4486542344093323, "learning_rate": 6.0580000000000006e-05, "loss": 0.0246, "step": 3030 }, { "epoch": 4.557721139430285, "grad_norm": 0.40585947036743164, "learning_rate": 6.0780000000000004e-05, "loss": 0.0247, "step": 3040 }, { "epoch": 4.57271364317841, "grad_norm": 0.3849809169769287, "learning_rate": 6.098e-05, "loss": 0.0251, "step": 3050 }, { "epoch": 4.5877061469265366, "grad_norm": 0.41801077127456665, "learning_rate": 6.118000000000001e-05, "loss": 0.0294, "step": 3060 }, { "epoch": 4.602698650674663, "grad_norm": 0.5797315835952759, "learning_rate": 6.138e-05, "loss": 0.0268, "step": 3070 }, { "epoch": 4.617691154422789, "grad_norm": 0.3132677972316742, "learning_rate": 6.158e-05, "loss": 0.0241, "step": 3080 }, { "epoch": 4.632683658170914, "grad_norm": 0.4240990877151489, "learning_rate": 6.178000000000001e-05, "loss": 0.0252, "step": 3090 }, { "epoch": 4.6476761619190405, "grad_norm": 0.36552441120147705, "learning_rate": 6.198e-05, "loss": 0.0271, "step": 3100 }, { "epoch": 4.662668665667167, "grad_norm": 0.31898045539855957, "learning_rate": 6.218e-05, "loss": 0.0287, "step": 3110 }, { "epoch": 4.677661169415292, "grad_norm": 0.4124031960964203, "learning_rate": 6.238000000000001e-05, "loss": 0.0277, "step": 3120 }, { "epoch": 4.692653673163418, "grad_norm": 0.250202476978302, "learning_rate": 6.258e-05, "loss": 0.0295, "step": 3130 }, { "epoch": 4.7076461769115445, "grad_norm": 0.35431143641471863, "learning_rate": 6.278e-05, "loss": 0.0253, "step": 3140 }, { "epoch": 4.72263868065967, "grad_norm": 0.7163915634155273, "learning_rate": 6.298000000000001e-05, "loss": 0.0293, "step": 3150 }, { "epoch": 4.737631184407796, "grad_norm": 0.43702057003974915, "learning_rate": 6.318e-05, "loss": 0.0261, "step": 3160 }, { "epoch": 4.752623688155922, "grad_norm": 0.3596749007701874, "learning_rate": 6.338e-05, "loss": 0.0258, "step": 3170 }, { "epoch": 4.767616191904048, "grad_norm": 0.6289761662483215, "learning_rate": 6.358000000000001e-05, "loss": 0.0238, "step": 3180 }, { "epoch": 4.782608695652174, "grad_norm": 0.30265724658966064, "learning_rate": 6.378e-05, "loss": 0.0266, "step": 3190 }, { "epoch": 4.7976011994003, "grad_norm": 0.7281140685081482, "learning_rate": 6.398000000000001e-05, "loss": 0.0332, "step": 3200 }, { "epoch": 4.812593703148426, "grad_norm": 0.4240742325782776, "learning_rate": 6.418000000000001e-05, "loss": 0.0286, "step": 3210 }, { "epoch": 4.827586206896552, "grad_norm": 0.42340219020843506, "learning_rate": 6.438e-05, "loss": 0.0258, "step": 3220 }, { "epoch": 4.842578710644678, "grad_norm": 0.43451204895973206, "learning_rate": 6.458000000000001e-05, "loss": 0.025, "step": 3230 }, { "epoch": 4.857571214392804, "grad_norm": 0.6437982320785522, "learning_rate": 6.478000000000001e-05, "loss": 0.0207, "step": 3240 }, { "epoch": 4.872563718140929, "grad_norm": 0.391122967004776, "learning_rate": 6.498e-05, "loss": 0.0239, "step": 3250 }, { "epoch": 4.887556221889056, "grad_norm": 0.42035433650016785, "learning_rate": 6.518000000000001e-05, "loss": 0.0309, "step": 3260 }, { "epoch": 4.902548725637182, "grad_norm": 0.4159294068813324, "learning_rate": 6.538000000000001e-05, "loss": 0.0265, "step": 3270 }, { "epoch": 4.917541229385307, "grad_norm": 0.47672730684280396, "learning_rate": 6.558e-05, "loss": 0.0241, "step": 3280 }, { "epoch": 4.932533733133433, "grad_norm": 0.281084805727005, "learning_rate": 6.578000000000001e-05, "loss": 0.0262, "step": 3290 }, { "epoch": 4.94752623688156, "grad_norm": 0.5118176937103271, "learning_rate": 6.598e-05, "loss": 0.0257, "step": 3300 }, { "epoch": 4.962518740629685, "grad_norm": 0.5940420627593994, "learning_rate": 6.618e-05, "loss": 0.0267, "step": 3310 }, { "epoch": 4.977511244377811, "grad_norm": 0.5075573921203613, "learning_rate": 6.638e-05, "loss": 0.0253, "step": 3320 }, { "epoch": 4.992503748125937, "grad_norm": 0.408496230840683, "learning_rate": 6.658e-05, "loss": 0.0251, "step": 3330 }, { "epoch": 5.007496251874063, "grad_norm": 0.4248218834400177, "learning_rate": 6.678e-05, "loss": 0.0257, "step": 3340 }, { "epoch": 5.022488755622189, "grad_norm": 0.39737120270729065, "learning_rate": 6.698e-05, "loss": 0.0304, "step": 3350 }, { "epoch": 5.037481259370315, "grad_norm": 0.45250195264816284, "learning_rate": 6.718e-05, "loss": 0.0218, "step": 3360 }, { "epoch": 5.05247376311844, "grad_norm": 0.3925962448120117, "learning_rate": 6.738e-05, "loss": 0.0299, "step": 3370 }, { "epoch": 5.067466266866567, "grad_norm": 0.3701769709587097, "learning_rate": 6.758e-05, "loss": 0.0288, "step": 3380 }, { "epoch": 5.082458770614693, "grad_norm": 0.5102025866508484, "learning_rate": 6.778e-05, "loss": 0.0267, "step": 3390 }, { "epoch": 5.097451274362818, "grad_norm": 0.37801459431648254, "learning_rate": 6.798e-05, "loss": 0.0319, "step": 3400 }, { "epoch": 5.112443778110944, "grad_norm": 0.4114863872528076, "learning_rate": 6.818e-05, "loss": 0.0259, "step": 3410 }, { "epoch": 5.127436281859071, "grad_norm": 0.30654609203338623, "learning_rate": 6.838e-05, "loss": 0.0259, "step": 3420 }, { "epoch": 5.142428785607196, "grad_norm": 0.367466002702713, "learning_rate": 6.858e-05, "loss": 0.0221, "step": 3430 }, { "epoch": 5.157421289355322, "grad_norm": 0.4129818081855774, "learning_rate": 6.878e-05, "loss": 0.0229, "step": 3440 }, { "epoch": 5.172413793103448, "grad_norm": 0.34470751881599426, "learning_rate": 6.898e-05, "loss": 0.023, "step": 3450 }, { "epoch": 5.187406296851575, "grad_norm": 0.483713299036026, "learning_rate": 6.918e-05, "loss": 0.0248, "step": 3460 }, { "epoch": 5.2023988005997, "grad_norm": 0.21136535704135895, "learning_rate": 6.938e-05, "loss": 0.0214, "step": 3470 }, { "epoch": 5.217391304347826, "grad_norm": 0.4309028387069702, "learning_rate": 6.958e-05, "loss": 0.0282, "step": 3480 }, { "epoch": 5.232383808095952, "grad_norm": 0.3445214331150055, "learning_rate": 6.978e-05, "loss": 0.0232, "step": 3490 }, { "epoch": 5.247376311844078, "grad_norm": 0.656903862953186, "learning_rate": 6.998e-05, "loss": 0.0303, "step": 3500 }, { "epoch": 5.262368815592204, "grad_norm": 0.4701072573661804, "learning_rate": 7.018e-05, "loss": 0.027, "step": 3510 }, { "epoch": 5.27736131934033, "grad_norm": 0.481931209564209, "learning_rate": 7.038e-05, "loss": 0.023, "step": 3520 }, { "epoch": 5.2923538230884555, "grad_norm": 0.4491264820098877, "learning_rate": 7.058e-05, "loss": 0.0274, "step": 3530 }, { "epoch": 5.307346326836582, "grad_norm": 0.5957882404327393, "learning_rate": 7.078e-05, "loss": 0.032, "step": 3540 }, { "epoch": 5.322338830584708, "grad_norm": 0.3993901312351227, "learning_rate": 7.098e-05, "loss": 0.0315, "step": 3550 }, { "epoch": 5.337331334332833, "grad_norm": 0.49840158224105835, "learning_rate": 7.118e-05, "loss": 0.0267, "step": 3560 }, { "epoch": 5.3523238380809595, "grad_norm": 0.2678443491458893, "learning_rate": 7.138e-05, "loss": 0.0286, "step": 3570 }, { "epoch": 5.367316341829086, "grad_norm": 0.7395908236503601, "learning_rate": 7.158e-05, "loss": 0.028, "step": 3580 }, { "epoch": 5.382308845577211, "grad_norm": 0.4880072772502899, "learning_rate": 7.178000000000001e-05, "loss": 0.0235, "step": 3590 }, { "epoch": 5.397301349325337, "grad_norm": 0.30752211809158325, "learning_rate": 7.198e-05, "loss": 0.0272, "step": 3600 }, { "epoch": 5.4122938530734634, "grad_norm": 0.2086288183927536, "learning_rate": 7.218e-05, "loss": 0.0226, "step": 3610 }, { "epoch": 5.42728635682159, "grad_norm": 0.2572075426578522, "learning_rate": 7.238000000000001e-05, "loss": 0.0249, "step": 3620 }, { "epoch": 5.442278860569715, "grad_norm": 0.5395610332489014, "learning_rate": 7.258e-05, "loss": 0.0248, "step": 3630 }, { "epoch": 5.457271364317841, "grad_norm": 0.5399361252784729, "learning_rate": 7.278e-05, "loss": 0.0299, "step": 3640 }, { "epoch": 5.472263868065967, "grad_norm": 0.2959550619125366, "learning_rate": 7.298000000000001e-05, "loss": 0.0258, "step": 3650 }, { "epoch": 5.487256371814093, "grad_norm": 0.2834092676639557, "learning_rate": 7.318e-05, "loss": 0.0206, "step": 3660 }, { "epoch": 5.502248875562219, "grad_norm": 0.5493492484092712, "learning_rate": 7.338e-05, "loss": 0.025, "step": 3670 }, { "epoch": 5.517241379310345, "grad_norm": 0.5069541335105896, "learning_rate": 7.358000000000001e-05, "loss": 0.0277, "step": 3680 }, { "epoch": 5.5322338830584705, "grad_norm": 0.49362102150917053, "learning_rate": 7.378e-05, "loss": 0.0289, "step": 3690 }, { "epoch": 5.547226386806597, "grad_norm": 0.658973753452301, "learning_rate": 7.398e-05, "loss": 0.0294, "step": 3700 }, { "epoch": 5.562218890554723, "grad_norm": 0.3355591595172882, "learning_rate": 7.418000000000001e-05, "loss": 0.0348, "step": 3710 }, { "epoch": 5.577211394302848, "grad_norm": 0.6101976037025452, "learning_rate": 7.438e-05, "loss": 0.031, "step": 3720 }, { "epoch": 5.5922038980509745, "grad_norm": 0.46887868642807007, "learning_rate": 7.458000000000001e-05, "loss": 0.0203, "step": 3730 }, { "epoch": 5.607196401799101, "grad_norm": 0.43108808994293213, "learning_rate": 7.478e-05, "loss": 0.0269, "step": 3740 }, { "epoch": 5.622188905547226, "grad_norm": 0.4260016679763794, "learning_rate": 7.498e-05, "loss": 0.0285, "step": 3750 }, { "epoch": 5.637181409295352, "grad_norm": 0.6859562993049622, "learning_rate": 7.518000000000001e-05, "loss": 0.0249, "step": 3760 }, { "epoch": 5.6521739130434785, "grad_norm": 0.7248268127441406, "learning_rate": 7.538e-05, "loss": 0.033, "step": 3770 }, { "epoch": 5.667166416791604, "grad_norm": 0.5469388365745544, "learning_rate": 7.558e-05, "loss": 0.0317, "step": 3780 }, { "epoch": 5.68215892053973, "grad_norm": 0.4509018361568451, "learning_rate": 7.578000000000001e-05, "loss": 0.0256, "step": 3790 }, { "epoch": 5.697151424287856, "grad_norm": 0.713411808013916, "learning_rate": 7.598e-05, "loss": 0.0307, "step": 3800 }, { "epoch": 5.712143928035982, "grad_norm": 0.5839192271232605, "learning_rate": 7.618e-05, "loss": 0.0266, "step": 3810 }, { "epoch": 5.727136431784108, "grad_norm": 0.4234388470649719, "learning_rate": 7.638000000000001e-05, "loss": 0.0312, "step": 3820 }, { "epoch": 5.742128935532234, "grad_norm": 0.27885860204696655, "learning_rate": 7.658e-05, "loss": 0.0274, "step": 3830 }, { "epoch": 5.757121439280359, "grad_norm": 0.4587342441082001, "learning_rate": 7.678000000000001e-05, "loss": 0.0267, "step": 3840 }, { "epoch": 5.772113943028486, "grad_norm": 0.5152937173843384, "learning_rate": 7.698000000000001e-05, "loss": 0.0263, "step": 3850 }, { "epoch": 5.787106446776612, "grad_norm": 0.3946644067764282, "learning_rate": 7.718e-05, "loss": 0.0288, "step": 3860 }, { "epoch": 5.802098950524738, "grad_norm": 0.6451663374900818, "learning_rate": 7.738000000000001e-05, "loss": 0.0263, "step": 3870 }, { "epoch": 5.817091454272863, "grad_norm": 0.2809458374977112, "learning_rate": 7.758000000000001e-05, "loss": 0.0243, "step": 3880 }, { "epoch": 5.8320839580209896, "grad_norm": 0.3646807372570038, "learning_rate": 7.778e-05, "loss": 0.0248, "step": 3890 }, { "epoch": 5.847076461769116, "grad_norm": 0.33319252729415894, "learning_rate": 7.798000000000001e-05, "loss": 0.0289, "step": 3900 }, { "epoch": 5.862068965517241, "grad_norm": 0.467597097158432, "learning_rate": 7.818000000000001e-05, "loss": 0.0275, "step": 3910 }, { "epoch": 5.877061469265367, "grad_norm": 0.47090181708335876, "learning_rate": 7.838e-05, "loss": 0.0262, "step": 3920 }, { "epoch": 5.8920539730134935, "grad_norm": 0.2935847043991089, "learning_rate": 7.858000000000001e-05, "loss": 0.0255, "step": 3930 }, { "epoch": 5.907046476761619, "grad_norm": 0.8062829375267029, "learning_rate": 7.878e-05, "loss": 0.0291, "step": 3940 }, { "epoch": 5.922038980509745, "grad_norm": 0.4031580090522766, "learning_rate": 7.897999999999999e-05, "loss": 0.0286, "step": 3950 }, { "epoch": 5.937031484257871, "grad_norm": 0.28734612464904785, "learning_rate": 7.918e-05, "loss": 0.0249, "step": 3960 }, { "epoch": 5.952023988005997, "grad_norm": 0.43835529685020447, "learning_rate": 7.938e-05, "loss": 0.0291, "step": 3970 }, { "epoch": 5.967016491754123, "grad_norm": 0.456620991230011, "learning_rate": 7.958e-05, "loss": 0.0287, "step": 3980 }, { "epoch": 5.982008995502249, "grad_norm": 0.39337584376335144, "learning_rate": 7.978e-05, "loss": 0.0281, "step": 3990 }, { "epoch": 5.997001499250375, "grad_norm": 0.4338148832321167, "learning_rate": 7.998e-05, "loss": 0.0272, "step": 4000 }, { "epoch": 6.011994002998501, "grad_norm": 0.3682861626148224, "learning_rate": 8.018e-05, "loss": 0.023, "step": 4010 }, { "epoch": 6.026986506746627, "grad_norm": 0.4797731041908264, "learning_rate": 8.038e-05, "loss": 0.0241, "step": 4020 }, { "epoch": 6.041979010494753, "grad_norm": 0.26338934898376465, "learning_rate": 8.058e-05, "loss": 0.0291, "step": 4030 }, { "epoch": 6.056971514242878, "grad_norm": 0.4103999137878418, "learning_rate": 8.078e-05, "loss": 0.0253, "step": 4040 }, { "epoch": 6.071964017991005, "grad_norm": 0.42007970809936523, "learning_rate": 8.098e-05, "loss": 0.0238, "step": 4050 }, { "epoch": 6.086956521739131, "grad_norm": 0.2664199471473694, "learning_rate": 8.118e-05, "loss": 0.0276, "step": 4060 }, { "epoch": 6.101949025487256, "grad_norm": 0.49657756090164185, "learning_rate": 8.138e-05, "loss": 0.0287, "step": 4070 }, { "epoch": 6.116941529235382, "grad_norm": 0.34607890248298645, "learning_rate": 8.158e-05, "loss": 0.0228, "step": 4080 }, { "epoch": 6.131934032983509, "grad_norm": 0.6504865884780884, "learning_rate": 8.178e-05, "loss": 0.0264, "step": 4090 }, { "epoch": 6.146926536731634, "grad_norm": 0.394433856010437, "learning_rate": 8.198e-05, "loss": 0.0279, "step": 4100 }, { "epoch": 6.16191904047976, "grad_norm": 0.28207188844680786, "learning_rate": 8.218e-05, "loss": 0.0245, "step": 4110 }, { "epoch": 6.176911544227886, "grad_norm": 0.3311763405799866, "learning_rate": 8.238000000000001e-05, "loss": 0.024, "step": 4120 }, { "epoch": 6.191904047976012, "grad_norm": 0.27296751737594604, "learning_rate": 8.258e-05, "loss": 0.0237, "step": 4130 }, { "epoch": 6.206896551724138, "grad_norm": 0.4681967794895172, "learning_rate": 8.278e-05, "loss": 0.0253, "step": 4140 }, { "epoch": 6.221889055472264, "grad_norm": 0.4561450779438019, "learning_rate": 8.298000000000001e-05, "loss": 0.0228, "step": 4150 }, { "epoch": 6.2368815592203894, "grad_norm": 0.3745529353618622, "learning_rate": 8.318e-05, "loss": 0.0212, "step": 4160 }, { "epoch": 6.251874062968516, "grad_norm": 0.6292225122451782, "learning_rate": 8.338e-05, "loss": 0.0283, "step": 4170 }, { "epoch": 6.266866566716642, "grad_norm": 0.4040026068687439, "learning_rate": 8.358e-05, "loss": 0.0279, "step": 4180 }, { "epoch": 6.281859070464767, "grad_norm": 0.39735427498817444, "learning_rate": 8.378e-05, "loss": 0.0257, "step": 4190 }, { "epoch": 6.296851574212893, "grad_norm": 0.38071340322494507, "learning_rate": 8.398e-05, "loss": 0.0227, "step": 4200 }, { "epoch": 6.31184407796102, "grad_norm": 0.533391535282135, "learning_rate": 8.418e-05, "loss": 0.0261, "step": 4210 }, { "epoch": 6.326836581709145, "grad_norm": 0.3036806285381317, "learning_rate": 8.438e-05, "loss": 0.0254, "step": 4220 }, { "epoch": 6.341829085457271, "grad_norm": 0.372399240732193, "learning_rate": 8.458e-05, "loss": 0.0266, "step": 4230 }, { "epoch": 6.356821589205397, "grad_norm": 0.4678504467010498, "learning_rate": 8.478e-05, "loss": 0.0296, "step": 4240 }, { "epoch": 6.371814092953524, "grad_norm": 0.306121826171875, "learning_rate": 8.498e-05, "loss": 0.027, "step": 4250 }, { "epoch": 6.386806596701649, "grad_norm": 0.396908700466156, "learning_rate": 8.518000000000001e-05, "loss": 0.0303, "step": 4260 }, { "epoch": 6.401799100449775, "grad_norm": 0.3144121766090393, "learning_rate": 8.538e-05, "loss": 0.0237, "step": 4270 }, { "epoch": 6.416791604197901, "grad_norm": 0.486493319272995, "learning_rate": 8.558e-05, "loss": 0.0231, "step": 4280 }, { "epoch": 6.431784107946027, "grad_norm": 0.4401141405105591, "learning_rate": 8.578000000000001e-05, "loss": 0.0223, "step": 4290 }, { "epoch": 6.446776611694153, "grad_norm": 0.3221685290336609, "learning_rate": 8.598e-05, "loss": 0.0253, "step": 4300 }, { "epoch": 6.461769115442279, "grad_norm": 0.3544839024543762, "learning_rate": 8.618e-05, "loss": 0.0241, "step": 4310 }, { "epoch": 6.4767616191904045, "grad_norm": 0.44466298818588257, "learning_rate": 8.638000000000001e-05, "loss": 0.0281, "step": 4320 }, { "epoch": 6.491754122938531, "grad_norm": 0.284768283367157, "learning_rate": 8.658e-05, "loss": 0.0236, "step": 4330 }, { "epoch": 6.506746626686657, "grad_norm": 0.44669657945632935, "learning_rate": 8.678e-05, "loss": 0.0239, "step": 4340 }, { "epoch": 6.521739130434782, "grad_norm": 0.5169503092765808, "learning_rate": 8.698000000000001e-05, "loss": 0.0234, "step": 4350 }, { "epoch": 6.5367316341829085, "grad_norm": 0.5080941915512085, "learning_rate": 8.718e-05, "loss": 0.0218, "step": 4360 }, { "epoch": 6.551724137931035, "grad_norm": 0.2500442862510681, "learning_rate": 8.738000000000001e-05, "loss": 0.0258, "step": 4370 }, { "epoch": 6.566716641679161, "grad_norm": 0.5339643955230713, "learning_rate": 8.758000000000001e-05, "loss": 0.0276, "step": 4380 }, { "epoch": 6.581709145427286, "grad_norm": 0.40371423959732056, "learning_rate": 8.778e-05, "loss": 0.0277, "step": 4390 }, { "epoch": 6.5967016491754125, "grad_norm": 0.3140086829662323, "learning_rate": 8.798000000000001e-05, "loss": 0.0323, "step": 4400 }, { "epoch": 6.611694152923539, "grad_norm": 0.4289420545101166, "learning_rate": 8.818000000000001e-05, "loss": 0.0284, "step": 4410 }, { "epoch": 6.626686656671664, "grad_norm": 0.33737874031066895, "learning_rate": 8.838e-05, "loss": 0.0247, "step": 4420 }, { "epoch": 6.64167916041979, "grad_norm": 0.3467290699481964, "learning_rate": 8.858000000000001e-05, "loss": 0.029, "step": 4430 }, { "epoch": 6.6566716641679164, "grad_norm": 0.35096094012260437, "learning_rate": 8.878000000000001e-05, "loss": 0.0294, "step": 4440 }, { "epoch": 6.671664167916042, "grad_norm": 0.4096924960613251, "learning_rate": 8.898e-05, "loss": 0.0258, "step": 4450 }, { "epoch": 6.686656671664168, "grad_norm": 0.5629929900169373, "learning_rate": 8.918000000000001e-05, "loss": 0.0302, "step": 4460 }, { "epoch": 6.701649175412294, "grad_norm": 0.3207022249698639, "learning_rate": 8.938e-05, "loss": 0.0264, "step": 4470 }, { "epoch": 6.7166416791604195, "grad_norm": 0.20799192786216736, "learning_rate": 8.958e-05, "loss": 0.0272, "step": 4480 }, { "epoch": 6.731634182908546, "grad_norm": 0.6238973736763, "learning_rate": 8.978000000000001e-05, "loss": 0.0293, "step": 4490 }, { "epoch": 6.746626686656672, "grad_norm": 0.683045506477356, "learning_rate": 8.998e-05, "loss": 0.0314, "step": 4500 }, { "epoch": 6.761619190404797, "grad_norm": 0.38669443130493164, "learning_rate": 9.018000000000001e-05, "loss": 0.032, "step": 4510 }, { "epoch": 6.7766116941529235, "grad_norm": 0.6826093792915344, "learning_rate": 9.038000000000001e-05, "loss": 0.0286, "step": 4520 }, { "epoch": 6.79160419790105, "grad_norm": 0.47045814990997314, "learning_rate": 9.058e-05, "loss": 0.0268, "step": 4530 }, { "epoch": 6.806596701649175, "grad_norm": 0.3681105971336365, "learning_rate": 9.078000000000001e-05, "loss": 0.0228, "step": 4540 }, { "epoch": 6.821589205397301, "grad_norm": 0.5198001861572266, "learning_rate": 9.098000000000001e-05, "loss": 0.0249, "step": 4550 }, { "epoch": 6.8365817091454275, "grad_norm": 0.37484729290008545, "learning_rate": 9.118e-05, "loss": 0.0219, "step": 4560 }, { "epoch": 6.851574212893553, "grad_norm": 0.3984006345272064, "learning_rate": 9.138e-05, "loss": 0.0255, "step": 4570 }, { "epoch": 6.866566716641679, "grad_norm": 0.2220279425382614, "learning_rate": 9.158e-05, "loss": 0.0249, "step": 4580 }, { "epoch": 6.881559220389805, "grad_norm": 0.3429665267467499, "learning_rate": 9.178e-05, "loss": 0.0259, "step": 4590 }, { "epoch": 6.896551724137931, "grad_norm": 0.28920507431030273, "learning_rate": 9.198e-05, "loss": 0.0246, "step": 4600 }, { "epoch": 6.911544227886057, "grad_norm": 0.3477950096130371, "learning_rate": 9.218e-05, "loss": 0.0291, "step": 4610 }, { "epoch": 6.926536731634183, "grad_norm": 0.24320971965789795, "learning_rate": 9.238e-05, "loss": 0.0315, "step": 4620 }, { "epoch": 6.941529235382308, "grad_norm": 0.21767182648181915, "learning_rate": 9.258e-05, "loss": 0.0254, "step": 4630 }, { "epoch": 6.956521739130435, "grad_norm": 0.758289098739624, "learning_rate": 9.278e-05, "loss": 0.0281, "step": 4640 }, { "epoch": 6.971514242878561, "grad_norm": 0.3680587410926819, "learning_rate": 9.298e-05, "loss": 0.029, "step": 4650 }, { "epoch": 6.986506746626687, "grad_norm": 0.661546528339386, "learning_rate": 9.318e-05, "loss": 0.035, "step": 4660 }, { "epoch": 7.001499250374812, "grad_norm": 0.3998330533504486, "learning_rate": 9.338e-05, "loss": 0.029, "step": 4670 }, { "epoch": 7.016491754122939, "grad_norm": 0.5103744864463806, "learning_rate": 9.358e-05, "loss": 0.0254, "step": 4680 }, { "epoch": 7.031484257871065, "grad_norm": 0.4459877610206604, "learning_rate": 9.378e-05, "loss": 0.0301, "step": 4690 }, { "epoch": 7.04647676161919, "grad_norm": 0.41950884461402893, "learning_rate": 9.398e-05, "loss": 0.0321, "step": 4700 }, { "epoch": 7.061469265367316, "grad_norm": 0.6622359156608582, "learning_rate": 9.418e-05, "loss": 0.0261, "step": 4710 }, { "epoch": 7.076461769115443, "grad_norm": 0.5790828466415405, "learning_rate": 9.438e-05, "loss": 0.0301, "step": 4720 }, { "epoch": 7.091454272863568, "grad_norm": 0.5139708518981934, "learning_rate": 9.458e-05, "loss": 0.0279, "step": 4730 }, { "epoch": 7.106446776611694, "grad_norm": 0.5255036950111389, "learning_rate": 9.478e-05, "loss": 0.0253, "step": 4740 }, { "epoch": 7.12143928035982, "grad_norm": 0.25787708163261414, "learning_rate": 9.498e-05, "loss": 0.0237, "step": 4750 }, { "epoch": 7.136431784107946, "grad_norm": 0.36950045824050903, "learning_rate": 9.518000000000001e-05, "loss": 0.0277, "step": 4760 }, { "epoch": 7.151424287856072, "grad_norm": 0.49220797419548035, "learning_rate": 9.538e-05, "loss": 0.0281, "step": 4770 }, { "epoch": 7.166416791604198, "grad_norm": 0.5166265368461609, "learning_rate": 9.558e-05, "loss": 0.0324, "step": 4780 }, { "epoch": 7.181409295352323, "grad_norm": 0.3699895739555359, "learning_rate": 9.578000000000001e-05, "loss": 0.0269, "step": 4790 }, { "epoch": 7.19640179910045, "grad_norm": 0.3544500768184662, "learning_rate": 9.598e-05, "loss": 0.0286, "step": 4800 }, { "epoch": 7.211394302848576, "grad_norm": 0.43215808272361755, "learning_rate": 9.618e-05, "loss": 0.026, "step": 4810 }, { "epoch": 7.226386806596702, "grad_norm": 0.3366709053516388, "learning_rate": 9.638000000000001e-05, "loss": 0.0255, "step": 4820 }, { "epoch": 7.241379310344827, "grad_norm": 0.5390172600746155, "learning_rate": 9.658e-05, "loss": 0.0269, "step": 4830 }, { "epoch": 7.256371814092954, "grad_norm": 0.42898935079574585, "learning_rate": 9.678e-05, "loss": 0.0399, "step": 4840 }, { "epoch": 7.27136431784108, "grad_norm": 0.29673340916633606, "learning_rate": 9.698000000000001e-05, "loss": 0.0275, "step": 4850 }, { "epoch": 7.286356821589205, "grad_norm": 0.3137001693248749, "learning_rate": 9.718e-05, "loss": 0.0265, "step": 4860 }, { "epoch": 7.301349325337331, "grad_norm": 0.38921621441841125, "learning_rate": 9.738e-05, "loss": 0.0263, "step": 4870 }, { "epoch": 7.316341829085458, "grad_norm": 0.40818408131599426, "learning_rate": 9.758000000000001e-05, "loss": 0.0275, "step": 4880 }, { "epoch": 7.331334332833583, "grad_norm": 0.5272011160850525, "learning_rate": 9.778e-05, "loss": 0.0254, "step": 4890 }, { "epoch": 7.346326836581709, "grad_norm": 0.3612275719642639, "learning_rate": 9.798000000000001e-05, "loss": 0.0304, "step": 4900 }, { "epoch": 7.361319340329835, "grad_norm": 0.3850025534629822, "learning_rate": 9.818000000000001e-05, "loss": 0.029, "step": 4910 }, { "epoch": 7.376311844077961, "grad_norm": 0.28618761897087097, "learning_rate": 9.838e-05, "loss": 0.0217, "step": 4920 }, { "epoch": 7.391304347826087, "grad_norm": 0.4691023528575897, "learning_rate": 9.858000000000001e-05, "loss": 0.0266, "step": 4930 }, { "epoch": 7.406296851574213, "grad_norm": 0.26261433959007263, "learning_rate": 9.878e-05, "loss": 0.0245, "step": 4940 }, { "epoch": 7.4212893553223385, "grad_norm": 0.36436763405799866, "learning_rate": 9.898e-05, "loss": 0.0285, "step": 4950 }, { "epoch": 7.436281859070465, "grad_norm": 0.4470043182373047, "learning_rate": 9.918000000000001e-05, "loss": 0.0299, "step": 4960 }, { "epoch": 7.451274362818591, "grad_norm": 0.5527220368385315, "learning_rate": 9.938e-05, "loss": 0.0329, "step": 4970 }, { "epoch": 7.466266866566716, "grad_norm": 0.36053910851478577, "learning_rate": 9.958e-05, "loss": 0.031, "step": 4980 }, { "epoch": 7.4812593703148424, "grad_norm": 0.4211731553077698, "learning_rate": 9.978000000000001e-05, "loss": 0.0257, "step": 4990 }, { "epoch": 7.496251874062969, "grad_norm": 0.36269623041152954, "learning_rate": 9.998e-05, "loss": 0.0265, "step": 5000 }, { "epoch": 7.511244377811094, "grad_norm": 0.32534855604171753, "learning_rate": 9.999999778549045e-05, "loss": 0.0287, "step": 5010 }, { "epoch": 7.52623688155922, "grad_norm": 0.2773454189300537, "learning_rate": 9.999999013039593e-05, "loss": 0.03, "step": 5020 }, { "epoch": 7.541229385307346, "grad_norm": 0.27131906151771545, "learning_rate": 9.999997700737766e-05, "loss": 0.0222, "step": 5030 }, { "epoch": 7.556221889055473, "grad_norm": 0.31759756803512573, "learning_rate": 9.999995841643709e-05, "loss": 0.0241, "step": 5040 }, { "epoch": 7.571214392803598, "grad_norm": 0.31476277112960815, "learning_rate": 9.999993435757623e-05, "loss": 0.0227, "step": 5050 }, { "epoch": 7.586206896551724, "grad_norm": 0.38768646121025085, "learning_rate": 9.999990483079773e-05, "loss": 0.0261, "step": 5060 }, { "epoch": 7.60119940029985, "grad_norm": 0.43208909034729004, "learning_rate": 9.999986983610481e-05, "loss": 0.0215, "step": 5070 }, { "epoch": 7.616191904047976, "grad_norm": 0.49132946133613586, "learning_rate": 9.99998293735013e-05, "loss": 0.026, "step": 5080 }, { "epoch": 7.631184407796102, "grad_norm": 0.6390396952629089, "learning_rate": 9.999978344299161e-05, "loss": 0.0306, "step": 5090 }, { "epoch": 7.646176911544228, "grad_norm": 0.40413936972618103, "learning_rate": 9.99997320445808e-05, "loss": 0.0276, "step": 5100 }, { "epoch": 7.6611694152923535, "grad_norm": 0.6139463186264038, "learning_rate": 9.999967517827444e-05, "loss": 0.0276, "step": 5110 }, { "epoch": 7.67616191904048, "grad_norm": 0.46102285385131836, "learning_rate": 9.999961284407879e-05, "loss": 0.037, "step": 5120 }, { "epoch": 7.691154422788606, "grad_norm": 0.4320075809955597, "learning_rate": 9.999954504200067e-05, "loss": 0.0284, "step": 5130 }, { "epoch": 7.706146926536731, "grad_norm": 0.5356706380844116, "learning_rate": 9.999947177204744e-05, "loss": 0.0256, "step": 5140 }, { "epoch": 7.7211394302848575, "grad_norm": 0.29085052013397217, "learning_rate": 9.999939303422718e-05, "loss": 0.0268, "step": 5150 }, { "epoch": 7.736131934032984, "grad_norm": 0.4449183940887451, "learning_rate": 9.999930882854847e-05, "loss": 0.0292, "step": 5160 }, { "epoch": 7.75112443778111, "grad_norm": 0.45596733689308167, "learning_rate": 9.999921915502051e-05, "loss": 0.0213, "step": 5170 }, { "epoch": 7.766116941529235, "grad_norm": 0.3371008038520813, "learning_rate": 9.99991240136531e-05, "loss": 0.0261, "step": 5180 }, { "epoch": 7.7811094452773615, "grad_norm": 0.4112216532230377, "learning_rate": 9.999902340445668e-05, "loss": 0.0348, "step": 5190 }, { "epoch": 7.796101949025488, "grad_norm": 0.5604249238967896, "learning_rate": 9.999891732744224e-05, "loss": 0.0281, "step": 5200 }, { "epoch": 7.811094452773613, "grad_norm": 0.5985254049301147, "learning_rate": 9.999880578262135e-05, "loss": 0.0278, "step": 5210 }, { "epoch": 7.826086956521739, "grad_norm": 0.4814131259918213, "learning_rate": 9.999868877000624e-05, "loss": 0.0269, "step": 5220 }, { "epoch": 7.8410794602698655, "grad_norm": 0.2527081370353699, "learning_rate": 9.99985662896097e-05, "loss": 0.0242, "step": 5230 }, { "epoch": 7.856071964017991, "grad_norm": 0.3255614936351776, "learning_rate": 9.999843834144513e-05, "loss": 0.0219, "step": 5240 }, { "epoch": 7.871064467766117, "grad_norm": 0.29755499958992004, "learning_rate": 9.99983049255265e-05, "loss": 0.0231, "step": 5250 }, { "epoch": 7.886056971514243, "grad_norm": 0.39504027366638184, "learning_rate": 9.999816604186843e-05, "loss": 0.0269, "step": 5260 }, { "epoch": 7.901049475262369, "grad_norm": 0.3738665282726288, "learning_rate": 9.999802169048609e-05, "loss": 0.0228, "step": 5270 }, { "epoch": 7.916041979010495, "grad_norm": 0.24702244997024536, "learning_rate": 9.999787187139527e-05, "loss": 0.0252, "step": 5280 }, { "epoch": 7.931034482758621, "grad_norm": 0.5282250642776489, "learning_rate": 9.999771658461234e-05, "loss": 0.0232, "step": 5290 }, { "epoch": 7.946026986506746, "grad_norm": 0.5407117009162903, "learning_rate": 9.999755583015431e-05, "loss": 0.0258, "step": 5300 }, { "epoch": 7.9610194902548725, "grad_norm": 0.40336930751800537, "learning_rate": 9.999738960803874e-05, "loss": 0.0266, "step": 5310 }, { "epoch": 7.976011994002999, "grad_norm": 0.2538587152957916, "learning_rate": 9.99972179182838e-05, "loss": 0.0233, "step": 5320 }, { "epoch": 7.991004497751124, "grad_norm": 0.46852368116378784, "learning_rate": 9.99970407609083e-05, "loss": 0.0221, "step": 5330 }, { "epoch": 8.005997001499251, "grad_norm": 0.34248676896095276, "learning_rate": 9.999685813593159e-05, "loss": 0.0272, "step": 5340 }, { "epoch": 8.020989505247377, "grad_norm": 0.4238882064819336, "learning_rate": 9.999667004337362e-05, "loss": 0.0229, "step": 5350 }, { "epoch": 8.035982008995502, "grad_norm": 0.3883921205997467, "learning_rate": 9.9996476483255e-05, "loss": 0.0273, "step": 5360 }, { "epoch": 8.050974512743629, "grad_norm": 0.18701647222042084, "learning_rate": 9.999627745559688e-05, "loss": 0.0256, "step": 5370 }, { "epoch": 8.065967016491754, "grad_norm": 0.1959642469882965, "learning_rate": 9.999607296042101e-05, "loss": 0.0222, "step": 5380 }, { "epoch": 8.08095952023988, "grad_norm": 0.3664335608482361, "learning_rate": 9.99958629977498e-05, "loss": 0.0236, "step": 5390 }, { "epoch": 8.095952023988007, "grad_norm": 0.2864916920661926, "learning_rate": 9.999564756760615e-05, "loss": 0.0223, "step": 5400 }, { "epoch": 8.110944527736132, "grad_norm": 0.6170476675033569, "learning_rate": 9.999542667001366e-05, "loss": 0.0249, "step": 5410 }, { "epoch": 8.125937031484257, "grad_norm": 0.3584560453891754, "learning_rate": 9.999520030499647e-05, "loss": 0.0203, "step": 5420 }, { "epoch": 8.140929535232384, "grad_norm": 0.30870521068573, "learning_rate": 9.999496847257936e-05, "loss": 0.0252, "step": 5430 }, { "epoch": 8.15592203898051, "grad_norm": 0.5862494707107544, "learning_rate": 9.999473117278764e-05, "loss": 0.0259, "step": 5440 }, { "epoch": 8.170914542728635, "grad_norm": 0.302327036857605, "learning_rate": 9.999448840564731e-05, "loss": 0.0288, "step": 5450 }, { "epoch": 8.185907046476762, "grad_norm": 0.4368806779384613, "learning_rate": 9.999424017118488e-05, "loss": 0.0293, "step": 5460 }, { "epoch": 8.200899550224888, "grad_norm": 0.3779785931110382, "learning_rate": 9.999398646942751e-05, "loss": 0.0249, "step": 5470 }, { "epoch": 8.215892053973013, "grad_norm": 0.46820423007011414, "learning_rate": 9.999372730040296e-05, "loss": 0.0259, "step": 5480 }, { "epoch": 8.23088455772114, "grad_norm": 0.5257642269134521, "learning_rate": 9.999346266413953e-05, "loss": 0.0224, "step": 5490 }, { "epoch": 8.245877061469265, "grad_norm": 0.3880838453769684, "learning_rate": 9.99931925606662e-05, "loss": 0.022, "step": 5500 }, { "epoch": 8.26086956521739, "grad_norm": 0.4305361807346344, "learning_rate": 9.99929169900125e-05, "loss": 0.0232, "step": 5510 }, { "epoch": 8.275862068965518, "grad_norm": 0.23082509636878967, "learning_rate": 9.999263595220855e-05, "loss": 0.0194, "step": 5520 }, { "epoch": 8.290854572713643, "grad_norm": 0.2689354121685028, "learning_rate": 9.99923494472851e-05, "loss": 0.0279, "step": 5530 }, { "epoch": 8.305847076461768, "grad_norm": 0.38867196440696716, "learning_rate": 9.999205747527348e-05, "loss": 0.0243, "step": 5540 }, { "epoch": 8.320839580209896, "grad_norm": 0.3558838367462158, "learning_rate": 9.999176003620561e-05, "loss": 0.0213, "step": 5550 }, { "epoch": 8.335832083958021, "grad_norm": 0.1852700561285019, "learning_rate": 9.999145713011405e-05, "loss": 0.023, "step": 5560 }, { "epoch": 8.350824587706146, "grad_norm": 0.23053456842899323, "learning_rate": 9.999114875703186e-05, "loss": 0.0191, "step": 5570 }, { "epoch": 8.365817091454273, "grad_norm": 0.4052848219871521, "learning_rate": 9.999083491699281e-05, "loss": 0.0213, "step": 5580 }, { "epoch": 8.380809595202399, "grad_norm": 0.5250983238220215, "learning_rate": 9.999051561003123e-05, "loss": 0.0223, "step": 5590 }, { "epoch": 8.395802098950524, "grad_norm": 0.29370811581611633, "learning_rate": 9.999019083618202e-05, "loss": 0.022, "step": 5600 }, { "epoch": 8.410794602698651, "grad_norm": 0.3893769681453705, "learning_rate": 9.99898605954807e-05, "loss": 0.0299, "step": 5610 }, { "epoch": 8.425787106446776, "grad_norm": 0.43034160137176514, "learning_rate": 9.998952488796338e-05, "loss": 0.0266, "step": 5620 }, { "epoch": 8.440779610194902, "grad_norm": 0.4464210271835327, "learning_rate": 9.998918371366676e-05, "loss": 0.0266, "step": 5630 }, { "epoch": 8.455772113943029, "grad_norm": 0.4790783226490021, "learning_rate": 9.99888370726282e-05, "loss": 0.021, "step": 5640 }, { "epoch": 8.470764617691154, "grad_norm": 0.4471001923084259, "learning_rate": 9.998848496488556e-05, "loss": 0.0225, "step": 5650 }, { "epoch": 8.48575712143928, "grad_norm": 0.6057403683662415, "learning_rate": 9.998812739047736e-05, "loss": 0.0258, "step": 5660 }, { "epoch": 8.500749625187407, "grad_norm": 0.33941665291786194, "learning_rate": 9.99877643494427e-05, "loss": 0.0243, "step": 5670 }, { "epoch": 8.515742128935532, "grad_norm": 0.25729691982269287, "learning_rate": 9.998739584182128e-05, "loss": 0.0251, "step": 5680 }, { "epoch": 8.530734632683657, "grad_norm": 0.3829537630081177, "learning_rate": 9.998702186765342e-05, "loss": 0.0266, "step": 5690 }, { "epoch": 8.545727136431784, "grad_norm": 0.522443413734436, "learning_rate": 9.998664242698e-05, "loss": 0.0207, "step": 5700 }, { "epoch": 8.56071964017991, "grad_norm": 0.35400116443634033, "learning_rate": 9.998625751984251e-05, "loss": 0.0216, "step": 5710 }, { "epoch": 8.575712143928037, "grad_norm": 0.44060832262039185, "learning_rate": 9.998586714628307e-05, "loss": 0.0302, "step": 5720 }, { "epoch": 8.590704647676162, "grad_norm": 0.6515967845916748, "learning_rate": 9.998547130634432e-05, "loss": 0.0315, "step": 5730 }, { "epoch": 8.605697151424287, "grad_norm": 0.40537938475608826, "learning_rate": 9.99850700000696e-05, "loss": 0.0235, "step": 5740 }, { "epoch": 8.620689655172415, "grad_norm": 0.4887751042842865, "learning_rate": 9.998466322750278e-05, "loss": 0.0268, "step": 5750 }, { "epoch": 8.63568215892054, "grad_norm": 0.3912718594074249, "learning_rate": 9.998425098868834e-05, "loss": 0.0274, "step": 5760 }, { "epoch": 8.650674662668665, "grad_norm": 0.5298880934715271, "learning_rate": 9.998383328367136e-05, "loss": 0.0234, "step": 5770 }, { "epoch": 8.665667166416792, "grad_norm": 0.36391034722328186, "learning_rate": 9.99834101124975e-05, "loss": 0.0285, "step": 5780 }, { "epoch": 8.680659670164918, "grad_norm": 0.34971559047698975, "learning_rate": 9.998298147521309e-05, "loss": 0.0286, "step": 5790 }, { "epoch": 8.695652173913043, "grad_norm": 0.35075852274894714, "learning_rate": 9.998254737186496e-05, "loss": 0.0231, "step": 5800 }, { "epoch": 8.71064467766117, "grad_norm": 0.29698070883750916, "learning_rate": 9.99821078025006e-05, "loss": 0.0197, "step": 5810 }, { "epoch": 8.725637181409295, "grad_norm": 0.282201886177063, "learning_rate": 9.998166276716807e-05, "loss": 0.022, "step": 5820 }, { "epoch": 8.74062968515742, "grad_norm": 0.49470776319503784, "learning_rate": 9.998121226591606e-05, "loss": 0.029, "step": 5830 }, { "epoch": 8.755622188905548, "grad_norm": 0.4108705520629883, "learning_rate": 9.998075629879382e-05, "loss": 0.0259, "step": 5840 }, { "epoch": 8.770614692653673, "grad_norm": 0.3977918028831482, "learning_rate": 9.99802948658512e-05, "loss": 0.0312, "step": 5850 }, { "epoch": 8.785607196401799, "grad_norm": 0.26755988597869873, "learning_rate": 9.99798279671387e-05, "loss": 0.024, "step": 5860 }, { "epoch": 8.800599700149926, "grad_norm": 0.28575316071510315, "learning_rate": 9.997935560270734e-05, "loss": 0.0259, "step": 5870 }, { "epoch": 8.815592203898051, "grad_norm": 0.4418277442455292, "learning_rate": 9.997887777260879e-05, "loss": 0.0324, "step": 5880 }, { "epoch": 8.830584707646176, "grad_norm": 0.39364883303642273, "learning_rate": 9.997839447689532e-05, "loss": 0.0241, "step": 5890 }, { "epoch": 8.845577211394303, "grad_norm": 0.391055166721344, "learning_rate": 9.997790571561978e-05, "loss": 0.0268, "step": 5900 }, { "epoch": 8.860569715142429, "grad_norm": 0.35235342383384705, "learning_rate": 9.99774114888356e-05, "loss": 0.0227, "step": 5910 }, { "epoch": 8.875562218890554, "grad_norm": 0.462946355342865, "learning_rate": 9.997691179659684e-05, "loss": 0.0222, "step": 5920 }, { "epoch": 8.890554722638681, "grad_norm": 0.3778655529022217, "learning_rate": 9.997640663895815e-05, "loss": 0.0187, "step": 5930 }, { "epoch": 8.905547226386807, "grad_norm": 0.4864121675491333, "learning_rate": 9.997589601597477e-05, "loss": 0.0231, "step": 5940 }, { "epoch": 8.920539730134932, "grad_norm": 0.4688422381877899, "learning_rate": 9.997537992770252e-05, "loss": 0.0236, "step": 5950 }, { "epoch": 8.935532233883059, "grad_norm": 0.5130874514579773, "learning_rate": 9.997485837419788e-05, "loss": 0.0222, "step": 5960 }, { "epoch": 8.950524737631184, "grad_norm": 0.3452247381210327, "learning_rate": 9.997433135551786e-05, "loss": 0.0228, "step": 5970 }, { "epoch": 8.96551724137931, "grad_norm": 0.44138896465301514, "learning_rate": 9.997379887172009e-05, "loss": 0.0236, "step": 5980 }, { "epoch": 8.980509745127437, "grad_norm": 0.5519752502441406, "learning_rate": 9.997326092286281e-05, "loss": 0.0237, "step": 5990 }, { "epoch": 8.995502248875562, "grad_norm": 0.3345456123352051, "learning_rate": 9.997271750900486e-05, "loss": 0.0327, "step": 6000 }, { "epoch": 9.010494752623687, "grad_norm": 0.3975534737110138, "learning_rate": 9.997216863020565e-05, "loss": 0.024, "step": 6010 }, { "epoch": 9.025487256371814, "grad_norm": 0.44033461809158325, "learning_rate": 9.99716142865252e-05, "loss": 0.028, "step": 6020 }, { "epoch": 9.04047976011994, "grad_norm": 0.17173299193382263, "learning_rate": 9.997105447802415e-05, "loss": 0.0198, "step": 6030 }, { "epoch": 9.055472263868065, "grad_norm": 0.21123473346233368, "learning_rate": 9.997048920476373e-05, "loss": 0.0256, "step": 6040 }, { "epoch": 9.070464767616192, "grad_norm": 0.2554328739643097, "learning_rate": 9.996991846680572e-05, "loss": 0.0189, "step": 6050 }, { "epoch": 9.085457271364318, "grad_norm": 0.22080443799495697, "learning_rate": 9.996934226421257e-05, "loss": 0.0222, "step": 6060 }, { "epoch": 9.100449775112443, "grad_norm": 0.37986138463020325, "learning_rate": 9.996876059704726e-05, "loss": 0.0204, "step": 6070 }, { "epoch": 9.11544227886057, "grad_norm": 0.4190172851085663, "learning_rate": 9.996817346537343e-05, "loss": 0.0246, "step": 6080 }, { "epoch": 9.130434782608695, "grad_norm": 0.45349401235580444, "learning_rate": 9.996758086925526e-05, "loss": 0.0226, "step": 6090 }, { "epoch": 9.145427286356822, "grad_norm": 0.3627827763557434, "learning_rate": 9.996698280875759e-05, "loss": 0.0202, "step": 6100 }, { "epoch": 9.160419790104948, "grad_norm": 0.3516427278518677, "learning_rate": 9.99663792839458e-05, "loss": 0.0241, "step": 6110 }, { "epoch": 9.175412293853073, "grad_norm": 0.41031068563461304, "learning_rate": 9.99657702948859e-05, "loss": 0.0218, "step": 6120 }, { "epoch": 9.1904047976012, "grad_norm": 0.31561747193336487, "learning_rate": 9.996515584164448e-05, "loss": 0.0264, "step": 6130 }, { "epoch": 9.205397301349326, "grad_norm": 0.34186461567878723, "learning_rate": 9.996453592428873e-05, "loss": 0.0237, "step": 6140 }, { "epoch": 9.22038980509745, "grad_norm": 0.3731537461280823, "learning_rate": 9.996391054288646e-05, "loss": 0.0201, "step": 6150 }, { "epoch": 9.235382308845578, "grad_norm": 0.41368284821510315, "learning_rate": 9.996327969750605e-05, "loss": 0.0248, "step": 6160 }, { "epoch": 9.250374812593703, "grad_norm": 0.36294955015182495, "learning_rate": 9.996264338821649e-05, "loss": 0.0235, "step": 6170 }, { "epoch": 9.265367316341829, "grad_norm": 0.2512459456920624, "learning_rate": 9.996200161508735e-05, "loss": 0.0218, "step": 6180 }, { "epoch": 9.280359820089956, "grad_norm": 0.32134056091308594, "learning_rate": 9.996135437818885e-05, "loss": 0.0256, "step": 6190 }, { "epoch": 9.295352323838081, "grad_norm": 0.26343125104904175, "learning_rate": 9.996070167759175e-05, "loss": 0.0218, "step": 6200 }, { "epoch": 9.310344827586206, "grad_norm": 0.3244744837284088, "learning_rate": 9.996004351336743e-05, "loss": 0.0214, "step": 6210 }, { "epoch": 9.325337331334334, "grad_norm": 0.24104847013950348, "learning_rate": 9.995937988558785e-05, "loss": 0.0219, "step": 6220 }, { "epoch": 9.340329835082459, "grad_norm": 0.3528400957584381, "learning_rate": 9.995871079432561e-05, "loss": 0.0257, "step": 6230 }, { "epoch": 9.355322338830584, "grad_norm": 0.4498286843299866, "learning_rate": 9.995803623965389e-05, "loss": 0.0233, "step": 6240 }, { "epoch": 9.370314842578711, "grad_norm": 0.39787107706069946, "learning_rate": 9.995735622164641e-05, "loss": 0.0261, "step": 6250 }, { "epoch": 9.385307346326837, "grad_norm": 0.5409586429595947, "learning_rate": 9.995667074037758e-05, "loss": 0.0273, "step": 6260 }, { "epoch": 9.400299850074962, "grad_norm": 0.3227854371070862, "learning_rate": 9.995597979592232e-05, "loss": 0.0245, "step": 6270 }, { "epoch": 9.415292353823089, "grad_norm": 0.4448113739490509, "learning_rate": 9.995528338835625e-05, "loss": 0.0272, "step": 6280 }, { "epoch": 9.430284857571214, "grad_norm": 0.4716537296772003, "learning_rate": 9.995458151775547e-05, "loss": 0.0249, "step": 6290 }, { "epoch": 9.44527736131934, "grad_norm": 0.4405542016029358, "learning_rate": 9.995387418419677e-05, "loss": 0.0242, "step": 6300 }, { "epoch": 9.460269865067467, "grad_norm": 0.38025498390197754, "learning_rate": 9.99531613877575e-05, "loss": 0.0224, "step": 6310 }, { "epoch": 9.475262368815592, "grad_norm": 0.3717125654220581, "learning_rate": 9.995244312851559e-05, "loss": 0.022, "step": 6320 }, { "epoch": 9.490254872563717, "grad_norm": 0.3749077022075653, "learning_rate": 9.995171940654961e-05, "loss": 0.0213, "step": 6330 }, { "epoch": 9.505247376311845, "grad_norm": 0.38216686248779297, "learning_rate": 9.995099022193871e-05, "loss": 0.0239, "step": 6340 }, { "epoch": 9.52023988005997, "grad_norm": 0.41579297184944153, "learning_rate": 9.995025557476261e-05, "loss": 0.0192, "step": 6350 }, { "epoch": 9.535232383808095, "grad_norm": 0.24617283046245575, "learning_rate": 9.994951546510165e-05, "loss": 0.0238, "step": 6360 }, { "epoch": 9.550224887556222, "grad_norm": 0.3493844270706177, "learning_rate": 9.994876989303679e-05, "loss": 0.0287, "step": 6370 }, { "epoch": 9.565217391304348, "grad_norm": 0.3653126060962677, "learning_rate": 9.994801885864955e-05, "loss": 0.024, "step": 6380 }, { "epoch": 9.580209895052473, "grad_norm": 0.30482226610183716, "learning_rate": 9.994726236202205e-05, "loss": 0.0249, "step": 6390 }, { "epoch": 9.5952023988006, "grad_norm": 0.2890792191028595, "learning_rate": 9.994650040323704e-05, "loss": 0.025, "step": 6400 }, { "epoch": 9.610194902548725, "grad_norm": 0.2054891437292099, "learning_rate": 9.994573298237784e-05, "loss": 0.023, "step": 6410 }, { "epoch": 9.62518740629685, "grad_norm": 0.4225403964519501, "learning_rate": 9.994496009952837e-05, "loss": 0.0261, "step": 6420 }, { "epoch": 9.640179910044978, "grad_norm": 0.4776424169540405, "learning_rate": 9.994418175477316e-05, "loss": 0.0213, "step": 6430 }, { "epoch": 9.655172413793103, "grad_norm": 0.4641764461994171, "learning_rate": 9.994339794819733e-05, "loss": 0.0225, "step": 6440 }, { "epoch": 9.670164917541229, "grad_norm": 0.24519921839237213, "learning_rate": 9.994260867988658e-05, "loss": 0.0199, "step": 6450 }, { "epoch": 9.685157421289356, "grad_norm": 0.2886161208152771, "learning_rate": 9.994181394992723e-05, "loss": 0.024, "step": 6460 }, { "epoch": 9.700149925037481, "grad_norm": 0.34043189883232117, "learning_rate": 9.994101375840618e-05, "loss": 0.0251, "step": 6470 }, { "epoch": 9.715142428785608, "grad_norm": 0.39936357736587524, "learning_rate": 9.994020810541098e-05, "loss": 0.0204, "step": 6480 }, { "epoch": 9.730134932533733, "grad_norm": 0.3706470727920532, "learning_rate": 9.99393969910297e-05, "loss": 0.0228, "step": 6490 }, { "epoch": 9.745127436281859, "grad_norm": 0.4688586890697479, "learning_rate": 9.993858041535104e-05, "loss": 0.0223, "step": 6500 }, { "epoch": 9.760119940029984, "grad_norm": 0.2991940975189209, "learning_rate": 9.99377583784643e-05, "loss": 0.0246, "step": 6510 }, { "epoch": 9.775112443778111, "grad_norm": 0.36758068203926086, "learning_rate": 9.993693088045939e-05, "loss": 0.0242, "step": 6520 }, { "epoch": 9.790104947526237, "grad_norm": 0.2899485230445862, "learning_rate": 9.99360979214268e-05, "loss": 0.0284, "step": 6530 }, { "epoch": 9.805097451274364, "grad_norm": 0.4673821032047272, "learning_rate": 9.99352595014576e-05, "loss": 0.0232, "step": 6540 }, { "epoch": 9.820089955022489, "grad_norm": 0.42016154527664185, "learning_rate": 9.993441562064354e-05, "loss": 0.0253, "step": 6550 }, { "epoch": 9.835082458770614, "grad_norm": 0.4444972276687622, "learning_rate": 9.993356627907685e-05, "loss": 0.0275, "step": 6560 }, { "epoch": 9.850074962518741, "grad_norm": 0.35825327038764954, "learning_rate": 9.99327114768504e-05, "loss": 0.0253, "step": 6570 }, { "epoch": 9.865067466266867, "grad_norm": 0.37561503052711487, "learning_rate": 9.99318512140577e-05, "loss": 0.0295, "step": 6580 }, { "epoch": 9.880059970014992, "grad_norm": 0.26683998107910156, "learning_rate": 9.993098549079284e-05, "loss": 0.0215, "step": 6590 }, { "epoch": 9.89505247376312, "grad_norm": 0.27397027611732483, "learning_rate": 9.993011430715047e-05, "loss": 0.0254, "step": 6600 }, { "epoch": 9.910044977511244, "grad_norm": 0.24189235270023346, "learning_rate": 9.992923766322586e-05, "loss": 0.025, "step": 6610 }, { "epoch": 9.92503748125937, "grad_norm": 0.2865879535675049, "learning_rate": 9.99283555591149e-05, "loss": 0.0215, "step": 6620 }, { "epoch": 9.940029985007497, "grad_norm": 0.25342297554016113, "learning_rate": 9.992746799491404e-05, "loss": 0.0245, "step": 6630 }, { "epoch": 9.955022488755622, "grad_norm": 0.2959621250629425, "learning_rate": 9.992657497072033e-05, "loss": 0.0228, "step": 6640 }, { "epoch": 9.970014992503748, "grad_norm": 0.3843325078487396, "learning_rate": 9.992567648663147e-05, "loss": 0.0214, "step": 6650 }, { "epoch": 9.985007496251875, "grad_norm": 0.32106053829193115, "learning_rate": 9.992477254274568e-05, "loss": 0.0238, "step": 6660 }, { "epoch": 10.0, "grad_norm": 0.31143254041671753, "learning_rate": 9.992386313916183e-05, "loss": 0.0198, "step": 6670 }, { "epoch": 10.014992503748125, "grad_norm": 0.3538073003292084, "learning_rate": 9.992294827597934e-05, "loss": 0.0231, "step": 6680 }, { "epoch": 10.029985007496252, "grad_norm": 0.3325996994972229, "learning_rate": 9.992202795329831e-05, "loss": 0.0185, "step": 6690 }, { "epoch": 10.044977511244378, "grad_norm": 0.38494619727134705, "learning_rate": 9.992110217121936e-05, "loss": 0.0195, "step": 6700 }, { "epoch": 10.059970014992503, "grad_norm": 0.2973482608795166, "learning_rate": 9.992017092984372e-05, "loss": 0.019, "step": 6710 }, { "epoch": 10.07496251874063, "grad_norm": 0.3960256278514862, "learning_rate": 9.991923422927326e-05, "loss": 0.02, "step": 6720 }, { "epoch": 10.089955022488756, "grad_norm": 0.30172204971313477, "learning_rate": 9.991829206961037e-05, "loss": 0.0218, "step": 6730 }, { "epoch": 10.10494752623688, "grad_norm": 0.2719300091266632, "learning_rate": 9.991734445095813e-05, "loss": 0.0245, "step": 6740 }, { "epoch": 10.119940029985008, "grad_norm": 0.34351083636283875, "learning_rate": 9.991639137342015e-05, "loss": 0.0209, "step": 6750 }, { "epoch": 10.134932533733133, "grad_norm": 0.28869277238845825, "learning_rate": 9.991543283710064e-05, "loss": 0.0176, "step": 6760 }, { "epoch": 10.149925037481259, "grad_norm": 0.36673375964164734, "learning_rate": 9.991446884210445e-05, "loss": 0.0208, "step": 6770 }, { "epoch": 10.164917541229386, "grad_norm": 0.4843023121356964, "learning_rate": 9.9913499388537e-05, "loss": 0.0214, "step": 6780 }, { "epoch": 10.179910044977511, "grad_norm": 0.3120528757572174, "learning_rate": 9.99125244765043e-05, "loss": 0.0216, "step": 6790 }, { "epoch": 10.194902548725636, "grad_norm": 0.2597133219242096, "learning_rate": 9.991154410611296e-05, "loss": 0.0239, "step": 6800 }, { "epoch": 10.209895052473763, "grad_norm": 0.24100251495838165, "learning_rate": 9.99105582774702e-05, "loss": 0.0228, "step": 6810 }, { "epoch": 10.224887556221889, "grad_norm": 0.31829532980918884, "learning_rate": 9.990956699068384e-05, "loss": 0.0212, "step": 6820 }, { "epoch": 10.239880059970014, "grad_norm": 0.31740885972976685, "learning_rate": 9.990857024586224e-05, "loss": 0.0187, "step": 6830 }, { "epoch": 10.254872563718141, "grad_norm": 0.2029232531785965, "learning_rate": 9.990756804311446e-05, "loss": 0.0184, "step": 6840 }, { "epoch": 10.269865067466267, "grad_norm": 0.2538192570209503, "learning_rate": 9.990656038255006e-05, "loss": 0.0178, "step": 6850 }, { "epoch": 10.284857571214392, "grad_norm": 0.34294217824935913, "learning_rate": 9.990554726427926e-05, "loss": 0.0216, "step": 6860 }, { "epoch": 10.299850074962519, "grad_norm": 0.38328269124031067, "learning_rate": 9.990452868841284e-05, "loss": 0.029, "step": 6870 }, { "epoch": 10.314842578710644, "grad_norm": 0.29963162541389465, "learning_rate": 9.99035046550622e-05, "loss": 0.0251, "step": 6880 }, { "epoch": 10.329835082458771, "grad_norm": 0.5707314610481262, "learning_rate": 9.99024751643393e-05, "loss": 0.0244, "step": 6890 }, { "epoch": 10.344827586206897, "grad_norm": 0.3422578275203705, "learning_rate": 9.990144021635677e-05, "loss": 0.0208, "step": 6900 }, { "epoch": 10.359820089955022, "grad_norm": 0.270343542098999, "learning_rate": 9.990039981122775e-05, "loss": 0.0211, "step": 6910 }, { "epoch": 10.37481259370315, "grad_norm": 0.3571113049983978, "learning_rate": 9.989935394906602e-05, "loss": 0.0263, "step": 6920 }, { "epoch": 10.389805097451275, "grad_norm": 0.3695005774497986, "learning_rate": 9.989830262998598e-05, "loss": 0.0245, "step": 6930 }, { "epoch": 10.4047976011994, "grad_norm": 0.4233928918838501, "learning_rate": 9.989724585410259e-05, "loss": 0.0203, "step": 6940 }, { "epoch": 10.419790104947527, "grad_norm": 0.39843252301216125, "learning_rate": 9.989618362153139e-05, "loss": 0.0262, "step": 6950 }, { "epoch": 10.434782608695652, "grad_norm": 0.42565518617630005, "learning_rate": 9.989511593238859e-05, "loss": 0.0224, "step": 6960 }, { "epoch": 10.449775112443778, "grad_norm": 0.3030361235141754, "learning_rate": 9.98940427867909e-05, "loss": 0.0241, "step": 6970 }, { "epoch": 10.464767616191905, "grad_norm": 0.36090055108070374, "learning_rate": 9.989296418485573e-05, "loss": 0.0269, "step": 6980 }, { "epoch": 10.47976011994003, "grad_norm": 0.2402164787054062, "learning_rate": 9.989188012670101e-05, "loss": 0.0236, "step": 6990 }, { "epoch": 10.494752623688155, "grad_norm": 0.43622416257858276, "learning_rate": 9.989079061244528e-05, "loss": 0.0235, "step": 7000 }, { "epoch": 10.509745127436283, "grad_norm": 0.24302099645137787, "learning_rate": 9.988969564220769e-05, "loss": 0.0242, "step": 7010 }, { "epoch": 10.524737631184408, "grad_norm": 0.365945428609848, "learning_rate": 9.988859521610801e-05, "loss": 0.0184, "step": 7020 }, { "epoch": 10.539730134932533, "grad_norm": 0.5547352433204651, "learning_rate": 9.988748933426656e-05, "loss": 0.0236, "step": 7030 }, { "epoch": 10.55472263868066, "grad_norm": 0.30342113971710205, "learning_rate": 9.988637799680428e-05, "loss": 0.0235, "step": 7040 }, { "epoch": 10.569715142428786, "grad_norm": 0.41731584072113037, "learning_rate": 9.98852612038427e-05, "loss": 0.0292, "step": 7050 }, { "epoch": 10.584707646176911, "grad_norm": 0.5396575331687927, "learning_rate": 9.988413895550397e-05, "loss": 0.0237, "step": 7060 }, { "epoch": 10.599700149925038, "grad_norm": 0.4569787085056305, "learning_rate": 9.98830112519108e-05, "loss": 0.025, "step": 7070 }, { "epoch": 10.614692653673163, "grad_norm": 0.21851368248462677, "learning_rate": 9.98818780931865e-05, "loss": 0.0192, "step": 7080 }, { "epoch": 10.629685157421289, "grad_norm": 0.4067769944667816, "learning_rate": 9.988073947945502e-05, "loss": 0.0218, "step": 7090 }, { "epoch": 10.644677661169416, "grad_norm": 0.25893405079841614, "learning_rate": 9.987959541084087e-05, "loss": 0.0208, "step": 7100 }, { "epoch": 10.659670164917541, "grad_norm": 0.5310393571853638, "learning_rate": 9.987844588746915e-05, "loss": 0.0218, "step": 7110 }, { "epoch": 10.674662668665666, "grad_norm": 0.39230433106422424, "learning_rate": 9.987729090946558e-05, "loss": 0.0248, "step": 7120 }, { "epoch": 10.689655172413794, "grad_norm": 0.40773552656173706, "learning_rate": 9.987613047695647e-05, "loss": 0.0217, "step": 7130 }, { "epoch": 10.704647676161919, "grad_norm": 0.28572532534599304, "learning_rate": 9.987496459006871e-05, "loss": 0.0221, "step": 7140 }, { "epoch": 10.719640179910044, "grad_norm": 0.3873806893825531, "learning_rate": 9.987379324892982e-05, "loss": 0.0268, "step": 7150 }, { "epoch": 10.734632683658171, "grad_norm": 0.38146838545799255, "learning_rate": 9.987261645366788e-05, "loss": 0.0255, "step": 7160 }, { "epoch": 10.749625187406297, "grad_norm": 0.33083394169807434, "learning_rate": 9.987143420441158e-05, "loss": 0.0199, "step": 7170 }, { "epoch": 10.764617691154422, "grad_norm": 0.32753410935401917, "learning_rate": 9.987024650129022e-05, "loss": 0.02, "step": 7180 }, { "epoch": 10.77961019490255, "grad_norm": 0.32663074135780334, "learning_rate": 9.986905334443368e-05, "loss": 0.0265, "step": 7190 }, { "epoch": 10.794602698650674, "grad_norm": 0.27466458082199097, "learning_rate": 9.986785473397245e-05, "loss": 0.0207, "step": 7200 }, { "epoch": 10.8095952023988, "grad_norm": 0.2987978458404541, "learning_rate": 9.98666506700376e-05, "loss": 0.0174, "step": 7210 }, { "epoch": 10.824587706146927, "grad_norm": 0.23901784420013428, "learning_rate": 9.986544115276081e-05, "loss": 0.0233, "step": 7220 }, { "epoch": 10.839580209895052, "grad_norm": 0.3745051622390747, "learning_rate": 9.986422618227433e-05, "loss": 0.0221, "step": 7230 }, { "epoch": 10.85457271364318, "grad_norm": 0.263001412153244, "learning_rate": 9.986300575871106e-05, "loss": 0.0209, "step": 7240 }, { "epoch": 10.869565217391305, "grad_norm": 0.35232752561569214, "learning_rate": 9.986177988220444e-05, "loss": 0.0232, "step": 7250 }, { "epoch": 10.88455772113943, "grad_norm": 0.47481152415275574, "learning_rate": 9.986054855288856e-05, "loss": 0.0228, "step": 7260 }, { "epoch": 10.899550224887555, "grad_norm": 0.3358514606952667, "learning_rate": 9.985931177089802e-05, "loss": 0.0234, "step": 7270 }, { "epoch": 10.914542728635682, "grad_norm": 0.4386899471282959, "learning_rate": 9.985806953636814e-05, "loss": 0.0232, "step": 7280 }, { "epoch": 10.929535232383808, "grad_norm": 0.335993230342865, "learning_rate": 9.985682184943471e-05, "loss": 0.022, "step": 7290 }, { "epoch": 10.944527736131935, "grad_norm": 0.4982161819934845, "learning_rate": 9.98555687102342e-05, "loss": 0.031, "step": 7300 }, { "epoch": 10.95952023988006, "grad_norm": 0.3877367079257965, "learning_rate": 9.985431011890367e-05, "loss": 0.0273, "step": 7310 }, { "epoch": 10.974512743628186, "grad_norm": 0.30542588233947754, "learning_rate": 9.985304607558075e-05, "loss": 0.0267, "step": 7320 }, { "epoch": 10.989505247376313, "grad_norm": 0.3877709209918976, "learning_rate": 9.985177658040364e-05, "loss": 0.0246, "step": 7330 }, { "epoch": 11.004497751124438, "grad_norm": 0.426440566778183, "learning_rate": 9.985050163351119e-05, "loss": 0.023, "step": 7340 }, { "epoch": 11.019490254872563, "grad_norm": 0.2748238742351532, "learning_rate": 9.984922123504286e-05, "loss": 0.0188, "step": 7350 }, { "epoch": 11.03448275862069, "grad_norm": 0.2530178725719452, "learning_rate": 9.984793538513862e-05, "loss": 0.0201, "step": 7360 }, { "epoch": 11.049475262368816, "grad_norm": 0.23785041272640228, "learning_rate": 9.984664408393912e-05, "loss": 0.0232, "step": 7370 }, { "epoch": 11.064467766116941, "grad_norm": 0.2808997631072998, "learning_rate": 9.984534733158556e-05, "loss": 0.0189, "step": 7380 }, { "epoch": 11.079460269865068, "grad_norm": 0.3179199993610382, "learning_rate": 9.984404512821977e-05, "loss": 0.0232, "step": 7390 }, { "epoch": 11.094452773613193, "grad_norm": 0.29721784591674805, "learning_rate": 9.984273747398411e-05, "loss": 0.0214, "step": 7400 }, { "epoch": 11.109445277361319, "grad_norm": 0.3037783205509186, "learning_rate": 9.984142436902165e-05, "loss": 0.0175, "step": 7410 }, { "epoch": 11.124437781109446, "grad_norm": 0.3754832148551941, "learning_rate": 9.984010581347596e-05, "loss": 0.0264, "step": 7420 }, { "epoch": 11.139430284857571, "grad_norm": 0.40656667947769165, "learning_rate": 9.983878180749121e-05, "loss": 0.0203, "step": 7430 }, { "epoch": 11.154422788605697, "grad_norm": 0.2418227642774582, "learning_rate": 9.983745235121222e-05, "loss": 0.0232, "step": 7440 }, { "epoch": 11.169415292353824, "grad_norm": 0.2795887589454651, "learning_rate": 9.983611744478438e-05, "loss": 0.0204, "step": 7450 }, { "epoch": 11.184407796101949, "grad_norm": 0.4107739329338074, "learning_rate": 9.983477708835365e-05, "loss": 0.0227, "step": 7460 }, { "epoch": 11.199400299850074, "grad_norm": 0.31841737031936646, "learning_rate": 9.983343128206664e-05, "loss": 0.0212, "step": 7470 }, { "epoch": 11.214392803598201, "grad_norm": 0.2355625480413437, "learning_rate": 9.983208002607049e-05, "loss": 0.0223, "step": 7480 }, { "epoch": 11.229385307346327, "grad_norm": 0.4220694899559021, "learning_rate": 9.9830723320513e-05, "loss": 0.0226, "step": 7490 }, { "epoch": 11.244377811094452, "grad_norm": 0.2166745662689209, "learning_rate": 9.982936116554254e-05, "loss": 0.0235, "step": 7500 }, { "epoch": 11.25937031484258, "grad_norm": 0.3407725989818573, "learning_rate": 9.982799356130803e-05, "loss": 0.0188, "step": 7510 }, { "epoch": 11.274362818590705, "grad_norm": 0.2767082452774048, "learning_rate": 9.982662050795908e-05, "loss": 0.0235, "step": 7520 }, { "epoch": 11.28935532233883, "grad_norm": 0.46364158391952515, "learning_rate": 9.982524200564583e-05, "loss": 0.0223, "step": 7530 }, { "epoch": 11.304347826086957, "grad_norm": 0.3344242572784424, "learning_rate": 9.982385805451901e-05, "loss": 0.0215, "step": 7540 }, { "epoch": 11.319340329835082, "grad_norm": 0.3220809996128082, "learning_rate": 9.982246865472998e-05, "loss": 0.0214, "step": 7550 }, { "epoch": 11.334332833583208, "grad_norm": 0.4761151373386383, "learning_rate": 9.982107380643069e-05, "loss": 0.0218, "step": 7560 }, { "epoch": 11.349325337331335, "grad_norm": 0.37261730432510376, "learning_rate": 9.981967350977368e-05, "loss": 0.0222, "step": 7570 }, { "epoch": 11.36431784107946, "grad_norm": 0.36796003580093384, "learning_rate": 9.981826776491208e-05, "loss": 0.0303, "step": 7580 }, { "epoch": 11.379310344827585, "grad_norm": 0.3630298972129822, "learning_rate": 9.98168565719996e-05, "loss": 0.0231, "step": 7590 }, { "epoch": 11.394302848575713, "grad_norm": 0.5565486550331116, "learning_rate": 9.98154399311906e-05, "loss": 0.0277, "step": 7600 }, { "epoch": 11.409295352323838, "grad_norm": 0.5181038975715637, "learning_rate": 9.981401784263997e-05, "loss": 0.0219, "step": 7610 }, { "epoch": 11.424287856071963, "grad_norm": 0.5753671526908875, "learning_rate": 9.981259030650326e-05, "loss": 0.0237, "step": 7620 }, { "epoch": 11.43928035982009, "grad_norm": 0.44945821166038513, "learning_rate": 9.981115732293655e-05, "loss": 0.0262, "step": 7630 }, { "epoch": 11.454272863568216, "grad_norm": 0.4485301375389099, "learning_rate": 9.980971889209659e-05, "loss": 0.0215, "step": 7640 }, { "epoch": 11.469265367316343, "grad_norm": 0.27437129616737366, "learning_rate": 9.980827501414064e-05, "loss": 0.0232, "step": 7650 }, { "epoch": 11.484257871064468, "grad_norm": 0.3614014685153961, "learning_rate": 9.980682568922663e-05, "loss": 0.0257, "step": 7660 }, { "epoch": 11.499250374812593, "grad_norm": 0.30397069454193115, "learning_rate": 9.980537091751304e-05, "loss": 0.0244, "step": 7670 }, { "epoch": 11.514242878560719, "grad_norm": 0.39159566164016724, "learning_rate": 9.980391069915897e-05, "loss": 0.0206, "step": 7680 }, { "epoch": 11.529235382308846, "grad_norm": 0.2313222587108612, "learning_rate": 9.98024450343241e-05, "loss": 0.0213, "step": 7690 }, { "epoch": 11.544227886056971, "grad_norm": 0.32795143127441406, "learning_rate": 9.980097392316872e-05, "loss": 0.0195, "step": 7700 }, { "epoch": 11.559220389805098, "grad_norm": 0.3547206223011017, "learning_rate": 9.97994973658537e-05, "loss": 0.0261, "step": 7710 }, { "epoch": 11.574212893553224, "grad_norm": 0.19977688789367676, "learning_rate": 9.979801536254054e-05, "loss": 0.0242, "step": 7720 }, { "epoch": 11.589205397301349, "grad_norm": 0.30551406741142273, "learning_rate": 9.979652791339127e-05, "loss": 0.0269, "step": 7730 }, { "epoch": 11.604197901049476, "grad_norm": 0.25032874941825867, "learning_rate": 9.97950350185686e-05, "loss": 0.0252, "step": 7740 }, { "epoch": 11.619190404797601, "grad_norm": 0.37295088171958923, "learning_rate": 9.979353667823574e-05, "loss": 0.0208, "step": 7750 }, { "epoch": 11.634182908545727, "grad_norm": 0.3568632900714874, "learning_rate": 9.979203289255658e-05, "loss": 0.0228, "step": 7760 }, { "epoch": 11.649175412293854, "grad_norm": 0.25152286887168884, "learning_rate": 9.979052366169557e-05, "loss": 0.0183, "step": 7770 }, { "epoch": 11.664167916041979, "grad_norm": 0.3347049653530121, "learning_rate": 9.978900898581775e-05, "loss": 0.0224, "step": 7780 }, { "epoch": 11.679160419790104, "grad_norm": 0.3263874053955078, "learning_rate": 9.978748886508875e-05, "loss": 0.0204, "step": 7790 }, { "epoch": 11.694152923538232, "grad_norm": 0.32570216059684753, "learning_rate": 9.978596329967484e-05, "loss": 0.0205, "step": 7800 }, { "epoch": 11.709145427286357, "grad_norm": 0.38764917850494385, "learning_rate": 9.978443228974284e-05, "loss": 0.0248, "step": 7810 }, { "epoch": 11.724137931034482, "grad_norm": 0.2455524504184723, "learning_rate": 9.978289583546015e-05, "loss": 0.0238, "step": 7820 }, { "epoch": 11.73913043478261, "grad_norm": 0.22069405019283295, "learning_rate": 9.978135393699484e-05, "loss": 0.0272, "step": 7830 }, { "epoch": 11.754122938530735, "grad_norm": 0.3307015299797058, "learning_rate": 9.977980659451548e-05, "loss": 0.019, "step": 7840 }, { "epoch": 11.76911544227886, "grad_norm": 0.2000645101070404, "learning_rate": 9.977825380819135e-05, "loss": 0.0217, "step": 7850 }, { "epoch": 11.784107946026987, "grad_norm": 0.3387535810470581, "learning_rate": 9.97766955781922e-05, "loss": 0.0181, "step": 7860 }, { "epoch": 11.799100449775112, "grad_norm": 0.35134604573249817, "learning_rate": 9.977513190468848e-05, "loss": 0.0216, "step": 7870 }, { "epoch": 11.814092953523238, "grad_norm": 0.31285831332206726, "learning_rate": 9.977356278785116e-05, "loss": 0.0282, "step": 7880 }, { "epoch": 11.829085457271365, "grad_norm": 0.5041400194168091, "learning_rate": 9.977198822785184e-05, "loss": 0.0246, "step": 7890 }, { "epoch": 11.84407796101949, "grad_norm": 0.4303722083568573, "learning_rate": 9.977040822486273e-05, "loss": 0.0222, "step": 7900 }, { "epoch": 11.859070464767616, "grad_norm": 0.39625197649002075, "learning_rate": 9.97688227790566e-05, "loss": 0.0233, "step": 7910 }, { "epoch": 11.874062968515743, "grad_norm": 0.38457342982292175, "learning_rate": 9.976723189060684e-05, "loss": 0.0226, "step": 7920 }, { "epoch": 11.889055472263868, "grad_norm": 0.3893722891807556, "learning_rate": 9.976563555968742e-05, "loss": 0.0194, "step": 7930 }, { "epoch": 11.904047976011993, "grad_norm": 0.45783987641334534, "learning_rate": 9.976403378647292e-05, "loss": 0.025, "step": 7940 }, { "epoch": 11.91904047976012, "grad_norm": 0.3944067060947418, "learning_rate": 9.97624265711385e-05, "loss": 0.026, "step": 7950 }, { "epoch": 11.934032983508246, "grad_norm": 0.31759941577911377, "learning_rate": 9.976081391385993e-05, "loss": 0.0271, "step": 7960 }, { "epoch": 11.949025487256371, "grad_norm": 0.3084299564361572, "learning_rate": 9.975919581481356e-05, "loss": 0.0214, "step": 7970 }, { "epoch": 11.964017991004498, "grad_norm": 0.37824487686157227, "learning_rate": 9.975757227417634e-05, "loss": 0.0256, "step": 7980 }, { "epoch": 11.979010494752623, "grad_norm": 0.2717660367488861, "learning_rate": 9.975594329212586e-05, "loss": 0.0227, "step": 7990 }, { "epoch": 11.994002998500749, "grad_norm": 0.4396626055240631, "learning_rate": 9.97543088688402e-05, "loss": 0.0234, "step": 8000 }, { "epoch": 12.008995502248876, "grad_norm": 0.28463485836982727, "learning_rate": 9.975266900449814e-05, "loss": 0.0223, "step": 8010 }, { "epoch": 12.023988005997001, "grad_norm": 0.40280312299728394, "learning_rate": 9.975102369927898e-05, "loss": 0.0228, "step": 8020 }, { "epoch": 12.038980509745127, "grad_norm": 0.5586749315261841, "learning_rate": 9.974937295336269e-05, "loss": 0.0191, "step": 8030 }, { "epoch": 12.053973013493254, "grad_norm": 0.3512212336063385, "learning_rate": 9.974771676692975e-05, "loss": 0.0175, "step": 8040 }, { "epoch": 12.068965517241379, "grad_norm": 0.24690258502960205, "learning_rate": 9.974605514016131e-05, "loss": 0.0179, "step": 8050 }, { "epoch": 12.083958020989506, "grad_norm": 0.47262778878211975, "learning_rate": 9.974438807323907e-05, "loss": 0.023, "step": 8060 }, { "epoch": 12.098950524737631, "grad_norm": 0.40090152621269226, "learning_rate": 9.974271556634535e-05, "loss": 0.018, "step": 8070 }, { "epoch": 12.113943028485757, "grad_norm": 0.39113685488700867, "learning_rate": 9.974103761966302e-05, "loss": 0.0208, "step": 8080 }, { "epoch": 12.128935532233884, "grad_norm": 0.46493178606033325, "learning_rate": 9.973935423337563e-05, "loss": 0.0199, "step": 8090 }, { "epoch": 12.14392803598201, "grad_norm": 0.3808997869491577, "learning_rate": 9.973766540766722e-05, "loss": 0.0178, "step": 8100 }, { "epoch": 12.158920539730135, "grad_norm": 0.30234795808792114, "learning_rate": 9.97359711427225e-05, "loss": 0.0232, "step": 8110 }, { "epoch": 12.173913043478262, "grad_norm": 0.5260949730873108, "learning_rate": 9.973427143872677e-05, "loss": 0.0226, "step": 8120 }, { "epoch": 12.188905547226387, "grad_norm": 0.21856077015399933, "learning_rate": 9.973256629586589e-05, "loss": 0.0182, "step": 8130 }, { "epoch": 12.203898050974512, "grad_norm": 0.6153507828712463, "learning_rate": 9.973085571432632e-05, "loss": 0.0212, "step": 8140 }, { "epoch": 12.21889055472264, "grad_norm": 0.16932447254657745, "learning_rate": 9.972913969429513e-05, "loss": 0.0239, "step": 8150 }, { "epoch": 12.233883058470765, "grad_norm": 0.37129807472229004, "learning_rate": 9.972741823596e-05, "loss": 0.0209, "step": 8160 }, { "epoch": 12.24887556221889, "grad_norm": 0.1946491152048111, "learning_rate": 9.972569133950917e-05, "loss": 0.0196, "step": 8170 }, { "epoch": 12.263868065967017, "grad_norm": 0.39536774158477783, "learning_rate": 9.972395900513151e-05, "loss": 0.0229, "step": 8180 }, { "epoch": 12.278860569715143, "grad_norm": 0.24815596640110016, "learning_rate": 9.972222123301645e-05, "loss": 0.0225, "step": 8190 }, { "epoch": 12.293853073463268, "grad_norm": 0.36363592743873596, "learning_rate": 9.972047802335403e-05, "loss": 0.0217, "step": 8200 }, { "epoch": 12.308845577211395, "grad_norm": 0.2522033452987671, "learning_rate": 9.971872937633488e-05, "loss": 0.0245, "step": 8210 }, { "epoch": 12.32383808095952, "grad_norm": 0.24236543476581573, "learning_rate": 9.971697529215024e-05, "loss": 0.0202, "step": 8220 }, { "epoch": 12.338830584707646, "grad_norm": 0.2647806704044342, "learning_rate": 9.971521577099192e-05, "loss": 0.0225, "step": 8230 }, { "epoch": 12.353823088455773, "grad_norm": 0.21565698087215424, "learning_rate": 9.971345081305236e-05, "loss": 0.0218, "step": 8240 }, { "epoch": 12.368815592203898, "grad_norm": 0.28480032086372375, "learning_rate": 9.971168041852456e-05, "loss": 0.017, "step": 8250 }, { "epoch": 12.383808095952023, "grad_norm": 0.3746260404586792, "learning_rate": 9.970990458760215e-05, "loss": 0.0231, "step": 8260 }, { "epoch": 12.39880059970015, "grad_norm": 0.33300480246543884, "learning_rate": 9.970812332047929e-05, "loss": 0.0285, "step": 8270 }, { "epoch": 12.413793103448276, "grad_norm": 0.33867111802101135, "learning_rate": 9.97063366173508e-05, "loss": 0.0254, "step": 8280 }, { "epoch": 12.428785607196401, "grad_norm": 0.339431494474411, "learning_rate": 9.970454447841207e-05, "loss": 0.0218, "step": 8290 }, { "epoch": 12.443778110944528, "grad_norm": 0.2525383234024048, "learning_rate": 9.970274690385909e-05, "loss": 0.0195, "step": 8300 }, { "epoch": 12.458770614692654, "grad_norm": 0.24002164602279663, "learning_rate": 9.970094389388844e-05, "loss": 0.0231, "step": 8310 }, { "epoch": 12.473763118440779, "grad_norm": 0.2232813537120819, "learning_rate": 9.969913544869728e-05, "loss": 0.0234, "step": 8320 }, { "epoch": 12.488755622188906, "grad_norm": 0.2637665569782257, "learning_rate": 9.96973215684834e-05, "loss": 0.0208, "step": 8330 }, { "epoch": 12.503748125937031, "grad_norm": 0.21339941024780273, "learning_rate": 9.969550225344513e-05, "loss": 0.0216, "step": 8340 }, { "epoch": 12.518740629685157, "grad_norm": 0.3494541347026825, "learning_rate": 9.969367750378147e-05, "loss": 0.0224, "step": 8350 }, { "epoch": 12.533733133433284, "grad_norm": 0.26085251569747925, "learning_rate": 9.969184731969194e-05, "loss": 0.021, "step": 8360 }, { "epoch": 12.548725637181409, "grad_norm": 0.2156774252653122, "learning_rate": 9.96900117013767e-05, "loss": 0.0197, "step": 8370 }, { "epoch": 12.563718140929534, "grad_norm": 0.26270970702171326, "learning_rate": 9.96881706490365e-05, "loss": 0.0204, "step": 8380 }, { "epoch": 12.578710644677662, "grad_norm": 0.2542820870876312, "learning_rate": 9.968632416287265e-05, "loss": 0.0199, "step": 8390 }, { "epoch": 12.593703148425787, "grad_norm": 0.2509387731552124, "learning_rate": 9.96844722430871e-05, "loss": 0.0171, "step": 8400 }, { "epoch": 12.608695652173914, "grad_norm": 0.32845765352249146, "learning_rate": 9.968261488988235e-05, "loss": 0.0197, "step": 8410 }, { "epoch": 12.62368815592204, "grad_norm": 0.25267401337623596, "learning_rate": 9.968075210346155e-05, "loss": 0.0206, "step": 8420 }, { "epoch": 12.638680659670165, "grad_norm": 0.27376440167427063, "learning_rate": 9.967888388402839e-05, "loss": 0.0193, "step": 8430 }, { "epoch": 12.65367316341829, "grad_norm": 0.32860469818115234, "learning_rate": 9.967701023178717e-05, "loss": 0.021, "step": 8440 }, { "epoch": 12.668665667166417, "grad_norm": 0.4312947392463684, "learning_rate": 9.967513114694282e-05, "loss": 0.0222, "step": 8450 }, { "epoch": 12.683658170914542, "grad_norm": 0.2884674668312073, "learning_rate": 9.967324662970079e-05, "loss": 0.0184, "step": 8460 }, { "epoch": 12.69865067466267, "grad_norm": 0.19219423830509186, "learning_rate": 9.96713566802672e-05, "loss": 0.0195, "step": 8470 }, { "epoch": 12.713643178410795, "grad_norm": 0.3717878460884094, "learning_rate": 9.966946129884873e-05, "loss": 0.0228, "step": 8480 }, { "epoch": 12.72863568215892, "grad_norm": 0.38936835527420044, "learning_rate": 9.966756048565265e-05, "loss": 0.0272, "step": 8490 }, { "epoch": 12.743628185907047, "grad_norm": 0.3411654829978943, "learning_rate": 9.966565424088681e-05, "loss": 0.0173, "step": 8500 }, { "epoch": 12.758620689655173, "grad_norm": 0.3656691312789917, "learning_rate": 9.96637425647597e-05, "loss": 0.022, "step": 8510 }, { "epoch": 12.773613193403298, "grad_norm": 0.20003297924995422, "learning_rate": 9.966182545748038e-05, "loss": 0.0215, "step": 8520 }, { "epoch": 12.788605697151425, "grad_norm": 0.3291391134262085, "learning_rate": 9.96599029192585e-05, "loss": 0.0187, "step": 8530 }, { "epoch": 12.80359820089955, "grad_norm": 0.27346041798591614, "learning_rate": 9.965797495030428e-05, "loss": 0.0188, "step": 8540 }, { "epoch": 12.818590704647676, "grad_norm": 0.49366188049316406, "learning_rate": 9.96560415508286e-05, "loss": 0.0245, "step": 8550 }, { "epoch": 12.833583208395803, "grad_norm": 0.32761287689208984, "learning_rate": 9.965410272104286e-05, "loss": 0.0197, "step": 8560 }, { "epoch": 12.848575712143928, "grad_norm": 0.26072967052459717, "learning_rate": 9.96521584611591e-05, "loss": 0.0219, "step": 8570 }, { "epoch": 12.863568215892053, "grad_norm": 0.38294488191604614, "learning_rate": 9.965020877138994e-05, "loss": 0.0178, "step": 8580 }, { "epoch": 12.87856071964018, "grad_norm": 0.34160900115966797, "learning_rate": 9.964825365194861e-05, "loss": 0.0187, "step": 8590 }, { "epoch": 12.893553223388306, "grad_norm": 0.24070459604263306, "learning_rate": 9.96462931030489e-05, "loss": 0.0218, "step": 8600 }, { "epoch": 12.908545727136431, "grad_norm": 0.25865763425827026, "learning_rate": 9.96443271249052e-05, "loss": 0.018, "step": 8610 }, { "epoch": 12.923538230884558, "grad_norm": 0.3050123155117035, "learning_rate": 9.964235571773255e-05, "loss": 0.0237, "step": 8620 }, { "epoch": 12.938530734632684, "grad_norm": 0.2968873083591461, "learning_rate": 9.96403788817465e-05, "loss": 0.017, "step": 8630 }, { "epoch": 12.953523238380809, "grad_norm": 0.2570797801017761, "learning_rate": 9.963839661716325e-05, "loss": 0.018, "step": 8640 }, { "epoch": 12.968515742128936, "grad_norm": 0.40725937485694885, "learning_rate": 9.963640892419958e-05, "loss": 0.0253, "step": 8650 }, { "epoch": 12.983508245877061, "grad_norm": 0.21932215988636017, "learning_rate": 9.963441580307286e-05, "loss": 0.0194, "step": 8660 }, { "epoch": 12.998500749625187, "grad_norm": 0.3197121322154999, "learning_rate": 9.963241725400104e-05, "loss": 0.0208, "step": 8670 }, { "epoch": 13.013493253373314, "grad_norm": 0.3608223497867584, "learning_rate": 9.963041327720271e-05, "loss": 0.0247, "step": 8680 }, { "epoch": 13.02848575712144, "grad_norm": 0.4239412546157837, "learning_rate": 9.962840387289697e-05, "loss": 0.0198, "step": 8690 }, { "epoch": 13.043478260869565, "grad_norm": 0.4467112123966217, "learning_rate": 9.962638904130363e-05, "loss": 0.0235, "step": 8700 }, { "epoch": 13.058470764617692, "grad_norm": 0.2678614854812622, "learning_rate": 9.962436878264298e-05, "loss": 0.0214, "step": 8710 }, { "epoch": 13.073463268365817, "grad_norm": 0.33572936058044434, "learning_rate": 9.962234309713598e-05, "loss": 0.0201, "step": 8720 }, { "epoch": 13.088455772113942, "grad_norm": 0.24951788783073425, "learning_rate": 9.962031198500414e-05, "loss": 0.0258, "step": 8730 }, { "epoch": 13.10344827586207, "grad_norm": 0.19649356603622437, "learning_rate": 9.961827544646958e-05, "loss": 0.0205, "step": 8740 }, { "epoch": 13.118440779610195, "grad_norm": 0.26748397946357727, "learning_rate": 9.961623348175501e-05, "loss": 0.0183, "step": 8750 }, { "epoch": 13.13343328335832, "grad_norm": 0.5815973281860352, "learning_rate": 9.961418609108377e-05, "loss": 0.0255, "step": 8760 }, { "epoch": 13.148425787106447, "grad_norm": 0.5159278512001038, "learning_rate": 9.961213327467971e-05, "loss": 0.0253, "step": 8770 }, { "epoch": 13.163418290854572, "grad_norm": 0.47086313366889954, "learning_rate": 9.961007503276736e-05, "loss": 0.0254, "step": 8780 }, { "epoch": 13.178410794602698, "grad_norm": 0.33135613799095154, "learning_rate": 9.960801136557179e-05, "loss": 0.0264, "step": 8790 }, { "epoch": 13.193403298350825, "grad_norm": 0.30964338779449463, "learning_rate": 9.960594227331866e-05, "loss": 0.0196, "step": 8800 }, { "epoch": 13.20839580209895, "grad_norm": 0.3350619077682495, "learning_rate": 9.960386775623429e-05, "loss": 0.0259, "step": 8810 }, { "epoch": 13.223388305847077, "grad_norm": 0.27056971192359924, "learning_rate": 9.96017878145455e-05, "loss": 0.0169, "step": 8820 }, { "epoch": 13.238380809595203, "grad_norm": 0.30676814913749695, "learning_rate": 9.959970244847977e-05, "loss": 0.0241, "step": 8830 }, { "epoch": 13.253373313343328, "grad_norm": 0.25194278359413147, "learning_rate": 9.959761165826518e-05, "loss": 0.0184, "step": 8840 }, { "epoch": 13.268365817091455, "grad_norm": 0.4102483093738556, "learning_rate": 9.959551544413033e-05, "loss": 0.0181, "step": 8850 }, { "epoch": 13.28335832083958, "grad_norm": 0.2657543420791626, "learning_rate": 9.959341380630448e-05, "loss": 0.0231, "step": 8860 }, { "epoch": 13.298350824587706, "grad_norm": 0.32467472553253174, "learning_rate": 9.959130674501746e-05, "loss": 0.0221, "step": 8870 }, { "epoch": 13.313343328335833, "grad_norm": 0.24705550074577332, "learning_rate": 9.958919426049968e-05, "loss": 0.0213, "step": 8880 }, { "epoch": 13.328335832083958, "grad_norm": 0.324405699968338, "learning_rate": 9.958707635298219e-05, "loss": 0.0228, "step": 8890 }, { "epoch": 13.343328335832084, "grad_norm": 0.3217483460903168, "learning_rate": 9.958495302269657e-05, "loss": 0.0189, "step": 8900 }, { "epoch": 13.35832083958021, "grad_norm": 0.29670488834381104, "learning_rate": 9.958282426987503e-05, "loss": 0.0194, "step": 8910 }, { "epoch": 13.373313343328336, "grad_norm": 0.2527467906475067, "learning_rate": 9.95806900947504e-05, "loss": 0.0198, "step": 8920 }, { "epoch": 13.388305847076461, "grad_norm": 0.237270787358284, "learning_rate": 9.957855049755604e-05, "loss": 0.0163, "step": 8930 }, { "epoch": 13.403298350824588, "grad_norm": 0.3023013770580292, "learning_rate": 9.957640547852593e-05, "loss": 0.0162, "step": 8940 }, { "epoch": 13.418290854572714, "grad_norm": 0.30351436138153076, "learning_rate": 9.957425503789466e-05, "loss": 0.0203, "step": 8950 }, { "epoch": 13.433283358320839, "grad_norm": 0.20945307612419128, "learning_rate": 9.957209917589738e-05, "loss": 0.0199, "step": 8960 }, { "epoch": 13.448275862068966, "grad_norm": 0.3208000957965851, "learning_rate": 9.956993789276987e-05, "loss": 0.0173, "step": 8970 }, { "epoch": 13.463268365817092, "grad_norm": 0.4051738679409027, "learning_rate": 9.956777118874847e-05, "loss": 0.0208, "step": 8980 }, { "epoch": 13.478260869565217, "grad_norm": 0.3233533501625061, "learning_rate": 9.956559906407016e-05, "loss": 0.0189, "step": 8990 }, { "epoch": 13.493253373313344, "grad_norm": 0.2988022267818451, "learning_rate": 9.956342151897245e-05, "loss": 0.0207, "step": 9000 }, { "epoch": 13.50824587706147, "grad_norm": 0.19963739812374115, "learning_rate": 9.956123855369346e-05, "loss": 0.0169, "step": 9010 }, { "epoch": 13.523238380809595, "grad_norm": 0.3028227686882019, "learning_rate": 9.955905016847196e-05, "loss": 0.0148, "step": 9020 }, { "epoch": 13.538230884557722, "grad_norm": 0.2875560522079468, "learning_rate": 9.955685636354723e-05, "loss": 0.0189, "step": 9030 }, { "epoch": 13.553223388305847, "grad_norm": 0.5055646896362305, "learning_rate": 9.95546571391592e-05, "loss": 0.018, "step": 9040 }, { "epoch": 13.568215892053972, "grad_norm": 0.3410536050796509, "learning_rate": 9.955245249554837e-05, "loss": 0.0223, "step": 9050 }, { "epoch": 13.5832083958021, "grad_norm": 0.4022865891456604, "learning_rate": 9.955024243295582e-05, "loss": 0.0212, "step": 9060 }, { "epoch": 13.598200899550225, "grad_norm": 0.2921414375305176, "learning_rate": 9.954802695162328e-05, "loss": 0.0166, "step": 9070 }, { "epoch": 13.61319340329835, "grad_norm": 0.2788468301296234, "learning_rate": 9.954580605179302e-05, "loss": 0.0186, "step": 9080 }, { "epoch": 13.628185907046477, "grad_norm": 0.2542870342731476, "learning_rate": 9.954357973370788e-05, "loss": 0.0173, "step": 9090 }, { "epoch": 13.643178410794603, "grad_norm": 0.24120423197746277, "learning_rate": 9.954134799761135e-05, "loss": 0.0192, "step": 9100 }, { "epoch": 13.658170914542728, "grad_norm": 0.3773444592952728, "learning_rate": 9.953911084374748e-05, "loss": 0.0226, "step": 9110 }, { "epoch": 13.673163418290855, "grad_norm": 0.36152827739715576, "learning_rate": 9.953686827236093e-05, "loss": 0.0248, "step": 9120 }, { "epoch": 13.68815592203898, "grad_norm": 0.30583521723747253, "learning_rate": 9.953462028369695e-05, "loss": 0.0159, "step": 9130 }, { "epoch": 13.703148425787106, "grad_norm": 0.33813372254371643, "learning_rate": 9.953236687800136e-05, "loss": 0.0221, "step": 9140 }, { "epoch": 13.718140929535233, "grad_norm": 0.31283822655677795, "learning_rate": 9.95301080555206e-05, "loss": 0.0209, "step": 9150 }, { "epoch": 13.733133433283358, "grad_norm": 0.25571706891059875, "learning_rate": 9.952784381650171e-05, "loss": 0.0223, "step": 9160 }, { "epoch": 13.748125937031483, "grad_norm": 0.41844642162323, "learning_rate": 9.952557416119226e-05, "loss": 0.0215, "step": 9170 }, { "epoch": 13.76311844077961, "grad_norm": 0.368600457906723, "learning_rate": 9.95232990898405e-05, "loss": 0.023, "step": 9180 }, { "epoch": 13.778110944527736, "grad_norm": 0.3875691294670105, "learning_rate": 9.95210186026952e-05, "loss": 0.0202, "step": 9190 }, { "epoch": 13.793103448275861, "grad_norm": 0.19624242186546326, "learning_rate": 9.951873270000576e-05, "loss": 0.0245, "step": 9200 }, { "epoch": 13.808095952023988, "grad_norm": 0.45546120405197144, "learning_rate": 9.951644138202216e-05, "loss": 0.0216, "step": 9210 }, { "epoch": 13.823088455772114, "grad_norm": 0.24161510169506073, "learning_rate": 9.951414464899498e-05, "loss": 0.0264, "step": 9220 }, { "epoch": 13.83808095952024, "grad_norm": 0.3400627076625824, "learning_rate": 9.951184250117538e-05, "loss": 0.0234, "step": 9230 }, { "epoch": 13.853073463268366, "grad_norm": 0.34753721952438354, "learning_rate": 9.950953493881513e-05, "loss": 0.0218, "step": 9240 }, { "epoch": 13.868065967016491, "grad_norm": 0.3442288041114807, "learning_rate": 9.950722196216658e-05, "loss": 0.024, "step": 9250 }, { "epoch": 13.883058470764619, "grad_norm": 0.4176729917526245, "learning_rate": 9.950490357148265e-05, "loss": 0.0205, "step": 9260 }, { "epoch": 13.898050974512744, "grad_norm": 0.3150755763053894, "learning_rate": 9.950257976701692e-05, "loss": 0.0225, "step": 9270 }, { "epoch": 13.91304347826087, "grad_norm": 0.2414959818124771, "learning_rate": 9.950025054902348e-05, "loss": 0.0179, "step": 9280 }, { "epoch": 13.928035982008996, "grad_norm": 0.3287509083747864, "learning_rate": 9.949791591775706e-05, "loss": 0.0198, "step": 9290 }, { "epoch": 13.943028485757122, "grad_norm": 0.38492098450660706, "learning_rate": 9.949557587347298e-05, "loss": 0.0232, "step": 9300 }, { "epoch": 13.958020989505247, "grad_norm": 0.27235740423202515, "learning_rate": 9.949323041642713e-05, "loss": 0.0197, "step": 9310 }, { "epoch": 13.973013493253374, "grad_norm": 0.5159173607826233, "learning_rate": 9.949087954687602e-05, "loss": 0.0212, "step": 9320 }, { "epoch": 13.9880059970015, "grad_norm": 0.3702939748764038, "learning_rate": 9.948852326507672e-05, "loss": 0.0207, "step": 9330 }, { "epoch": 14.002998500749625, "grad_norm": 0.37264329195022583, "learning_rate": 9.948616157128694e-05, "loss": 0.0219, "step": 9340 }, { "epoch": 14.017991004497752, "grad_norm": 0.34394803643226624, "learning_rate": 9.948379446576493e-05, "loss": 0.0187, "step": 9350 }, { "epoch": 14.032983508245877, "grad_norm": 0.2710411250591278, "learning_rate": 9.948142194876952e-05, "loss": 0.0224, "step": 9360 }, { "epoch": 14.047976011994002, "grad_norm": 0.320766806602478, "learning_rate": 9.947904402056024e-05, "loss": 0.0243, "step": 9370 }, { "epoch": 14.06296851574213, "grad_norm": 0.22861100733280182, "learning_rate": 9.947666068139708e-05, "loss": 0.0201, "step": 9380 }, { "epoch": 14.077961019490255, "grad_norm": 0.25473904609680176, "learning_rate": 9.947427193154071e-05, "loss": 0.0201, "step": 9390 }, { "epoch": 14.09295352323838, "grad_norm": 0.2736515700817108, "learning_rate": 9.947187777125233e-05, "loss": 0.0207, "step": 9400 }, { "epoch": 14.107946026986507, "grad_norm": 0.252525269985199, "learning_rate": 9.946947820079377e-05, "loss": 0.021, "step": 9410 }, { "epoch": 14.122938530734633, "grad_norm": 0.2318815141916275, "learning_rate": 9.946707322042747e-05, "loss": 0.0188, "step": 9420 }, { "epoch": 14.137931034482758, "grad_norm": 0.3196122646331787, "learning_rate": 9.94646628304164e-05, "loss": 0.0189, "step": 9430 }, { "epoch": 14.152923538230885, "grad_norm": 0.3823380470275879, "learning_rate": 9.946224703102418e-05, "loss": 0.0248, "step": 9440 }, { "epoch": 14.16791604197901, "grad_norm": 0.4808330237865448, "learning_rate": 9.945982582251498e-05, "loss": 0.0249, "step": 9450 }, { "epoch": 14.182908545727136, "grad_norm": 0.18593814969062805, "learning_rate": 9.94573992051536e-05, "loss": 0.0212, "step": 9460 }, { "epoch": 14.197901049475263, "grad_norm": 0.3025306165218353, "learning_rate": 9.94549671792054e-05, "loss": 0.0222, "step": 9470 }, { "epoch": 14.212893553223388, "grad_norm": 0.34669092297554016, "learning_rate": 9.945252974493635e-05, "loss": 0.0196, "step": 9480 }, { "epoch": 14.227886056971514, "grad_norm": 0.28056439757347107, "learning_rate": 9.9450086902613e-05, "loss": 0.0219, "step": 9490 }, { "epoch": 14.24287856071964, "grad_norm": 0.4071255624294281, "learning_rate": 9.944763865250248e-05, "loss": 0.0195, "step": 9500 }, { "epoch": 14.257871064467766, "grad_norm": 0.27808547019958496, "learning_rate": 9.944518499487254e-05, "loss": 0.0242, "step": 9510 }, { "epoch": 14.272863568215891, "grad_norm": 0.4313402771949768, "learning_rate": 9.944272592999151e-05, "loss": 0.0228, "step": 9520 }, { "epoch": 14.287856071964018, "grad_norm": 0.5299051403999329, "learning_rate": 9.94402614581283e-05, "loss": 0.0215, "step": 9530 }, { "epoch": 14.302848575712144, "grad_norm": 0.39898914098739624, "learning_rate": 9.943779157955244e-05, "loss": 0.0262, "step": 9540 }, { "epoch": 14.317841079460269, "grad_norm": 0.44068223237991333, "learning_rate": 9.943531629453403e-05, "loss": 0.0229, "step": 9550 }, { "epoch": 14.332833583208396, "grad_norm": 0.26288914680480957, "learning_rate": 9.943283560334375e-05, "loss": 0.0226, "step": 9560 }, { "epoch": 14.347826086956522, "grad_norm": 0.34288346767425537, "learning_rate": 9.943034950625288e-05, "loss": 0.021, "step": 9570 }, { "epoch": 14.362818590704647, "grad_norm": 0.5901798009872437, "learning_rate": 9.942785800353332e-05, "loss": 0.0224, "step": 9580 }, { "epoch": 14.377811094452774, "grad_norm": 0.24762094020843506, "learning_rate": 9.942536109545751e-05, "loss": 0.0219, "step": 9590 }, { "epoch": 14.3928035982009, "grad_norm": 0.3555395305156708, "learning_rate": 9.942285878229853e-05, "loss": 0.0195, "step": 9600 }, { "epoch": 14.407796101949025, "grad_norm": 0.3074661195278168, "learning_rate": 9.942035106433001e-05, "loss": 0.0243, "step": 9610 }, { "epoch": 14.422788605697152, "grad_norm": 0.37400805950164795, "learning_rate": 9.94178379418262e-05, "loss": 0.0256, "step": 9620 }, { "epoch": 14.437781109445277, "grad_norm": 0.2315872609615326, "learning_rate": 9.941531941506194e-05, "loss": 0.0221, "step": 9630 }, { "epoch": 14.452773613193404, "grad_norm": 0.3173655569553375, "learning_rate": 9.941279548431263e-05, "loss": 0.0171, "step": 9640 }, { "epoch": 14.46776611694153, "grad_norm": 0.4141431748867035, "learning_rate": 9.941026614985431e-05, "loss": 0.0203, "step": 9650 }, { "epoch": 14.482758620689655, "grad_norm": 0.33221590518951416, "learning_rate": 9.940773141196357e-05, "loss": 0.0225, "step": 9660 }, { "epoch": 14.497751124437782, "grad_norm": 0.445909708738327, "learning_rate": 9.94051912709176e-05, "loss": 0.0216, "step": 9670 }, { "epoch": 14.512743628185907, "grad_norm": 0.19096921384334564, "learning_rate": 9.940264572699421e-05, "loss": 0.0203, "step": 9680 }, { "epoch": 14.527736131934033, "grad_norm": 0.4575099050998688, "learning_rate": 9.940009478047174e-05, "loss": 0.0208, "step": 9690 }, { "epoch": 14.54272863568216, "grad_norm": 0.41168755292892456, "learning_rate": 9.939753843162918e-05, "loss": 0.0183, "step": 9700 }, { "epoch": 14.557721139430285, "grad_norm": 0.36744481325149536, "learning_rate": 9.939497668074609e-05, "loss": 0.0195, "step": 9710 }, { "epoch": 14.57271364317841, "grad_norm": 0.27455320954322815, "learning_rate": 9.93924095281026e-05, "loss": 0.0213, "step": 9720 }, { "epoch": 14.587706146926537, "grad_norm": 0.3709152936935425, "learning_rate": 9.938983697397948e-05, "loss": 0.0222, "step": 9730 }, { "epoch": 14.602698650674663, "grad_norm": 0.2630356252193451, "learning_rate": 9.938725901865805e-05, "loss": 0.0217, "step": 9740 }, { "epoch": 14.617691154422788, "grad_norm": 0.36606842279434204, "learning_rate": 9.93846756624202e-05, "loss": 0.0213, "step": 9750 }, { "epoch": 14.632683658170915, "grad_norm": 0.2851661443710327, "learning_rate": 9.938208690554849e-05, "loss": 0.0194, "step": 9760 }, { "epoch": 14.64767616191904, "grad_norm": 0.3013788163661957, "learning_rate": 9.9379492748326e-05, "loss": 0.0205, "step": 9770 }, { "epoch": 14.662668665667166, "grad_norm": 0.3037647008895874, "learning_rate": 9.937689319103641e-05, "loss": 0.0234, "step": 9780 }, { "epoch": 14.677661169415293, "grad_norm": 0.4145650565624237, "learning_rate": 9.937428823396404e-05, "loss": 0.0242, "step": 9790 }, { "epoch": 14.692653673163418, "grad_norm": 0.4470023810863495, "learning_rate": 9.937167787739372e-05, "loss": 0.021, "step": 9800 }, { "epoch": 14.707646176911544, "grad_norm": 0.20968367159366608, "learning_rate": 9.936906212161095e-05, "loss": 0.0199, "step": 9810 }, { "epoch": 14.72263868065967, "grad_norm": 0.3800349831581116, "learning_rate": 9.936644096690176e-05, "loss": 0.02, "step": 9820 }, { "epoch": 14.737631184407796, "grad_norm": 0.2903740704059601, "learning_rate": 9.936381441355282e-05, "loss": 0.0242, "step": 9830 }, { "epoch": 14.752623688155921, "grad_norm": 0.24798911809921265, "learning_rate": 9.936118246185136e-05, "loss": 0.02, "step": 9840 }, { "epoch": 14.767616191904049, "grad_norm": 0.20470072329044342, "learning_rate": 9.935854511208518e-05, "loss": 0.0214, "step": 9850 }, { "epoch": 14.782608695652174, "grad_norm": 0.4413379430770874, "learning_rate": 9.935590236454272e-05, "loss": 0.0219, "step": 9860 }, { "epoch": 14.7976011994003, "grad_norm": 0.24200640618801117, "learning_rate": 9.935325421951298e-05, "loss": 0.0211, "step": 9870 }, { "epoch": 14.812593703148426, "grad_norm": 0.2405657321214676, "learning_rate": 9.935060067728557e-05, "loss": 0.02, "step": 9880 }, { "epoch": 14.827586206896552, "grad_norm": 0.37425583600997925, "learning_rate": 9.934794173815067e-05, "loss": 0.0216, "step": 9890 }, { "epoch": 14.842578710644677, "grad_norm": 0.27257204055786133, "learning_rate": 9.934527740239906e-05, "loss": 0.0214, "step": 9900 }, { "epoch": 14.857571214392804, "grad_norm": 0.2446412742137909, "learning_rate": 9.934260767032209e-05, "loss": 0.0173, "step": 9910 }, { "epoch": 14.87256371814093, "grad_norm": 0.19735027849674225, "learning_rate": 9.933993254221172e-05, "loss": 0.026, "step": 9920 }, { "epoch": 14.887556221889055, "grad_norm": 0.34004735946655273, "learning_rate": 9.933725201836053e-05, "loss": 0.0169, "step": 9930 }, { "epoch": 14.902548725637182, "grad_norm": 0.5104056596755981, "learning_rate": 9.933456609906162e-05, "loss": 0.0264, "step": 9940 }, { "epoch": 14.917541229385307, "grad_norm": 0.3044484257698059, "learning_rate": 9.933187478460875e-05, "loss": 0.0208, "step": 9950 }, { "epoch": 14.932533733133432, "grad_norm": 0.3192273676395416, "learning_rate": 9.93291780752962e-05, "loss": 0.0254, "step": 9960 }, { "epoch": 14.94752623688156, "grad_norm": 0.23500655591487885, "learning_rate": 9.932647597141893e-05, "loss": 0.0203, "step": 9970 }, { "epoch": 14.962518740629685, "grad_norm": 0.2807745337486267, "learning_rate": 9.932376847327239e-05, "loss": 0.0217, "step": 9980 }, { "epoch": 14.977511244377812, "grad_norm": 0.4010413587093353, "learning_rate": 9.932105558115268e-05, "loss": 0.0224, "step": 9990 }, { "epoch": 14.992503748125937, "grad_norm": 0.29456183314323425, "learning_rate": 9.931833729535651e-05, "loss": 0.0209, "step": 10000 }, { "epoch": 15.007496251874063, "grad_norm": 0.45307254791259766, "learning_rate": 9.931561361618111e-05, "loss": 0.024, "step": 10010 }, { "epoch": 15.02248875562219, "grad_norm": 0.2962070405483246, "learning_rate": 9.931288454392435e-05, "loss": 0.0199, "step": 10020 }, { "epoch": 15.037481259370315, "grad_norm": 0.4152054190635681, "learning_rate": 9.931015007888467e-05, "loss": 0.0195, "step": 10030 }, { "epoch": 15.05247376311844, "grad_norm": 0.25698643922805786, "learning_rate": 9.930741022136112e-05, "loss": 0.024, "step": 10040 }, { "epoch": 15.067466266866568, "grad_norm": 0.29116350412368774, "learning_rate": 9.930466497165333e-05, "loss": 0.0188, "step": 10050 }, { "epoch": 15.082458770614693, "grad_norm": 0.29714885354042053, "learning_rate": 9.93019143300615e-05, "loss": 0.0195, "step": 10060 }, { "epoch": 15.097451274362818, "grad_norm": 0.31359878182411194, "learning_rate": 9.929915829688644e-05, "loss": 0.0202, "step": 10070 }, { "epoch": 15.112443778110945, "grad_norm": 0.3190551698207855, "learning_rate": 9.929639687242955e-05, "loss": 0.0204, "step": 10080 }, { "epoch": 15.12743628185907, "grad_norm": 0.33053678274154663, "learning_rate": 9.929363005699281e-05, "loss": 0.0187, "step": 10090 }, { "epoch": 15.142428785607196, "grad_norm": 0.3594664931297302, "learning_rate": 9.92908578508788e-05, "loss": 0.018, "step": 10100 }, { "epoch": 15.157421289355323, "grad_norm": 0.24445463716983795, "learning_rate": 9.928808025439069e-05, "loss": 0.0227, "step": 10110 }, { "epoch": 15.172413793103448, "grad_norm": 0.3703429102897644, "learning_rate": 9.928529726783223e-05, "loss": 0.0187, "step": 10120 }, { "epoch": 15.187406296851574, "grad_norm": 0.37695813179016113, "learning_rate": 9.928250889150774e-05, "loss": 0.0228, "step": 10130 }, { "epoch": 15.2023988005997, "grad_norm": 0.240004301071167, "learning_rate": 9.92797151257222e-05, "loss": 0.0174, "step": 10140 }, { "epoch": 15.217391304347826, "grad_norm": 0.3754497170448303, "learning_rate": 9.927691597078108e-05, "loss": 0.0201, "step": 10150 }, { "epoch": 15.232383808095951, "grad_norm": 0.3483123183250427, "learning_rate": 9.927411142699053e-05, "loss": 0.0188, "step": 10160 }, { "epoch": 15.247376311844079, "grad_norm": 0.3730141520500183, "learning_rate": 9.927130149465725e-05, "loss": 0.0212, "step": 10170 }, { "epoch": 15.262368815592204, "grad_norm": 0.2580997943878174, "learning_rate": 9.92684861740885e-05, "loss": 0.0213, "step": 10180 }, { "epoch": 15.27736131934033, "grad_norm": 0.3934812545776367, "learning_rate": 9.926566546559217e-05, "loss": 0.0202, "step": 10190 }, { "epoch": 15.292353823088456, "grad_norm": 0.35997021198272705, "learning_rate": 9.926283936947673e-05, "loss": 0.019, "step": 10200 }, { "epoch": 15.307346326836582, "grad_norm": 0.41819626092910767, "learning_rate": 9.926000788605126e-05, "loss": 0.021, "step": 10210 }, { "epoch": 15.322338830584707, "grad_norm": 0.1852847784757614, "learning_rate": 9.92571710156254e-05, "loss": 0.0175, "step": 10220 }, { "epoch": 15.337331334332834, "grad_norm": 0.2780044376850128, "learning_rate": 9.925432875850936e-05, "loss": 0.0209, "step": 10230 }, { "epoch": 15.35232383808096, "grad_norm": 0.2904166281223297, "learning_rate": 9.925148111501396e-05, "loss": 0.0174, "step": 10240 }, { "epoch": 15.367316341829085, "grad_norm": 0.1931580752134323, "learning_rate": 9.924862808545066e-05, "loss": 0.0174, "step": 10250 }, { "epoch": 15.382308845577212, "grad_norm": 0.3456142842769623, "learning_rate": 9.924576967013141e-05, "loss": 0.0201, "step": 10260 }, { "epoch": 15.397301349325337, "grad_norm": 0.3927532732486725, "learning_rate": 9.924290586936887e-05, "loss": 0.0216, "step": 10270 }, { "epoch": 15.412293853073463, "grad_norm": 0.36971935629844666, "learning_rate": 9.924003668347614e-05, "loss": 0.0217, "step": 10280 }, { "epoch": 15.42728635682159, "grad_norm": 0.33837950229644775, "learning_rate": 9.923716211276704e-05, "loss": 0.0233, "step": 10290 }, { "epoch": 15.442278860569715, "grad_norm": 0.3072681427001953, "learning_rate": 9.923428215755594e-05, "loss": 0.0216, "step": 10300 }, { "epoch": 15.45727136431784, "grad_norm": 0.24246111512184143, "learning_rate": 9.923139681815775e-05, "loss": 0.0191, "step": 10310 }, { "epoch": 15.472263868065967, "grad_norm": 0.288167268037796, "learning_rate": 9.922850609488801e-05, "loss": 0.0218, "step": 10320 }, { "epoch": 15.487256371814093, "grad_norm": 0.3855711817741394, "learning_rate": 9.922560998806287e-05, "loss": 0.018, "step": 10330 }, { "epoch": 15.502248875562218, "grad_norm": 0.17815515398979187, "learning_rate": 9.922270849799905e-05, "loss": 0.0159, "step": 10340 }, { "epoch": 15.517241379310345, "grad_norm": 0.22839263081550598, "learning_rate": 9.92198016250138e-05, "loss": 0.0181, "step": 10350 }, { "epoch": 15.53223388305847, "grad_norm": 0.13210536539554596, "learning_rate": 9.921688936942506e-05, "loss": 0.0145, "step": 10360 }, { "epoch": 15.547226386806596, "grad_norm": 0.26111850142478943, "learning_rate": 9.921397173155129e-05, "loss": 0.0178, "step": 10370 }, { "epoch": 15.562218890554723, "grad_norm": 0.4395041763782501, "learning_rate": 9.921104871171157e-05, "loss": 0.0231, "step": 10380 }, { "epoch": 15.577211394302848, "grad_norm": 0.40534302592277527, "learning_rate": 9.920812031022554e-05, "loss": 0.0165, "step": 10390 }, { "epoch": 15.592203898050975, "grad_norm": 0.35182976722717285, "learning_rate": 9.920518652741348e-05, "loss": 0.0209, "step": 10400 }, { "epoch": 15.6071964017991, "grad_norm": 0.28993621468544006, "learning_rate": 9.920224736359618e-05, "loss": 0.0174, "step": 10410 }, { "epoch": 15.622188905547226, "grad_norm": 0.254412978887558, "learning_rate": 9.91993028190951e-05, "loss": 0.017, "step": 10420 }, { "epoch": 15.637181409295351, "grad_norm": 0.31224337220191956, "learning_rate": 9.919635289423222e-05, "loss": 0.0162, "step": 10430 }, { "epoch": 15.652173913043478, "grad_norm": 0.4177940785884857, "learning_rate": 9.919339758933015e-05, "loss": 0.0218, "step": 10440 }, { "epoch": 15.667166416791604, "grad_norm": 0.4095781445503235, "learning_rate": 9.919043690471209e-05, "loss": 0.0203, "step": 10450 }, { "epoch": 15.682158920539731, "grad_norm": 0.2077493667602539, "learning_rate": 9.91874708407018e-05, "loss": 0.0194, "step": 10460 }, { "epoch": 15.697151424287856, "grad_norm": 0.35046902298927307, "learning_rate": 9.918449939762367e-05, "loss": 0.0212, "step": 10470 }, { "epoch": 15.712143928035982, "grad_norm": 0.23753350973129272, "learning_rate": 9.91815225758026e-05, "loss": 0.0181, "step": 10480 }, { "epoch": 15.727136431784109, "grad_norm": 0.2182263880968094, "learning_rate": 9.917854037556419e-05, "loss": 0.0182, "step": 10490 }, { "epoch": 15.742128935532234, "grad_norm": 0.28410306572914124, "learning_rate": 9.917555279723454e-05, "loss": 0.0173, "step": 10500 }, { "epoch": 15.75712143928036, "grad_norm": 0.23080061376094818, "learning_rate": 9.917255984114036e-05, "loss": 0.0171, "step": 10510 }, { "epoch": 15.772113943028486, "grad_norm": 0.3656279742717743, "learning_rate": 9.916956150760896e-05, "loss": 0.0204, "step": 10520 }, { "epoch": 15.787106446776612, "grad_norm": 0.2392081767320633, "learning_rate": 9.916655779696826e-05, "loss": 0.019, "step": 10530 }, { "epoch": 15.802098950524737, "grad_norm": 0.41153258085250854, "learning_rate": 9.916354870954671e-05, "loss": 0.0179, "step": 10540 }, { "epoch": 15.817091454272864, "grad_norm": 0.44127511978149414, "learning_rate": 9.91605342456734e-05, "loss": 0.0168, "step": 10550 }, { "epoch": 15.83208395802099, "grad_norm": 0.31889671087265015, "learning_rate": 9.915751440567795e-05, "loss": 0.02, "step": 10560 }, { "epoch": 15.847076461769115, "grad_norm": 0.3816819190979004, "learning_rate": 9.915448918989066e-05, "loss": 0.0213, "step": 10570 }, { "epoch": 15.862068965517242, "grad_norm": 0.3745889663696289, "learning_rate": 9.915145859864232e-05, "loss": 0.0189, "step": 10580 }, { "epoch": 15.877061469265367, "grad_norm": 0.1976807415485382, "learning_rate": 9.914842263226437e-05, "loss": 0.0183, "step": 10590 }, { "epoch": 15.892053973013493, "grad_norm": 0.31275448203086853, "learning_rate": 9.914538129108882e-05, "loss": 0.0176, "step": 10600 }, { "epoch": 15.90704647676162, "grad_norm": 0.2515629827976227, "learning_rate": 9.914233457544825e-05, "loss": 0.0195, "step": 10610 }, { "epoch": 15.922038980509745, "grad_norm": 0.3888002932071686, "learning_rate": 9.913928248567586e-05, "loss": 0.0176, "step": 10620 }, { "epoch": 15.93703148425787, "grad_norm": 0.21307945251464844, "learning_rate": 9.913622502210542e-05, "loss": 0.018, "step": 10630 }, { "epoch": 15.952023988005998, "grad_norm": 0.23039020597934723, "learning_rate": 9.913316218507128e-05, "loss": 0.017, "step": 10640 }, { "epoch": 15.967016491754123, "grad_norm": 0.21541538834571838, "learning_rate": 9.91300939749084e-05, "loss": 0.0173, "step": 10650 }, { "epoch": 15.982008995502248, "grad_norm": 0.49631986021995544, "learning_rate": 9.91270203919523e-05, "loss": 0.023, "step": 10660 }, { "epoch": 15.997001499250375, "grad_norm": 0.1981402188539505, "learning_rate": 9.912394143653912e-05, "loss": 0.0169, "step": 10670 }, { "epoch": 16.011994002998502, "grad_norm": 0.2386118471622467, "learning_rate": 9.912085710900555e-05, "loss": 0.021, "step": 10680 }, { "epoch": 16.026986506746628, "grad_norm": 0.26707810163497925, "learning_rate": 9.911776740968892e-05, "loss": 0.0168, "step": 10690 }, { "epoch": 16.041979010494753, "grad_norm": 0.29031985998153687, "learning_rate": 9.911467233892709e-05, "loss": 0.0191, "step": 10700 }, { "epoch": 16.05697151424288, "grad_norm": 0.2965391278266907, "learning_rate": 9.911157189705853e-05, "loss": 0.0174, "step": 10710 }, { "epoch": 16.071964017991004, "grad_norm": 0.3583792448043823, "learning_rate": 9.910846608442229e-05, "loss": 0.026, "step": 10720 }, { "epoch": 16.08695652173913, "grad_norm": 0.2740519046783447, "learning_rate": 9.910535490135805e-05, "loss": 0.0161, "step": 10730 }, { "epoch": 16.101949025487258, "grad_norm": 0.26954174041748047, "learning_rate": 9.910223834820603e-05, "loss": 0.0179, "step": 10740 }, { "epoch": 16.116941529235383, "grad_norm": 0.30370843410491943, "learning_rate": 9.909911642530703e-05, "loss": 0.0179, "step": 10750 }, { "epoch": 16.13193403298351, "grad_norm": 0.3229650855064392, "learning_rate": 9.909598913300249e-05, "loss": 0.021, "step": 10760 }, { "epoch": 16.146926536731634, "grad_norm": 0.18661777675151825, "learning_rate": 9.909285647163438e-05, "loss": 0.0169, "step": 10770 }, { "epoch": 16.16191904047976, "grad_norm": 0.28280550241470337, "learning_rate": 9.908971844154531e-05, "loss": 0.0187, "step": 10780 }, { "epoch": 16.176911544227885, "grad_norm": 0.34680965542793274, "learning_rate": 9.908657504307843e-05, "loss": 0.022, "step": 10790 }, { "epoch": 16.191904047976013, "grad_norm": 0.297730028629303, "learning_rate": 9.908342627657751e-05, "loss": 0.0193, "step": 10800 }, { "epoch": 16.20689655172414, "grad_norm": 0.3596203029155731, "learning_rate": 9.908027214238689e-05, "loss": 0.0196, "step": 10810 }, { "epoch": 16.221889055472264, "grad_norm": 0.24421323835849762, "learning_rate": 9.90771126408515e-05, "loss": 0.0222, "step": 10820 }, { "epoch": 16.23688155922039, "grad_norm": 0.41479021310806274, "learning_rate": 9.907394777231685e-05, "loss": 0.0216, "step": 10830 }, { "epoch": 16.251874062968515, "grad_norm": 0.3512302339076996, "learning_rate": 9.907077753712905e-05, "loss": 0.021, "step": 10840 }, { "epoch": 16.26686656671664, "grad_norm": 0.15503600239753723, "learning_rate": 9.906760193563482e-05, "loss": 0.0191, "step": 10850 }, { "epoch": 16.28185907046477, "grad_norm": 0.358948677778244, "learning_rate": 9.906442096818139e-05, "loss": 0.0153, "step": 10860 }, { "epoch": 16.296851574212894, "grad_norm": 0.24552708864212036, "learning_rate": 9.906123463511665e-05, "loss": 0.0202, "step": 10870 }, { "epoch": 16.31184407796102, "grad_norm": 0.2544082999229431, "learning_rate": 9.905804293678907e-05, "loss": 0.0212, "step": 10880 }, { "epoch": 16.326836581709145, "grad_norm": 0.18420709669589996, "learning_rate": 9.905484587354766e-05, "loss": 0.015, "step": 10890 }, { "epoch": 16.34182908545727, "grad_norm": 0.3693162798881531, "learning_rate": 9.905164344574205e-05, "loss": 0.0177, "step": 10900 }, { "epoch": 16.356821589205396, "grad_norm": 0.32234835624694824, "learning_rate": 9.904843565372248e-05, "loss": 0.0175, "step": 10910 }, { "epoch": 16.371814092953525, "grad_norm": 0.2633249759674072, "learning_rate": 9.904522249783972e-05, "loss": 0.0178, "step": 10920 }, { "epoch": 16.38680659670165, "grad_norm": 0.24866075813770294, "learning_rate": 9.904200397844517e-05, "loss": 0.0211, "step": 10930 }, { "epoch": 16.401799100449775, "grad_norm": 0.32512161135673523, "learning_rate": 9.903878009589078e-05, "loss": 0.0199, "step": 10940 }, { "epoch": 16.4167916041979, "grad_norm": 0.20880967378616333, "learning_rate": 9.903555085052915e-05, "loss": 0.021, "step": 10950 }, { "epoch": 16.431784107946026, "grad_norm": 0.2537990212440491, "learning_rate": 9.903231624271338e-05, "loss": 0.0163, "step": 10960 }, { "epoch": 16.446776611694155, "grad_norm": 0.28112873435020447, "learning_rate": 9.902907627279724e-05, "loss": 0.0192, "step": 10970 }, { "epoch": 16.46176911544228, "grad_norm": 0.24191202223300934, "learning_rate": 9.902583094113504e-05, "loss": 0.0183, "step": 10980 }, { "epoch": 16.476761619190405, "grad_norm": 0.19190922379493713, "learning_rate": 9.902258024808168e-05, "loss": 0.0196, "step": 10990 }, { "epoch": 16.49175412293853, "grad_norm": 0.39885175228118896, "learning_rate": 9.901932419399264e-05, "loss": 0.0243, "step": 11000 }, { "epoch": 16.506746626686656, "grad_norm": 0.247291699051857, "learning_rate": 9.9016062779224e-05, "loss": 0.0177, "step": 11010 }, { "epoch": 16.52173913043478, "grad_norm": 0.2530309855937958, "learning_rate": 9.901279600413242e-05, "loss": 0.018, "step": 11020 }, { "epoch": 16.53673163418291, "grad_norm": 0.4303077161312103, "learning_rate": 9.900952386907518e-05, "loss": 0.022, "step": 11030 }, { "epoch": 16.551724137931036, "grad_norm": 0.26448604464530945, "learning_rate": 9.90062463744101e-05, "loss": 0.018, "step": 11040 }, { "epoch": 16.56671664167916, "grad_norm": 0.2114461362361908, "learning_rate": 9.900296352049558e-05, "loss": 0.0158, "step": 11050 }, { "epoch": 16.581709145427286, "grad_norm": 0.18887227773666382, "learning_rate": 9.899967530769065e-05, "loss": 0.0174, "step": 11060 }, { "epoch": 16.59670164917541, "grad_norm": 0.2169555276632309, "learning_rate": 9.899638173635489e-05, "loss": 0.0181, "step": 11070 }, { "epoch": 16.611694152923537, "grad_norm": 0.24610064923763275, "learning_rate": 9.899308280684849e-05, "loss": 0.019, "step": 11080 }, { "epoch": 16.626686656671666, "grad_norm": 0.2696317434310913, "learning_rate": 9.898977851953222e-05, "loss": 0.0181, "step": 11090 }, { "epoch": 16.64167916041979, "grad_norm": 0.31641364097595215, "learning_rate": 9.898646887476741e-05, "loss": 0.0216, "step": 11100 }, { "epoch": 16.656671664167916, "grad_norm": 0.28689059615135193, "learning_rate": 9.898315387291603e-05, "loss": 0.0248, "step": 11110 }, { "epoch": 16.671664167916042, "grad_norm": 0.36883342266082764, "learning_rate": 9.89798335143406e-05, "loss": 0.0217, "step": 11120 }, { "epoch": 16.686656671664167, "grad_norm": 0.42866501212120056, "learning_rate": 9.897650779940419e-05, "loss": 0.0221, "step": 11130 }, { "epoch": 16.701649175412292, "grad_norm": 0.37753352522850037, "learning_rate": 9.897317672847054e-05, "loss": 0.0204, "step": 11140 }, { "epoch": 16.71664167916042, "grad_norm": 0.26366615295410156, "learning_rate": 9.89698403019039e-05, "loss": 0.0242, "step": 11150 }, { "epoch": 16.731634182908547, "grad_norm": 0.43204036355018616, "learning_rate": 9.896649852006917e-05, "loss": 0.0217, "step": 11160 }, { "epoch": 16.746626686656672, "grad_norm": 0.21364203095436096, "learning_rate": 9.896315138333177e-05, "loss": 0.0193, "step": 11170 }, { "epoch": 16.761619190404797, "grad_norm": 0.27817466855049133, "learning_rate": 9.895979889205774e-05, "loss": 0.0187, "step": 11180 }, { "epoch": 16.776611694152923, "grad_norm": 0.35245922207832336, "learning_rate": 9.895644104661372e-05, "loss": 0.0211, "step": 11190 }, { "epoch": 16.791604197901048, "grad_norm": 0.2736836075782776, "learning_rate": 9.895307784736691e-05, "loss": 0.023, "step": 11200 }, { "epoch": 16.806596701649177, "grad_norm": 0.23852862417697906, "learning_rate": 9.894970929468512e-05, "loss": 0.0217, "step": 11210 }, { "epoch": 16.821589205397302, "grad_norm": 0.17514967918395996, "learning_rate": 9.89463353889367e-05, "loss": 0.0165, "step": 11220 }, { "epoch": 16.836581709145428, "grad_norm": 0.25887835025787354, "learning_rate": 9.894295613049065e-05, "loss": 0.0176, "step": 11230 }, { "epoch": 16.851574212893553, "grad_norm": 0.2702782452106476, "learning_rate": 9.893957151971649e-05, "loss": 0.0174, "step": 11240 }, { "epoch": 16.866566716641678, "grad_norm": 0.2413305938243866, "learning_rate": 9.893618155698436e-05, "loss": 0.0208, "step": 11250 }, { "epoch": 16.881559220389803, "grad_norm": 0.2475152313709259, "learning_rate": 9.8932786242665e-05, "loss": 0.0199, "step": 11260 }, { "epoch": 16.896551724137932, "grad_norm": 0.3219253420829773, "learning_rate": 9.89293855771297e-05, "loss": 0.019, "step": 11270 }, { "epoch": 16.911544227886058, "grad_norm": 0.8385009169578552, "learning_rate": 9.892597956075036e-05, "loss": 0.0213, "step": 11280 }, { "epoch": 16.926536731634183, "grad_norm": 0.39484721422195435, "learning_rate": 9.892256819389947e-05, "loss": 0.0197, "step": 11290 }, { "epoch": 16.94152923538231, "grad_norm": 0.38393718004226685, "learning_rate": 9.891915147695006e-05, "loss": 0.025, "step": 11300 }, { "epoch": 16.956521739130434, "grad_norm": 0.3013901710510254, "learning_rate": 9.891572941027577e-05, "loss": 0.0182, "step": 11310 }, { "epoch": 16.97151424287856, "grad_norm": 0.2502027153968811, "learning_rate": 9.89123019942509e-05, "loss": 0.022, "step": 11320 }, { "epoch": 16.986506746626688, "grad_norm": 0.31390711665153503, "learning_rate": 9.89088692292502e-05, "loss": 0.0205, "step": 11330 }, { "epoch": 17.001499250374813, "grad_norm": 0.274952232837677, "learning_rate": 9.89054311156491e-05, "loss": 0.0209, "step": 11340 }, { "epoch": 17.01649175412294, "grad_norm": 0.20174916088581085, "learning_rate": 9.890198765382357e-05, "loss": 0.0206, "step": 11350 }, { "epoch": 17.031484257871064, "grad_norm": 0.28905990719795227, "learning_rate": 9.889853884415021e-05, "loss": 0.0164, "step": 11360 }, { "epoch": 17.04647676161919, "grad_norm": 0.20262154936790466, "learning_rate": 9.889508468700614e-05, "loss": 0.0194, "step": 11370 }, { "epoch": 17.061469265367315, "grad_norm": 0.24840036034584045, "learning_rate": 9.889162518276915e-05, "loss": 0.0166, "step": 11380 }, { "epoch": 17.076461769115443, "grad_norm": 0.2636430859565735, "learning_rate": 9.888816033181752e-05, "loss": 0.0151, "step": 11390 }, { "epoch": 17.09145427286357, "grad_norm": 0.3275454640388489, "learning_rate": 9.888469013453018e-05, "loss": 0.0175, "step": 11400 }, { "epoch": 17.106446776611694, "grad_norm": 0.2520284652709961, "learning_rate": 9.888121459128663e-05, "loss": 0.0182, "step": 11410 }, { "epoch": 17.12143928035982, "grad_norm": 0.21250317990779877, "learning_rate": 9.887773370246693e-05, "loss": 0.0168, "step": 11420 }, { "epoch": 17.136431784107945, "grad_norm": 0.24433083832263947, "learning_rate": 9.887424746845177e-05, "loss": 0.0204, "step": 11430 }, { "epoch": 17.151424287856074, "grad_norm": 0.2665407061576843, "learning_rate": 9.887075588962239e-05, "loss": 0.0162, "step": 11440 }, { "epoch": 17.1664167916042, "grad_norm": 0.3017294108867645, "learning_rate": 9.88672589663606e-05, "loss": 0.0195, "step": 11450 }, { "epoch": 17.181409295352324, "grad_norm": 0.24222537875175476, "learning_rate": 9.886375669904886e-05, "loss": 0.0225, "step": 11460 }, { "epoch": 17.19640179910045, "grad_norm": 0.4294602870941162, "learning_rate": 9.886024908807014e-05, "loss": 0.0231, "step": 11470 }, { "epoch": 17.211394302848575, "grad_norm": 0.2579127848148346, "learning_rate": 9.885673613380806e-05, "loss": 0.0209, "step": 11480 }, { "epoch": 17.2263868065967, "grad_norm": 0.28478431701660156, "learning_rate": 9.885321783664676e-05, "loss": 0.0173, "step": 11490 }, { "epoch": 17.24137931034483, "grad_norm": 0.24079205095767975, "learning_rate": 9.884969419697101e-05, "loss": 0.0222, "step": 11500 }, { "epoch": 17.256371814092955, "grad_norm": 0.2643553912639618, "learning_rate": 9.884616521516614e-05, "loss": 0.021, "step": 11510 }, { "epoch": 17.27136431784108, "grad_norm": 0.273260235786438, "learning_rate": 9.88426308916181e-05, "loss": 0.0197, "step": 11520 }, { "epoch": 17.286356821589205, "grad_norm": 0.332125723361969, "learning_rate": 9.883909122671335e-05, "loss": 0.0215, "step": 11530 }, { "epoch": 17.30134932533733, "grad_norm": 0.31094443798065186, "learning_rate": 9.883554622083904e-05, "loss": 0.0168, "step": 11540 }, { "epoch": 17.316341829085456, "grad_norm": 0.34955352544784546, "learning_rate": 9.88319958743828e-05, "loss": 0.0185, "step": 11550 }, { "epoch": 17.331334332833585, "grad_norm": 0.2829396426677704, "learning_rate": 9.882844018773291e-05, "loss": 0.0189, "step": 11560 }, { "epoch": 17.34632683658171, "grad_norm": 0.2848830223083496, "learning_rate": 9.882487916127823e-05, "loss": 0.0203, "step": 11570 }, { "epoch": 17.361319340329835, "grad_norm": 0.30275437235832214, "learning_rate": 9.882131279540815e-05, "loss": 0.0169, "step": 11580 }, { "epoch": 17.37631184407796, "grad_norm": 0.3396068215370178, "learning_rate": 9.881774109051271e-05, "loss": 0.0224, "step": 11590 }, { "epoch": 17.391304347826086, "grad_norm": 0.3017500638961792, "learning_rate": 9.881416404698252e-05, "loss": 0.0211, "step": 11600 }, { "epoch": 17.40629685157421, "grad_norm": 0.20704782009124756, "learning_rate": 9.881058166520873e-05, "loss": 0.0213, "step": 11610 }, { "epoch": 17.42128935532234, "grad_norm": 0.39469075202941895, "learning_rate": 9.880699394558311e-05, "loss": 0.0178, "step": 11620 }, { "epoch": 17.436281859070466, "grad_norm": 0.2683986723423004, "learning_rate": 9.880340088849801e-05, "loss": 0.0191, "step": 11630 }, { "epoch": 17.45127436281859, "grad_norm": 0.3730694353580475, "learning_rate": 9.879980249434637e-05, "loss": 0.0219, "step": 11640 }, { "epoch": 17.466266866566716, "grad_norm": 0.2715345025062561, "learning_rate": 9.879619876352168e-05, "loss": 0.0162, "step": 11650 }, { "epoch": 17.48125937031484, "grad_norm": 0.24768829345703125, "learning_rate": 9.879258969641809e-05, "loss": 0.0173, "step": 11660 }, { "epoch": 17.496251874062967, "grad_norm": 0.24764342606067657, "learning_rate": 9.878897529343023e-05, "loss": 0.0185, "step": 11670 }, { "epoch": 17.511244377811096, "grad_norm": 0.254761278629303, "learning_rate": 9.878535555495338e-05, "loss": 0.017, "step": 11680 }, { "epoch": 17.52623688155922, "grad_norm": 0.33404305577278137, "learning_rate": 9.87817304813834e-05, "loss": 0.0161, "step": 11690 }, { "epoch": 17.541229385307346, "grad_norm": 0.3025311529636383, "learning_rate": 9.877810007311671e-05, "loss": 0.0179, "step": 11700 }, { "epoch": 17.55622188905547, "grad_norm": 0.2572515308856964, "learning_rate": 9.877446433055035e-05, "loss": 0.0282, "step": 11710 }, { "epoch": 17.571214392803597, "grad_norm": 0.5776599049568176, "learning_rate": 9.877082325408191e-05, "loss": 0.0173, "step": 11720 }, { "epoch": 17.586206896551722, "grad_norm": 0.424754798412323, "learning_rate": 9.876717684410954e-05, "loss": 0.019, "step": 11730 }, { "epoch": 17.60119940029985, "grad_norm": 0.28409242630004883, "learning_rate": 9.876352510103204e-05, "loss": 0.0219, "step": 11740 }, { "epoch": 17.616191904047977, "grad_norm": 0.3630163371562958, "learning_rate": 9.875986802524875e-05, "loss": 0.0178, "step": 11750 }, { "epoch": 17.631184407796102, "grad_norm": 0.4724234342575073, "learning_rate": 9.87562056171596e-05, "loss": 0.0202, "step": 11760 }, { "epoch": 17.646176911544227, "grad_norm": 0.21129801869392395, "learning_rate": 9.875253787716511e-05, "loss": 0.0168, "step": 11770 }, { "epoch": 17.661169415292353, "grad_norm": 0.33726969361305237, "learning_rate": 9.874886480566637e-05, "loss": 0.0164, "step": 11780 }, { "epoch": 17.67616191904048, "grad_norm": 0.5022133588790894, "learning_rate": 9.874518640306507e-05, "loss": 0.0192, "step": 11790 }, { "epoch": 17.691154422788607, "grad_norm": 0.2350488007068634, "learning_rate": 9.874150266976347e-05, "loss": 0.0184, "step": 11800 }, { "epoch": 17.706146926536732, "grad_norm": 0.19968967139720917, "learning_rate": 9.873781360616443e-05, "loss": 0.016, "step": 11810 }, { "epoch": 17.721139430284857, "grad_norm": 0.22995202243328094, "learning_rate": 9.873411921267137e-05, "loss": 0.0193, "step": 11820 }, { "epoch": 17.736131934032983, "grad_norm": 0.4145035147666931, "learning_rate": 9.873041948968829e-05, "loss": 0.0189, "step": 11830 }, { "epoch": 17.751124437781108, "grad_norm": 0.2643471360206604, "learning_rate": 9.872671443761981e-05, "loss": 0.0167, "step": 11840 }, { "epoch": 17.766116941529237, "grad_norm": 0.27975529432296753, "learning_rate": 9.872300405687109e-05, "loss": 0.0199, "step": 11850 }, { "epoch": 17.781109445277362, "grad_norm": 0.227279394865036, "learning_rate": 9.871928834784792e-05, "loss": 0.0173, "step": 11860 }, { "epoch": 17.796101949025488, "grad_norm": 0.27226436138153076, "learning_rate": 9.871556731095661e-05, "loss": 0.0243, "step": 11870 }, { "epoch": 17.811094452773613, "grad_norm": 0.35141468048095703, "learning_rate": 9.871184094660411e-05, "loss": 0.018, "step": 11880 }, { "epoch": 17.82608695652174, "grad_norm": 0.3108333349227905, "learning_rate": 9.870810925519791e-05, "loss": 0.0174, "step": 11890 }, { "epoch": 17.841079460269864, "grad_norm": 0.41292378306388855, "learning_rate": 9.870437223714612e-05, "loss": 0.0174, "step": 11900 }, { "epoch": 17.856071964017993, "grad_norm": 0.2519598603248596, "learning_rate": 9.87006298928574e-05, "loss": 0.0217, "step": 11910 }, { "epoch": 17.871064467766118, "grad_norm": 0.22630108892917633, "learning_rate": 9.869688222274103e-05, "loss": 0.0159, "step": 11920 }, { "epoch": 17.886056971514243, "grad_norm": 0.32888758182525635, "learning_rate": 9.869312922720681e-05, "loss": 0.0184, "step": 11930 }, { "epoch": 17.90104947526237, "grad_norm": 0.22107748687267303, "learning_rate": 9.868937090666521e-05, "loss": 0.0181, "step": 11940 }, { "epoch": 17.916041979010494, "grad_norm": 0.2747652232646942, "learning_rate": 9.86856072615272e-05, "loss": 0.019, "step": 11950 }, { "epoch": 17.93103448275862, "grad_norm": 0.37516751885414124, "learning_rate": 9.868183829220438e-05, "loss": 0.0181, "step": 11960 }, { "epoch": 17.946026986506748, "grad_norm": 0.2943252623081207, "learning_rate": 9.867806399910893e-05, "loss": 0.0215, "step": 11970 }, { "epoch": 17.961019490254873, "grad_norm": 0.22925099730491638, "learning_rate": 9.867428438265356e-05, "loss": 0.0167, "step": 11980 }, { "epoch": 17.976011994003, "grad_norm": 0.4964207112789154, "learning_rate": 9.867049944325165e-05, "loss": 0.0161, "step": 11990 }, { "epoch": 17.991004497751124, "grad_norm": 0.21856465935707092, "learning_rate": 9.86667091813171e-05, "loss": 0.0179, "step": 12000 }, { "epoch": 18.00599700149925, "grad_norm": 0.475579172372818, "learning_rate": 9.866291359726438e-05, "loss": 0.0189, "step": 12010 }, { "epoch": 18.020989505247375, "grad_norm": 0.24540632963180542, "learning_rate": 9.865911269150861e-05, "loss": 0.0213, "step": 12020 }, { "epoch": 18.035982008995504, "grad_norm": 0.2666570246219635, "learning_rate": 9.865530646446544e-05, "loss": 0.0201, "step": 12030 }, { "epoch": 18.05097451274363, "grad_norm": 0.3016108274459839, "learning_rate": 9.86514949165511e-05, "loss": 0.0166, "step": 12040 }, { "epoch": 18.065967016491754, "grad_norm": 0.3532419204711914, "learning_rate": 9.864767804818243e-05, "loss": 0.0258, "step": 12050 }, { "epoch": 18.08095952023988, "grad_norm": 0.40472960472106934, "learning_rate": 9.86438558597768e-05, "loss": 0.0196, "step": 12060 }, { "epoch": 18.095952023988005, "grad_norm": 0.4099477529525757, "learning_rate": 9.864002835175225e-05, "loss": 0.0223, "step": 12070 }, { "epoch": 18.11094452773613, "grad_norm": 0.2853414714336395, "learning_rate": 9.863619552452734e-05, "loss": 0.019, "step": 12080 }, { "epoch": 18.12593703148426, "grad_norm": 0.3174021542072296, "learning_rate": 9.863235737852119e-05, "loss": 0.0184, "step": 12090 }, { "epoch": 18.140929535232384, "grad_norm": 0.37211909890174866, "learning_rate": 9.862851391415356e-05, "loss": 0.0194, "step": 12100 }, { "epoch": 18.15592203898051, "grad_norm": 0.4531102776527405, "learning_rate": 9.862466513184477e-05, "loss": 0.0181, "step": 12110 }, { "epoch": 18.170914542728635, "grad_norm": 0.31237542629241943, "learning_rate": 9.86208110320157e-05, "loss": 0.0168, "step": 12120 }, { "epoch": 18.18590704647676, "grad_norm": 0.2041163146495819, "learning_rate": 9.861695161508784e-05, "loss": 0.0194, "step": 12130 }, { "epoch": 18.200899550224886, "grad_norm": 0.32146885991096497, "learning_rate": 9.861308688148324e-05, "loss": 0.0197, "step": 12140 }, { "epoch": 18.215892053973015, "grad_norm": 0.16893577575683594, "learning_rate": 9.860921683162455e-05, "loss": 0.0204, "step": 12150 }, { "epoch": 18.23088455772114, "grad_norm": 0.3193182051181793, "learning_rate": 9.860534146593499e-05, "loss": 0.0247, "step": 12160 }, { "epoch": 18.245877061469265, "grad_norm": 0.37342801690101624, "learning_rate": 9.860146078483836e-05, "loss": 0.018, "step": 12170 }, { "epoch": 18.26086956521739, "grad_norm": 0.23427225649356842, "learning_rate": 9.859757478875905e-05, "loss": 0.0197, "step": 12180 }, { "epoch": 18.275862068965516, "grad_norm": 0.37950870394706726, "learning_rate": 9.859368347812204e-05, "loss": 0.0211, "step": 12190 }, { "epoch": 18.290854572713645, "grad_norm": 0.27148258686065674, "learning_rate": 9.858978685335285e-05, "loss": 0.0151, "step": 12200 }, { "epoch": 18.30584707646177, "grad_norm": 0.3774438798427582, "learning_rate": 9.858588491487763e-05, "loss": 0.0191, "step": 12210 }, { "epoch": 18.320839580209896, "grad_norm": 0.3212546706199646, "learning_rate": 9.858197766312308e-05, "loss": 0.0196, "step": 12220 }, { "epoch": 18.33583208395802, "grad_norm": 0.26789844036102295, "learning_rate": 9.857806509851649e-05, "loss": 0.0219, "step": 12230 }, { "epoch": 18.350824587706146, "grad_norm": 0.27399197220802307, "learning_rate": 9.857414722148574e-05, "loss": 0.018, "step": 12240 }, { "epoch": 18.36581709145427, "grad_norm": 0.22868497669696808, "learning_rate": 9.857022403245928e-05, "loss": 0.0173, "step": 12250 }, { "epoch": 18.3808095952024, "grad_norm": 0.455655038356781, "learning_rate": 9.856629553186615e-05, "loss": 0.0234, "step": 12260 }, { "epoch": 18.395802098950526, "grad_norm": 0.22151537239551544, "learning_rate": 9.856236172013595e-05, "loss": 0.02, "step": 12270 }, { "epoch": 18.41079460269865, "grad_norm": 0.1984046846628189, "learning_rate": 9.85584225976989e-05, "loss": 0.0187, "step": 12280 }, { "epoch": 18.425787106446776, "grad_norm": 0.20335893332958221, "learning_rate": 9.855447816498575e-05, "loss": 0.0157, "step": 12290 }, { "epoch": 18.4407796101949, "grad_norm": 0.26600539684295654, "learning_rate": 9.855052842242787e-05, "loss": 0.0177, "step": 12300 }, { "epoch": 18.455772113943027, "grad_norm": 0.25704413652420044, "learning_rate": 9.85465733704572e-05, "loss": 0.0174, "step": 12310 }, { "epoch": 18.470764617691156, "grad_norm": 0.26265865564346313, "learning_rate": 9.854261300950624e-05, "loss": 0.0162, "step": 12320 }, { "epoch": 18.48575712143928, "grad_norm": 0.4101729691028595, "learning_rate": 9.853864734000813e-05, "loss": 0.018, "step": 12330 }, { "epoch": 18.500749625187407, "grad_norm": 0.1752038300037384, "learning_rate": 9.85346763623965e-05, "loss": 0.0182, "step": 12340 }, { "epoch": 18.515742128935532, "grad_norm": 0.33103224635124207, "learning_rate": 9.853070007710564e-05, "loss": 0.0166, "step": 12350 }, { "epoch": 18.530734632683657, "grad_norm": 0.33578845858573914, "learning_rate": 9.85267184845704e-05, "loss": 0.0165, "step": 12360 }, { "epoch": 18.545727136431783, "grad_norm": 0.23676510155200958, "learning_rate": 9.852273158522616e-05, "loss": 0.0169, "step": 12370 }, { "epoch": 18.56071964017991, "grad_norm": 0.37489232420921326, "learning_rate": 9.851873937950896e-05, "loss": 0.0214, "step": 12380 }, { "epoch": 18.575712143928037, "grad_norm": 0.3000742197036743, "learning_rate": 9.851474186785537e-05, "loss": 0.0186, "step": 12390 }, { "epoch": 18.590704647676162, "grad_norm": 0.3457273244857788, "learning_rate": 9.851073905070254e-05, "loss": 0.022, "step": 12400 }, { "epoch": 18.605697151424287, "grad_norm": 0.23563668131828308, "learning_rate": 9.850673092848824e-05, "loss": 0.0189, "step": 12410 }, { "epoch": 18.620689655172413, "grad_norm": 0.32547399401664734, "learning_rate": 9.850271750165077e-05, "loss": 0.022, "step": 12420 }, { "epoch": 18.635682158920538, "grad_norm": 0.21851371228694916, "learning_rate": 9.849869877062902e-05, "loss": 0.017, "step": 12430 }, { "epoch": 18.650674662668667, "grad_norm": 0.35620707273483276, "learning_rate": 9.849467473586252e-05, "loss": 0.0182, "step": 12440 }, { "epoch": 18.665667166416792, "grad_norm": 0.3524804711341858, "learning_rate": 9.849064539779127e-05, "loss": 0.015, "step": 12450 }, { "epoch": 18.680659670164918, "grad_norm": 0.26485905051231384, "learning_rate": 9.848661075685594e-05, "loss": 0.0166, "step": 12460 }, { "epoch": 18.695652173913043, "grad_norm": 0.38857874274253845, "learning_rate": 9.848257081349778e-05, "loss": 0.0178, "step": 12470 }, { "epoch": 18.71064467766117, "grad_norm": 0.30191248655319214, "learning_rate": 9.847852556815856e-05, "loss": 0.0176, "step": 12480 }, { "epoch": 18.725637181409294, "grad_norm": 0.2937434911727905, "learning_rate": 9.847447502128067e-05, "loss": 0.0174, "step": 12490 }, { "epoch": 18.740629685157423, "grad_norm": 0.2845187187194824, "learning_rate": 9.847041917330708e-05, "loss": 0.0167, "step": 12500 }, { "epoch": 18.755622188905548, "grad_norm": 0.2258828580379486, "learning_rate": 9.846635802468132e-05, "loss": 0.0144, "step": 12510 }, { "epoch": 18.770614692653673, "grad_norm": 0.24676118791103363, "learning_rate": 9.84622915758475e-05, "loss": 0.0164, "step": 12520 }, { "epoch": 18.7856071964018, "grad_norm": 0.17169874906539917, "learning_rate": 9.845821982725034e-05, "loss": 0.016, "step": 12530 }, { "epoch": 18.800599700149924, "grad_norm": 0.33855360746383667, "learning_rate": 9.845414277933514e-05, "loss": 0.0159, "step": 12540 }, { "epoch": 18.81559220389805, "grad_norm": 0.25056546926498413, "learning_rate": 9.845006043254771e-05, "loss": 0.0176, "step": 12550 }, { "epoch": 18.830584707646178, "grad_norm": 0.2914973199367523, "learning_rate": 9.844597278733451e-05, "loss": 0.0198, "step": 12560 }, { "epoch": 18.845577211394303, "grad_norm": 0.2538295388221741, "learning_rate": 9.844187984414259e-05, "loss": 0.0199, "step": 12570 }, { "epoch": 18.86056971514243, "grad_norm": 0.3619343042373657, "learning_rate": 9.84377816034195e-05, "loss": 0.0177, "step": 12580 }, { "epoch": 18.875562218890554, "grad_norm": 0.3386528491973877, "learning_rate": 9.843367806561345e-05, "loss": 0.0157, "step": 12590 }, { "epoch": 18.89055472263868, "grad_norm": 0.21817930042743683, "learning_rate": 9.842956923117317e-05, "loss": 0.0187, "step": 12600 }, { "epoch": 18.90554722638681, "grad_norm": 0.24518387019634247, "learning_rate": 9.842545510054802e-05, "loss": 0.0161, "step": 12610 }, { "epoch": 18.920539730134934, "grad_norm": 0.22747547924518585, "learning_rate": 9.842133567418792e-05, "loss": 0.0216, "step": 12620 }, { "epoch": 18.93553223388306, "grad_norm": 0.17589162290096283, "learning_rate": 9.841721095254333e-05, "loss": 0.0174, "step": 12630 }, { "epoch": 18.950524737631184, "grad_norm": 0.27028319239616394, "learning_rate": 9.841308093606537e-05, "loss": 0.0153, "step": 12640 }, { "epoch": 18.96551724137931, "grad_norm": 0.20437146723270416, "learning_rate": 9.840894562520565e-05, "loss": 0.0184, "step": 12650 }, { "epoch": 18.980509745127435, "grad_norm": 0.2914891839027405, "learning_rate": 9.840480502041642e-05, "loss": 0.0172, "step": 12660 }, { "epoch": 18.995502248875564, "grad_norm": 0.2637649178504944, "learning_rate": 9.840065912215049e-05, "loss": 0.0163, "step": 12670 }, { "epoch": 19.01049475262369, "grad_norm": 0.2773473560810089, "learning_rate": 9.839650793086124e-05, "loss": 0.0173, "step": 12680 }, { "epoch": 19.025487256371814, "grad_norm": 0.30206388235092163, "learning_rate": 9.839235144700265e-05, "loss": 0.0192, "step": 12690 }, { "epoch": 19.04047976011994, "grad_norm": 0.2732647657394409, "learning_rate": 9.838818967102926e-05, "loss": 0.0199, "step": 12700 }, { "epoch": 19.055472263868065, "grad_norm": 0.17672719061374664, "learning_rate": 9.83840226033962e-05, "loss": 0.0175, "step": 12710 }, { "epoch": 19.07046476761619, "grad_norm": 0.20046654343605042, "learning_rate": 9.837985024455918e-05, "loss": 0.0157, "step": 12720 }, { "epoch": 19.08545727136432, "grad_norm": 0.22809727489948273, "learning_rate": 9.837567259497447e-05, "loss": 0.016, "step": 12730 }, { "epoch": 19.100449775112445, "grad_norm": 0.3346262276172638, "learning_rate": 9.837148965509894e-05, "loss": 0.0169, "step": 12740 }, { "epoch": 19.11544227886057, "grad_norm": 0.3649369478225708, "learning_rate": 9.836730142539001e-05, "loss": 0.0201, "step": 12750 }, { "epoch": 19.130434782608695, "grad_norm": 0.1846979856491089, "learning_rate": 9.836310790630574e-05, "loss": 0.0165, "step": 12760 }, { "epoch": 19.14542728635682, "grad_norm": 0.255333811044693, "learning_rate": 9.83589090983047e-05, "loss": 0.0179, "step": 12770 }, { "epoch": 19.160419790104946, "grad_norm": 0.39414486289024353, "learning_rate": 9.835470500184605e-05, "loss": 0.0209, "step": 12780 }, { "epoch": 19.175412293853075, "grad_norm": 0.32975324988365173, "learning_rate": 9.835049561738957e-05, "loss": 0.0165, "step": 12790 }, { "epoch": 19.1904047976012, "grad_norm": 0.3512868583202362, "learning_rate": 9.834628094539558e-05, "loss": 0.0214, "step": 12800 }, { "epoch": 19.205397301349326, "grad_norm": 0.23527395725250244, "learning_rate": 9.834206098632499e-05, "loss": 0.0216, "step": 12810 }, { "epoch": 19.22038980509745, "grad_norm": 0.2746981680393219, "learning_rate": 9.833783574063931e-05, "loss": 0.0196, "step": 12820 }, { "epoch": 19.235382308845576, "grad_norm": 0.24950778484344482, "learning_rate": 9.833360520880058e-05, "loss": 0.018, "step": 12830 }, { "epoch": 19.2503748125937, "grad_norm": 0.32438430190086365, "learning_rate": 9.832936939127144e-05, "loss": 0.0214, "step": 12840 }, { "epoch": 19.26536731634183, "grad_norm": 0.36184000968933105, "learning_rate": 9.832512828851515e-05, "loss": 0.017, "step": 12850 }, { "epoch": 19.280359820089956, "grad_norm": 0.285871684551239, "learning_rate": 9.832088190099546e-05, "loss": 0.0178, "step": 12860 }, { "epoch": 19.29535232383808, "grad_norm": 0.43149641156196594, "learning_rate": 9.831663022917679e-05, "loss": 0.0176, "step": 12870 }, { "epoch": 19.310344827586206, "grad_norm": 0.20035021007061005, "learning_rate": 9.831237327352407e-05, "loss": 0.0188, "step": 12880 }, { "epoch": 19.32533733133433, "grad_norm": 0.36328136920928955, "learning_rate": 9.830811103450286e-05, "loss": 0.0206, "step": 12890 }, { "epoch": 19.340329835082457, "grad_norm": 0.2715001106262207, "learning_rate": 9.830384351257924e-05, "loss": 0.0165, "step": 12900 }, { "epoch": 19.355322338830586, "grad_norm": 0.23635263741016388, "learning_rate": 9.829957070821993e-05, "loss": 0.0168, "step": 12910 }, { "epoch": 19.37031484257871, "grad_norm": 0.16656330227851868, "learning_rate": 9.829529262189218e-05, "loss": 0.0166, "step": 12920 }, { "epoch": 19.385307346326837, "grad_norm": 0.24233561754226685, "learning_rate": 9.829100925406385e-05, "loss": 0.0172, "step": 12930 }, { "epoch": 19.400299850074962, "grad_norm": 0.3161177337169647, "learning_rate": 9.828672060520333e-05, "loss": 0.0176, "step": 12940 }, { "epoch": 19.415292353823087, "grad_norm": 0.3796336352825165, "learning_rate": 9.828242667577966e-05, "loss": 0.0161, "step": 12950 }, { "epoch": 19.430284857571216, "grad_norm": 0.35500243306159973, "learning_rate": 9.82781274662624e-05, "loss": 0.0177, "step": 12960 }, { "epoch": 19.44527736131934, "grad_norm": 0.32797470688819885, "learning_rate": 9.82738229771217e-05, "loss": 0.019, "step": 12970 }, { "epoch": 19.460269865067467, "grad_norm": 0.24044419825077057, "learning_rate": 9.826951320882829e-05, "loss": 0.023, "step": 12980 }, { "epoch": 19.475262368815592, "grad_norm": 0.3549741208553314, "learning_rate": 9.826519816185351e-05, "loss": 0.0179, "step": 12990 }, { "epoch": 19.490254872563717, "grad_norm": 0.4027438163757324, "learning_rate": 9.826087783666921e-05, "loss": 0.0177, "step": 13000 }, { "epoch": 19.505247376311843, "grad_norm": 0.35989898443222046, "learning_rate": 9.825655223374787e-05, "loss": 0.0244, "step": 13010 }, { "epoch": 19.52023988005997, "grad_norm": 0.25312358140945435, "learning_rate": 9.825222135356253e-05, "loss": 0.0205, "step": 13020 }, { "epoch": 19.535232383808097, "grad_norm": 0.37182095646858215, "learning_rate": 9.82478851965868e-05, "loss": 0.0193, "step": 13030 }, { "epoch": 19.550224887556222, "grad_norm": 0.33243444561958313, "learning_rate": 9.82435437632949e-05, "loss": 0.0196, "step": 13040 }, { "epoch": 19.565217391304348, "grad_norm": 0.29809650778770447, "learning_rate": 9.823919705416158e-05, "loss": 0.017, "step": 13050 }, { "epoch": 19.580209895052473, "grad_norm": 0.2325151115655899, "learning_rate": 9.82348450696622e-05, "loss": 0.0197, "step": 13060 }, { "epoch": 19.5952023988006, "grad_norm": 0.33357271552085876, "learning_rate": 9.823048781027268e-05, "loss": 0.016, "step": 13070 }, { "epoch": 19.610194902548727, "grad_norm": 0.249668151140213, "learning_rate": 9.822612527646953e-05, "loss": 0.0145, "step": 13080 }, { "epoch": 19.625187406296853, "grad_norm": 0.2761727571487427, "learning_rate": 9.822175746872984e-05, "loss": 0.0198, "step": 13090 }, { "epoch": 19.640179910044978, "grad_norm": 0.30015939474105835, "learning_rate": 9.821738438753123e-05, "loss": 0.0189, "step": 13100 }, { "epoch": 19.655172413793103, "grad_norm": 0.2608415186405182, "learning_rate": 9.821300603335196e-05, "loss": 0.0186, "step": 13110 }, { "epoch": 19.67016491754123, "grad_norm": 0.20082473754882812, "learning_rate": 9.820862240667085e-05, "loss": 0.0186, "step": 13120 }, { "epoch": 19.685157421289354, "grad_norm": 0.21545039117336273, "learning_rate": 9.820423350796726e-05, "loss": 0.0225, "step": 13130 }, { "epoch": 19.700149925037483, "grad_norm": 0.19772951304912567, "learning_rate": 9.819983933772118e-05, "loss": 0.0163, "step": 13140 }, { "epoch": 19.715142428785608, "grad_norm": 0.236614391207695, "learning_rate": 9.819543989641314e-05, "loss": 0.0151, "step": 13150 }, { "epoch": 19.730134932533733, "grad_norm": 0.252807080745697, "learning_rate": 9.819103518452423e-05, "loss": 0.0157, "step": 13160 }, { "epoch": 19.74512743628186, "grad_norm": 0.20828108489513397, "learning_rate": 9.818662520253618e-05, "loss": 0.0155, "step": 13170 }, { "epoch": 19.760119940029984, "grad_norm": 0.2665460407733917, "learning_rate": 9.818220995093126e-05, "loss": 0.0155, "step": 13180 }, { "epoch": 19.77511244377811, "grad_norm": 0.22761230170726776, "learning_rate": 9.817778943019228e-05, "loss": 0.0144, "step": 13190 }, { "epoch": 19.79010494752624, "grad_norm": 0.3716394007205963, "learning_rate": 9.81733636408027e-05, "loss": 0.019, "step": 13200 }, { "epoch": 19.805097451274364, "grad_norm": 0.3469398617744446, "learning_rate": 9.816893258324649e-05, "loss": 0.018, "step": 13210 }, { "epoch": 19.82008995502249, "grad_norm": 0.31343406438827515, "learning_rate": 9.816449625800823e-05, "loss": 0.0191, "step": 13220 }, { "epoch": 19.835082458770614, "grad_norm": 0.31744518876075745, "learning_rate": 9.816005466557308e-05, "loss": 0.0191, "step": 13230 }, { "epoch": 19.85007496251874, "grad_norm": 0.20842017233371735, "learning_rate": 9.815560780642674e-05, "loss": 0.0197, "step": 13240 }, { "epoch": 19.865067466266865, "grad_norm": 0.3410826325416565, "learning_rate": 9.815115568105555e-05, "loss": 0.0191, "step": 13250 }, { "epoch": 19.880059970014994, "grad_norm": 0.3674405813217163, "learning_rate": 9.814669828994638e-05, "loss": 0.019, "step": 13260 }, { "epoch": 19.89505247376312, "grad_norm": 0.24686864018440247, "learning_rate": 9.814223563358665e-05, "loss": 0.0169, "step": 13270 }, { "epoch": 19.910044977511244, "grad_norm": 0.32331129908561707, "learning_rate": 9.813776771246443e-05, "loss": 0.0229, "step": 13280 }, { "epoch": 19.92503748125937, "grad_norm": 0.29576393961906433, "learning_rate": 9.813329452706829e-05, "loss": 0.0186, "step": 13290 }, { "epoch": 19.940029985007495, "grad_norm": 0.2288784682750702, "learning_rate": 9.812881607788744e-05, "loss": 0.0183, "step": 13300 }, { "epoch": 19.955022488755624, "grad_norm": 0.38841214776039124, "learning_rate": 9.812433236541163e-05, "loss": 0.0172, "step": 13310 }, { "epoch": 19.97001499250375, "grad_norm": 0.29272225499153137, "learning_rate": 9.811984339013116e-05, "loss": 0.0145, "step": 13320 }, { "epoch": 19.985007496251875, "grad_norm": 0.32835492491722107, "learning_rate": 9.811534915253698e-05, "loss": 0.0175, "step": 13330 }, { "epoch": 20.0, "grad_norm": 0.3348242938518524, "learning_rate": 9.811084965312056e-05, "loss": 0.0175, "step": 13340 }, { "epoch": 20.014992503748125, "grad_norm": 0.1942165195941925, "learning_rate": 9.810634489237396e-05, "loss": 0.0145, "step": 13350 }, { "epoch": 20.02998500749625, "grad_norm": 0.27739977836608887, "learning_rate": 9.81018348707898e-05, "loss": 0.016, "step": 13360 }, { "epoch": 20.04497751124438, "grad_norm": 0.3151462972164154, "learning_rate": 9.809731958886131e-05, "loss": 0.0148, "step": 13370 }, { "epoch": 20.059970014992505, "grad_norm": 0.29760026931762695, "learning_rate": 9.809279904708224e-05, "loss": 0.0169, "step": 13380 }, { "epoch": 20.07496251874063, "grad_norm": 0.31863540410995483, "learning_rate": 9.808827324594699e-05, "loss": 0.0152, "step": 13390 }, { "epoch": 20.089955022488756, "grad_norm": 0.31194868683815, "learning_rate": 9.808374218595046e-05, "loss": 0.0128, "step": 13400 }, { "epoch": 20.10494752623688, "grad_norm": 0.1976701021194458, "learning_rate": 9.80792058675882e-05, "loss": 0.0173, "step": 13410 }, { "epoch": 20.119940029985006, "grad_norm": 0.24768295884132385, "learning_rate": 9.807466429135627e-05, "loss": 0.017, "step": 13420 }, { "epoch": 20.134932533733135, "grad_norm": 0.4788236618041992, "learning_rate": 9.807011745775132e-05, "loss": 0.02, "step": 13430 }, { "epoch": 20.14992503748126, "grad_norm": 0.3160500228404999, "learning_rate": 9.806556536727061e-05, "loss": 0.0206, "step": 13440 }, { "epoch": 20.164917541229386, "grad_norm": 0.22140814363956451, "learning_rate": 9.806100802041193e-05, "loss": 0.0188, "step": 13450 }, { "epoch": 20.17991004497751, "grad_norm": 0.13495418429374695, "learning_rate": 9.805644541767368e-05, "loss": 0.0169, "step": 13460 }, { "epoch": 20.194902548725636, "grad_norm": 0.40684324502944946, "learning_rate": 9.805187755955478e-05, "loss": 0.0222, "step": 13470 }, { "epoch": 20.20989505247376, "grad_norm": 0.33955880999565125, "learning_rate": 9.804730444655483e-05, "loss": 0.0172, "step": 13480 }, { "epoch": 20.22488755622189, "grad_norm": 0.2410963475704193, "learning_rate": 9.804272607917388e-05, "loss": 0.0228, "step": 13490 }, { "epoch": 20.239880059970016, "grad_norm": 0.32732996344566345, "learning_rate": 9.803814245791265e-05, "loss": 0.0215, "step": 13500 }, { "epoch": 20.25487256371814, "grad_norm": 0.363163024187088, "learning_rate": 9.803355358327239e-05, "loss": 0.0217, "step": 13510 }, { "epoch": 20.269865067466267, "grad_norm": 0.36060550808906555, "learning_rate": 9.802895945575492e-05, "loss": 0.0188, "step": 13520 }, { "epoch": 20.284857571214392, "grad_norm": 0.19116728007793427, "learning_rate": 9.802436007586266e-05, "loss": 0.018, "step": 13530 }, { "epoch": 20.299850074962517, "grad_norm": 0.2859046459197998, "learning_rate": 9.801975544409858e-05, "loss": 0.0194, "step": 13540 }, { "epoch": 20.314842578710646, "grad_norm": 0.26713165640830994, "learning_rate": 9.801514556096625e-05, "loss": 0.0217, "step": 13550 }, { "epoch": 20.32983508245877, "grad_norm": 0.2652893364429474, "learning_rate": 9.801053042696977e-05, "loss": 0.0197, "step": 13560 }, { "epoch": 20.344827586206897, "grad_norm": 0.23141562938690186, "learning_rate": 9.800591004261388e-05, "loss": 0.018, "step": 13570 }, { "epoch": 20.359820089955022, "grad_norm": 0.18782514333724976, "learning_rate": 9.800128440840385e-05, "loss": 0.0225, "step": 13580 }, { "epoch": 20.374812593703147, "grad_norm": 0.21905966103076935, "learning_rate": 9.799665352484552e-05, "loss": 0.0173, "step": 13590 }, { "epoch": 20.389805097451273, "grad_norm": 0.18906201422214508, "learning_rate": 9.799201739244532e-05, "loss": 0.0167, "step": 13600 }, { "epoch": 20.4047976011994, "grad_norm": 0.22617417573928833, "learning_rate": 9.798737601171025e-05, "loss": 0.0154, "step": 13610 }, { "epoch": 20.419790104947527, "grad_norm": 0.2630534768104553, "learning_rate": 9.79827293831479e-05, "loss": 0.0195, "step": 13620 }, { "epoch": 20.434782608695652, "grad_norm": 0.31284359097480774, "learning_rate": 9.797807750726638e-05, "loss": 0.0191, "step": 13630 }, { "epoch": 20.449775112443778, "grad_norm": 0.275217205286026, "learning_rate": 9.797342038457446e-05, "loss": 0.0213, "step": 13640 }, { "epoch": 20.464767616191903, "grad_norm": 0.3649194538593292, "learning_rate": 9.796875801558141e-05, "loss": 0.0142, "step": 13650 }, { "epoch": 20.47976011994003, "grad_norm": 0.31813138723373413, "learning_rate": 9.79640904007971e-05, "loss": 0.0201, "step": 13660 }, { "epoch": 20.494752623688157, "grad_norm": 0.1981005072593689, "learning_rate": 9.795941754073199e-05, "loss": 0.0135, "step": 13670 }, { "epoch": 20.509745127436283, "grad_norm": 0.2551385760307312, "learning_rate": 9.795473943589705e-05, "loss": 0.0192, "step": 13680 }, { "epoch": 20.524737631184408, "grad_norm": 0.18987824022769928, "learning_rate": 9.795005608680394e-05, "loss": 0.0174, "step": 13690 }, { "epoch": 20.539730134932533, "grad_norm": 0.33088603615760803, "learning_rate": 9.794536749396477e-05, "loss": 0.0193, "step": 13700 }, { "epoch": 20.55472263868066, "grad_norm": 0.2913731038570404, "learning_rate": 9.79406736578923e-05, "loss": 0.0203, "step": 13710 }, { "epoch": 20.569715142428784, "grad_norm": 0.32710951566696167, "learning_rate": 9.793597457909984e-05, "loss": 0.0159, "step": 13720 }, { "epoch": 20.584707646176913, "grad_norm": 0.3565142750740051, "learning_rate": 9.793127025810127e-05, "loss": 0.0201, "step": 13730 }, { "epoch": 20.599700149925038, "grad_norm": 0.4213528335094452, "learning_rate": 9.792656069541104e-05, "loss": 0.0175, "step": 13740 }, { "epoch": 20.614692653673163, "grad_norm": 0.3603024184703827, "learning_rate": 9.79218458915442e-05, "loss": 0.0207, "step": 13750 }, { "epoch": 20.62968515742129, "grad_norm": 0.36168450117111206, "learning_rate": 9.791712584701634e-05, "loss": 0.02, "step": 13760 }, { "epoch": 20.644677661169414, "grad_norm": 0.2971864640712738, "learning_rate": 9.791240056234364e-05, "loss": 0.0181, "step": 13770 }, { "epoch": 20.659670164917543, "grad_norm": 0.4101530611515045, "learning_rate": 9.790767003804283e-05, "loss": 0.0177, "step": 13780 }, { "epoch": 20.67466266866567, "grad_norm": 0.2791699767112732, "learning_rate": 9.790293427463126e-05, "loss": 0.0175, "step": 13790 }, { "epoch": 20.689655172413794, "grad_norm": 0.18372635543346405, "learning_rate": 9.789819327262684e-05, "loss": 0.0173, "step": 13800 }, { "epoch": 20.70464767616192, "grad_norm": 0.3356854319572449, "learning_rate": 9.7893447032548e-05, "loss": 0.02, "step": 13810 }, { "epoch": 20.719640179910044, "grad_norm": 0.29214727878570557, "learning_rate": 9.78886955549138e-05, "loss": 0.0166, "step": 13820 }, { "epoch": 20.73463268365817, "grad_norm": 0.3227708637714386, "learning_rate": 9.788393884024387e-05, "loss": 0.0196, "step": 13830 }, { "epoch": 20.7496251874063, "grad_norm": 0.319805771112442, "learning_rate": 9.787917688905836e-05, "loss": 0.0189, "step": 13840 }, { "epoch": 20.764617691154424, "grad_norm": 0.35711804032325745, "learning_rate": 9.787440970187807e-05, "loss": 0.0193, "step": 13850 }, { "epoch": 20.77961019490255, "grad_norm": 0.17957423627376556, "learning_rate": 9.786963727922429e-05, "loss": 0.0172, "step": 13860 }, { "epoch": 20.794602698650674, "grad_norm": 0.25479215383529663, "learning_rate": 9.786485962161897e-05, "loss": 0.0185, "step": 13870 }, { "epoch": 20.8095952023988, "grad_norm": 0.4166600704193115, "learning_rate": 9.786007672958455e-05, "loss": 0.0174, "step": 13880 }, { "epoch": 20.824587706146925, "grad_norm": 0.3121415972709656, "learning_rate": 9.78552886036441e-05, "loss": 0.0153, "step": 13890 }, { "epoch": 20.839580209895054, "grad_norm": 0.3447553515434265, "learning_rate": 9.785049524432124e-05, "loss": 0.0203, "step": 13900 }, { "epoch": 20.85457271364318, "grad_norm": 0.27585864067077637, "learning_rate": 9.784569665214016e-05, "loss": 0.017, "step": 13910 }, { "epoch": 20.869565217391305, "grad_norm": 0.3016524910926819, "learning_rate": 9.784089282762563e-05, "loss": 0.016, "step": 13920 }, { "epoch": 20.88455772113943, "grad_norm": 0.32224148511886597, "learning_rate": 9.7836083771303e-05, "loss": 0.0194, "step": 13930 }, { "epoch": 20.899550224887555, "grad_norm": 0.38317766785621643, "learning_rate": 9.783126948369817e-05, "loss": 0.0201, "step": 13940 }, { "epoch": 20.91454272863568, "grad_norm": 0.3167893588542938, "learning_rate": 9.78264499653376e-05, "loss": 0.0209, "step": 13950 }, { "epoch": 20.92953523238381, "grad_norm": 0.2127041220664978, "learning_rate": 9.782162521674838e-05, "loss": 0.0154, "step": 13960 }, { "epoch": 20.944527736131935, "grad_norm": 0.1864314079284668, "learning_rate": 9.781679523845812e-05, "loss": 0.0143, "step": 13970 }, { "epoch": 20.95952023988006, "grad_norm": 0.2359812706708908, "learning_rate": 9.781196003099502e-05, "loss": 0.0162, "step": 13980 }, { "epoch": 20.974512743628186, "grad_norm": 0.2948458790779114, "learning_rate": 9.780711959488786e-05, "loss": 0.0144, "step": 13990 }, { "epoch": 20.98950524737631, "grad_norm": 0.29502594470977783, "learning_rate": 9.780227393066599e-05, "loss": 0.0149, "step": 14000 }, { "epoch": 21.004497751124436, "grad_norm": 0.30035147070884705, "learning_rate": 9.77974230388593e-05, "loss": 0.0157, "step": 14010 }, { "epoch": 21.019490254872565, "grad_norm": 0.25466540455818176, "learning_rate": 9.779256691999829e-05, "loss": 0.017, "step": 14020 }, { "epoch": 21.03448275862069, "grad_norm": 0.31043848395347595, "learning_rate": 9.778770557461403e-05, "loss": 0.0171, "step": 14030 }, { "epoch": 21.049475262368816, "grad_norm": 0.313014417886734, "learning_rate": 9.778283900323812e-05, "loss": 0.019, "step": 14040 }, { "epoch": 21.06446776611694, "grad_norm": 0.2512334883213043, "learning_rate": 9.777796720640277e-05, "loss": 0.0169, "step": 14050 }, { "epoch": 21.079460269865066, "grad_norm": 0.3681679368019104, "learning_rate": 9.777309018464078e-05, "loss": 0.0172, "step": 14060 }, { "epoch": 21.09445277361319, "grad_norm": 0.3131729066371918, "learning_rate": 9.776820793848547e-05, "loss": 0.0192, "step": 14070 }, { "epoch": 21.10944527736132, "grad_norm": 0.17755649983882904, "learning_rate": 9.776332046847075e-05, "loss": 0.015, "step": 14080 }, { "epoch": 21.124437781109446, "grad_norm": 0.19991588592529297, "learning_rate": 9.775842777513111e-05, "loss": 0.0141, "step": 14090 }, { "epoch": 21.13943028485757, "grad_norm": 0.24766555428504944, "learning_rate": 9.775352985900163e-05, "loss": 0.0177, "step": 14100 }, { "epoch": 21.154422788605697, "grad_norm": 0.2013981193304062, "learning_rate": 9.774862672061791e-05, "loss": 0.0158, "step": 14110 }, { "epoch": 21.169415292353822, "grad_norm": 0.3406141698360443, "learning_rate": 9.774371836051616e-05, "loss": 0.0166, "step": 14120 }, { "epoch": 21.18440779610195, "grad_norm": 0.3107834458351135, "learning_rate": 9.773880477923315e-05, "loss": 0.0163, "step": 14130 }, { "epoch": 21.199400299850076, "grad_norm": 0.32235240936279297, "learning_rate": 9.773388597730623e-05, "loss": 0.0196, "step": 14140 }, { "epoch": 21.2143928035982, "grad_norm": 0.22626829147338867, "learning_rate": 9.77289619552733e-05, "loss": 0.0155, "step": 14150 }, { "epoch": 21.229385307346327, "grad_norm": 0.18253424763679504, "learning_rate": 9.772403271367285e-05, "loss": 0.0147, "step": 14160 }, { "epoch": 21.244377811094452, "grad_norm": 0.2575116753578186, "learning_rate": 9.771909825304396e-05, "loss": 0.0181, "step": 14170 }, { "epoch": 21.259370314842577, "grad_norm": 0.19313398003578186, "learning_rate": 9.771415857392619e-05, "loss": 0.0182, "step": 14180 }, { "epoch": 21.274362818590706, "grad_norm": 0.2754906415939331, "learning_rate": 9.770921367685978e-05, "loss": 0.0153, "step": 14190 }, { "epoch": 21.28935532233883, "grad_norm": 0.18802964687347412, "learning_rate": 9.770426356238551e-05, "loss": 0.0135, "step": 14200 }, { "epoch": 21.304347826086957, "grad_norm": 0.2025705724954605, "learning_rate": 9.769930823104469e-05, "loss": 0.0156, "step": 14210 }, { "epoch": 21.319340329835082, "grad_norm": 0.2770843207836151, "learning_rate": 9.769434768337926e-05, "loss": 0.0188, "step": 14220 }, { "epoch": 21.334332833583208, "grad_norm": 0.3300009071826935, "learning_rate": 9.768938191993164e-05, "loss": 0.0181, "step": 14230 }, { "epoch": 21.349325337331333, "grad_norm": 0.2020072191953659, "learning_rate": 9.768441094124494e-05, "loss": 0.0137, "step": 14240 }, { "epoch": 21.364317841079462, "grad_norm": 0.23095525801181793, "learning_rate": 9.767943474786275e-05, "loss": 0.017, "step": 14250 }, { "epoch": 21.379310344827587, "grad_norm": 0.22784683108329773, "learning_rate": 9.767445334032923e-05, "loss": 0.0169, "step": 14260 }, { "epoch": 21.394302848575713, "grad_norm": 0.25754162669181824, "learning_rate": 9.766946671918919e-05, "loss": 0.0144, "step": 14270 }, { "epoch": 21.409295352323838, "grad_norm": 0.40327373147010803, "learning_rate": 9.766447488498796e-05, "loss": 0.0164, "step": 14280 }, { "epoch": 21.424287856071963, "grad_norm": 0.3130221962928772, "learning_rate": 9.765947783827139e-05, "loss": 0.0138, "step": 14290 }, { "epoch": 21.43928035982009, "grad_norm": 0.29370996356010437, "learning_rate": 9.765447557958599e-05, "loss": 0.0142, "step": 14300 }, { "epoch": 21.454272863568217, "grad_norm": 0.15203772485256195, "learning_rate": 9.764946810947879e-05, "loss": 0.0156, "step": 14310 }, { "epoch": 21.469265367316343, "grad_norm": 0.34268924593925476, "learning_rate": 9.764445542849738e-05, "loss": 0.0185, "step": 14320 }, { "epoch": 21.484257871064468, "grad_norm": 0.1996704787015915, "learning_rate": 9.763943753718998e-05, "loss": 0.0175, "step": 14330 }, { "epoch": 21.499250374812593, "grad_norm": 0.31419113278388977, "learning_rate": 9.76344144361053e-05, "loss": 0.0143, "step": 14340 }, { "epoch": 21.51424287856072, "grad_norm": 0.4733933210372925, "learning_rate": 9.762938612579269e-05, "loss": 0.0198, "step": 14350 }, { "epoch": 21.529235382308844, "grad_norm": 0.3734665811061859, "learning_rate": 9.762435260680202e-05, "loss": 0.0167, "step": 14360 }, { "epoch": 21.544227886056973, "grad_norm": 0.2485605627298355, "learning_rate": 9.761931387968373e-05, "loss": 0.0188, "step": 14370 }, { "epoch": 21.5592203898051, "grad_norm": 0.17298686504364014, "learning_rate": 9.76142699449889e-05, "loss": 0.0167, "step": 14380 }, { "epoch": 21.574212893553224, "grad_norm": 0.25130796432495117, "learning_rate": 9.760922080326908e-05, "loss": 0.0166, "step": 14390 }, { "epoch": 21.58920539730135, "grad_norm": 0.25407254695892334, "learning_rate": 9.760416645507644e-05, "loss": 0.0141, "step": 14400 }, { "epoch": 21.604197901049474, "grad_norm": 0.31153297424316406, "learning_rate": 9.759910690096375e-05, "loss": 0.016, "step": 14410 }, { "epoch": 21.6191904047976, "grad_norm": 0.3694417476654053, "learning_rate": 9.759404214148429e-05, "loss": 0.0166, "step": 14420 }, { "epoch": 21.63418290854573, "grad_norm": 0.20952707529067993, "learning_rate": 9.758897217719191e-05, "loss": 0.0163, "step": 14430 }, { "epoch": 21.649175412293854, "grad_norm": 0.25406986474990845, "learning_rate": 9.758389700864113e-05, "loss": 0.0143, "step": 14440 }, { "epoch": 21.66416791604198, "grad_norm": 0.26296791434288025, "learning_rate": 9.757881663638688e-05, "loss": 0.017, "step": 14450 }, { "epoch": 21.679160419790104, "grad_norm": 0.20150233805179596, "learning_rate": 9.757373106098478e-05, "loss": 0.0197, "step": 14460 }, { "epoch": 21.69415292353823, "grad_norm": 0.2950702905654907, "learning_rate": 9.756864028299097e-05, "loss": 0.0206, "step": 14470 }, { "epoch": 21.709145427286355, "grad_norm": 0.14406372606754303, "learning_rate": 9.75635443029622e-05, "loss": 0.0165, "step": 14480 }, { "epoch": 21.724137931034484, "grad_norm": 0.2908245325088501, "learning_rate": 9.755844312145572e-05, "loss": 0.0226, "step": 14490 }, { "epoch": 21.73913043478261, "grad_norm": 0.3346165716648102, "learning_rate": 9.755333673902941e-05, "loss": 0.0155, "step": 14500 }, { "epoch": 21.754122938530735, "grad_norm": 0.21203595399856567, "learning_rate": 9.75482251562417e-05, "loss": 0.0203, "step": 14510 }, { "epoch": 21.76911544227886, "grad_norm": 0.27434778213500977, "learning_rate": 9.754310837365155e-05, "loss": 0.0152, "step": 14520 }, { "epoch": 21.784107946026985, "grad_norm": 0.8741587400436401, "learning_rate": 9.753798639181856e-05, "loss": 0.0186, "step": 14530 }, { "epoch": 21.79910044977511, "grad_norm": 0.6628063321113586, "learning_rate": 9.753285921130286e-05, "loss": 0.0231, "step": 14540 }, { "epoch": 21.81409295352324, "grad_norm": 0.2178124636411667, "learning_rate": 9.752772683266512e-05, "loss": 0.017, "step": 14550 }, { "epoch": 21.829085457271365, "grad_norm": 0.23616920411586761, "learning_rate": 9.752258925646665e-05, "loss": 0.0229, "step": 14560 }, { "epoch": 21.84407796101949, "grad_norm": 0.24391968548297882, "learning_rate": 9.751744648326926e-05, "loss": 0.0205, "step": 14570 }, { "epoch": 21.859070464767616, "grad_norm": 0.32743626832962036, "learning_rate": 9.751229851363536e-05, "loss": 0.0182, "step": 14580 }, { "epoch": 21.87406296851574, "grad_norm": 0.28643280267715454, "learning_rate": 9.750714534812793e-05, "loss": 0.015, "step": 14590 }, { "epoch": 21.88905547226387, "grad_norm": 0.2959114611148834, "learning_rate": 9.750198698731053e-05, "loss": 0.0174, "step": 14600 }, { "epoch": 21.904047976011995, "grad_norm": 0.22979818284511566, "learning_rate": 9.749682343174722e-05, "loss": 0.0177, "step": 14610 }, { "epoch": 21.91904047976012, "grad_norm": 0.23085641860961914, "learning_rate": 9.749165468200272e-05, "loss": 0.0194, "step": 14620 }, { "epoch": 21.934032983508246, "grad_norm": 0.22109244763851166, "learning_rate": 9.748648073864229e-05, "loss": 0.0201, "step": 14630 }, { "epoch": 21.94902548725637, "grad_norm": 0.2454850673675537, "learning_rate": 9.748130160223168e-05, "loss": 0.0183, "step": 14640 }, { "epoch": 21.964017991004496, "grad_norm": 0.24132436513900757, "learning_rate": 9.747611727333734e-05, "loss": 0.0166, "step": 14650 }, { "epoch": 21.979010494752625, "grad_norm": 0.34359011054039, "learning_rate": 9.74709277525262e-05, "loss": 0.0161, "step": 14660 }, { "epoch": 21.99400299850075, "grad_norm": 0.190283864736557, "learning_rate": 9.746573304036576e-05, "loss": 0.0141, "step": 14670 }, { "epoch": 22.008995502248876, "grad_norm": 0.25724637508392334, "learning_rate": 9.746053313742412e-05, "loss": 0.015, "step": 14680 }, { "epoch": 22.023988005997, "grad_norm": 0.22651644051074982, "learning_rate": 9.745532804426994e-05, "loss": 0.0145, "step": 14690 }, { "epoch": 22.038980509745127, "grad_norm": 0.23560965061187744, "learning_rate": 9.745011776147242e-05, "loss": 0.0188, "step": 14700 }, { "epoch": 22.053973013493252, "grad_norm": 0.17304186522960663, "learning_rate": 9.744490228960138e-05, "loss": 0.0232, "step": 14710 }, { "epoch": 22.06896551724138, "grad_norm": 0.2172386646270752, "learning_rate": 9.743968162922713e-05, "loss": 0.018, "step": 14720 }, { "epoch": 22.083958020989506, "grad_norm": 0.32084596157073975, "learning_rate": 9.743445578092064e-05, "loss": 0.0177, "step": 14730 }, { "epoch": 22.09895052473763, "grad_norm": 0.23453408479690552, "learning_rate": 9.742922474525338e-05, "loss": 0.0202, "step": 14740 }, { "epoch": 22.113943028485757, "grad_norm": 0.25974148511886597, "learning_rate": 9.742398852279741e-05, "loss": 0.016, "step": 14750 }, { "epoch": 22.128935532233882, "grad_norm": 0.23785240948200226, "learning_rate": 9.741874711412535e-05, "loss": 0.0197, "step": 14760 }, { "epoch": 22.143928035982007, "grad_norm": 0.24634622037410736, "learning_rate": 9.741350051981042e-05, "loss": 0.0174, "step": 14770 }, { "epoch": 22.158920539730136, "grad_norm": 0.23042181134223938, "learning_rate": 9.740824874042633e-05, "loss": 0.017, "step": 14780 }, { "epoch": 22.17391304347826, "grad_norm": 0.2726113200187683, "learning_rate": 9.740299177654746e-05, "loss": 0.0171, "step": 14790 }, { "epoch": 22.188905547226387, "grad_norm": 0.18069878220558167, "learning_rate": 9.739772962874867e-05, "loss": 0.015, "step": 14800 }, { "epoch": 22.203898050974512, "grad_norm": 0.23552514612674713, "learning_rate": 9.739246229760541e-05, "loss": 0.0133, "step": 14810 }, { "epoch": 22.218890554722638, "grad_norm": 0.14380328357219696, "learning_rate": 9.738718978369376e-05, "loss": 0.0134, "step": 14820 }, { "epoch": 22.233883058470763, "grad_norm": 0.2879563570022583, "learning_rate": 9.738191208759025e-05, "loss": 0.0156, "step": 14830 }, { "epoch": 22.248875562218892, "grad_norm": 0.1580154299736023, "learning_rate": 9.73766292098721e-05, "loss": 0.0153, "step": 14840 }, { "epoch": 22.263868065967017, "grad_norm": 0.1805155873298645, "learning_rate": 9.737134115111699e-05, "loss": 0.0164, "step": 14850 }, { "epoch": 22.278860569715143, "grad_norm": 0.3507571518421173, "learning_rate": 9.736604791190323e-05, "loss": 0.017, "step": 14860 }, { "epoch": 22.293853073463268, "grad_norm": 0.3423033058643341, "learning_rate": 9.73607494928097e-05, "loss": 0.0152, "step": 14870 }, { "epoch": 22.308845577211393, "grad_norm": 0.2523728311061859, "learning_rate": 9.735544589441581e-05, "loss": 0.0173, "step": 14880 }, { "epoch": 22.32383808095952, "grad_norm": 0.42415088415145874, "learning_rate": 9.735013711730154e-05, "loss": 0.017, "step": 14890 }, { "epoch": 22.338830584707647, "grad_norm": 0.21548691391944885, "learning_rate": 9.734482316204747e-05, "loss": 0.0154, "step": 14900 }, { "epoch": 22.353823088455773, "grad_norm": 0.2387687861919403, "learning_rate": 9.733950402923473e-05, "loss": 0.0147, "step": 14910 }, { "epoch": 22.368815592203898, "grad_norm": 0.26521044969558716, "learning_rate": 9.7334179719445e-05, "loss": 0.0216, "step": 14920 }, { "epoch": 22.383808095952023, "grad_norm": 0.1964312046766281, "learning_rate": 9.732885023326053e-05, "loss": 0.0183, "step": 14930 }, { "epoch": 22.39880059970015, "grad_norm": 0.376968115568161, "learning_rate": 9.732351557126418e-05, "loss": 0.016, "step": 14940 }, { "epoch": 22.413793103448278, "grad_norm": 0.2991052269935608, "learning_rate": 9.731817573403929e-05, "loss": 0.0177, "step": 14950 }, { "epoch": 22.428785607196403, "grad_norm": 0.20478788018226624, "learning_rate": 9.731283072216985e-05, "loss": 0.013, "step": 14960 }, { "epoch": 22.44377811094453, "grad_norm": 0.24671129882335663, "learning_rate": 9.730748053624039e-05, "loss": 0.012, "step": 14970 }, { "epoch": 22.458770614692654, "grad_norm": 0.2083241492509842, "learning_rate": 9.730212517683598e-05, "loss": 0.0154, "step": 14980 }, { "epoch": 22.47376311844078, "grad_norm": 0.28665676712989807, "learning_rate": 9.729676464454228e-05, "loss": 0.0213, "step": 14990 }, { "epoch": 22.488755622188904, "grad_norm": 0.25702548027038574, "learning_rate": 9.72913989399455e-05, "loss": 0.0172, "step": 15000 }, { "epoch": 22.503748125937033, "grad_norm": 0.31195753812789917, "learning_rate": 9.728602806363242e-05, "loss": 0.0196, "step": 15010 }, { "epoch": 22.51874062968516, "grad_norm": 0.32153546810150146, "learning_rate": 9.728065201619043e-05, "loss": 0.0168, "step": 15020 }, { "epoch": 22.533733133433284, "grad_norm": 0.25255024433135986, "learning_rate": 9.727527079820742e-05, "loss": 0.0232, "step": 15030 }, { "epoch": 22.54872563718141, "grad_norm": 0.3766949474811554, "learning_rate": 9.726988441027186e-05, "loss": 0.0174, "step": 15040 }, { "epoch": 22.563718140929534, "grad_norm": 0.44218969345092773, "learning_rate": 9.726449285297281e-05, "loss": 0.021, "step": 15050 }, { "epoch": 22.57871064467766, "grad_norm": 0.31206220388412476, "learning_rate": 9.72590961268999e-05, "loss": 0.0154, "step": 15060 }, { "epoch": 22.59370314842579, "grad_norm": 0.14271488785743713, "learning_rate": 9.725369423264328e-05, "loss": 0.0157, "step": 15070 }, { "epoch": 22.608695652173914, "grad_norm": 0.37517687678337097, "learning_rate": 9.72482871707937e-05, "loss": 0.0175, "step": 15080 }, { "epoch": 22.62368815592204, "grad_norm": 0.3011433184146881, "learning_rate": 9.724287494194247e-05, "loss": 0.0144, "step": 15090 }, { "epoch": 22.638680659670165, "grad_norm": 0.5353286862373352, "learning_rate": 9.723745754668147e-05, "loss": 0.015, "step": 15100 }, { "epoch": 22.65367316341829, "grad_norm": 0.353189617395401, "learning_rate": 9.723203498560313e-05, "loss": 0.0213, "step": 15110 }, { "epoch": 22.668665667166415, "grad_norm": 0.318867027759552, "learning_rate": 9.722660725930046e-05, "loss": 0.025, "step": 15120 }, { "epoch": 22.683658170914544, "grad_norm": 0.2362293303012848, "learning_rate": 9.722117436836702e-05, "loss": 0.0189, "step": 15130 }, { "epoch": 22.69865067466267, "grad_norm": 0.39110442996025085, "learning_rate": 9.721573631339696e-05, "loss": 0.0215, "step": 15140 }, { "epoch": 22.713643178410795, "grad_norm": 0.35140302777290344, "learning_rate": 9.721029309498494e-05, "loss": 0.0183, "step": 15150 }, { "epoch": 22.72863568215892, "grad_norm": 0.2530318796634674, "learning_rate": 9.720484471372627e-05, "loss": 0.0167, "step": 15160 }, { "epoch": 22.743628185907045, "grad_norm": 0.1474679857492447, "learning_rate": 9.719939117021673e-05, "loss": 0.0182, "step": 15170 }, { "epoch": 22.75862068965517, "grad_norm": 0.3213273584842682, "learning_rate": 9.719393246505275e-05, "loss": 0.018, "step": 15180 }, { "epoch": 22.7736131934033, "grad_norm": 0.27369046211242676, "learning_rate": 9.718846859883128e-05, "loss": 0.0169, "step": 15190 }, { "epoch": 22.788605697151425, "grad_norm": 0.22823749482631683, "learning_rate": 9.718299957214982e-05, "loss": 0.0193, "step": 15200 }, { "epoch": 22.80359820089955, "grad_norm": 0.20014454424381256, "learning_rate": 9.717752538560646e-05, "loss": 0.0153, "step": 15210 }, { "epoch": 22.818590704647676, "grad_norm": 0.21373699605464935, "learning_rate": 9.717204603979986e-05, "loss": 0.0164, "step": 15220 }, { "epoch": 22.8335832083958, "grad_norm": 0.26669418811798096, "learning_rate": 9.716656153532922e-05, "loss": 0.0142, "step": 15230 }, { "epoch": 22.848575712143926, "grad_norm": 0.3015044033527374, "learning_rate": 9.716107187279434e-05, "loss": 0.015, "step": 15240 }, { "epoch": 22.863568215892055, "grad_norm": 0.2930566370487213, "learning_rate": 9.715557705279555e-05, "loss": 0.0203, "step": 15250 }, { "epoch": 22.87856071964018, "grad_norm": 0.24139192700386047, "learning_rate": 9.715007707593372e-05, "loss": 0.0174, "step": 15260 }, { "epoch": 22.893553223388306, "grad_norm": 0.2501487731933594, "learning_rate": 9.714457194281036e-05, "loss": 0.0168, "step": 15270 }, { "epoch": 22.90854572713643, "grad_norm": 0.3240945041179657, "learning_rate": 9.713906165402751e-05, "loss": 0.0156, "step": 15280 }, { "epoch": 22.923538230884557, "grad_norm": 0.31794974207878113, "learning_rate": 9.713354621018774e-05, "loss": 0.0175, "step": 15290 }, { "epoch": 22.938530734632685, "grad_norm": 0.2160859853029251, "learning_rate": 9.712802561189422e-05, "loss": 0.0169, "step": 15300 }, { "epoch": 22.95352323838081, "grad_norm": 0.4225770831108093, "learning_rate": 9.712249985975069e-05, "loss": 0.0163, "step": 15310 }, { "epoch": 22.968515742128936, "grad_norm": 0.3643437325954437, "learning_rate": 9.71169689543614e-05, "loss": 0.0185, "step": 15320 }, { "epoch": 22.98350824587706, "grad_norm": 0.28695228695869446, "learning_rate": 9.711143289633123e-05, "loss": 0.0208, "step": 15330 }, { "epoch": 22.998500749625187, "grad_norm": 0.27116870880126953, "learning_rate": 9.710589168626561e-05, "loss": 0.0172, "step": 15340 }, { "epoch": 23.013493253373312, "grad_norm": 0.3230039179325104, "learning_rate": 9.710034532477048e-05, "loss": 0.0166, "step": 15350 }, { "epoch": 23.02848575712144, "grad_norm": 0.2472277581691742, "learning_rate": 9.709479381245239e-05, "loss": 0.0142, "step": 15360 }, { "epoch": 23.043478260869566, "grad_norm": 0.18367499113082886, "learning_rate": 9.708923714991847e-05, "loss": 0.0136, "step": 15370 }, { "epoch": 23.05847076461769, "grad_norm": 0.3435693681240082, "learning_rate": 9.708367533777638e-05, "loss": 0.0151, "step": 15380 }, { "epoch": 23.073463268365817, "grad_norm": 0.24763941764831543, "learning_rate": 9.707810837663431e-05, "loss": 0.0204, "step": 15390 }, { "epoch": 23.088455772113942, "grad_norm": 0.26465684175491333, "learning_rate": 9.707253626710113e-05, "loss": 0.0189, "step": 15400 }, { "epoch": 23.103448275862068, "grad_norm": 0.19557452201843262, "learning_rate": 9.706695900978613e-05, "loss": 0.0169, "step": 15410 }, { "epoch": 23.118440779610197, "grad_norm": 0.30055102705955505, "learning_rate": 9.706137660529926e-05, "loss": 0.0166, "step": 15420 }, { "epoch": 23.133433283358322, "grad_norm": 0.20977194607257843, "learning_rate": 9.705578905425101e-05, "loss": 0.015, "step": 15430 }, { "epoch": 23.148425787106447, "grad_norm": 0.21004189550876617, "learning_rate": 9.705019635725241e-05, "loss": 0.0167, "step": 15440 }, { "epoch": 23.163418290854572, "grad_norm": 0.22147464752197266, "learning_rate": 9.704459851491508e-05, "loss": 0.0177, "step": 15450 }, { "epoch": 23.178410794602698, "grad_norm": 0.23154623806476593, "learning_rate": 9.703899552785118e-05, "loss": 0.0155, "step": 15460 }, { "epoch": 23.193403298350823, "grad_norm": 0.253502756357193, "learning_rate": 9.703338739667346e-05, "loss": 0.0193, "step": 15470 }, { "epoch": 23.208395802098952, "grad_norm": 0.28143763542175293, "learning_rate": 9.70277741219952e-05, "loss": 0.0144, "step": 15480 }, { "epoch": 23.223388305847077, "grad_norm": 0.29007211327552795, "learning_rate": 9.702215570443027e-05, "loss": 0.0184, "step": 15490 }, { "epoch": 23.238380809595203, "grad_norm": 0.22674347460269928, "learning_rate": 9.701653214459309e-05, "loss": 0.0154, "step": 15500 }, { "epoch": 23.253373313343328, "grad_norm": 0.18144473433494568, "learning_rate": 9.701090344309865e-05, "loss": 0.0198, "step": 15510 }, { "epoch": 23.268365817091453, "grad_norm": 0.20627854764461517, "learning_rate": 9.700526960056247e-05, "loss": 0.018, "step": 15520 }, { "epoch": 23.28335832083958, "grad_norm": 0.34477582573890686, "learning_rate": 9.699963061760068e-05, "loss": 0.0251, "step": 15530 }, { "epoch": 23.298350824587708, "grad_norm": 0.3269825577735901, "learning_rate": 9.699398649482997e-05, "loss": 0.0181, "step": 15540 }, { "epoch": 23.313343328335833, "grad_norm": 0.3286760151386261, "learning_rate": 9.698833723286753e-05, "loss": 0.02, "step": 15550 }, { "epoch": 23.328335832083958, "grad_norm": 0.22898931801319122, "learning_rate": 9.698268283233118e-05, "loss": 0.0166, "step": 15560 }, { "epoch": 23.343328335832084, "grad_norm": 0.25027796626091003, "learning_rate": 9.697702329383929e-05, "loss": 0.0164, "step": 15570 }, { "epoch": 23.35832083958021, "grad_norm": 0.30765438079833984, "learning_rate": 9.697135861801074e-05, "loss": 0.0177, "step": 15580 }, { "epoch": 23.373313343328334, "grad_norm": 0.5155242681503296, "learning_rate": 9.696568880546505e-05, "loss": 0.0148, "step": 15590 }, { "epoch": 23.388305847076463, "grad_norm": 0.3714665472507477, "learning_rate": 9.696001385682223e-05, "loss": 0.016, "step": 15600 }, { "epoch": 23.40329835082459, "grad_norm": 0.2584792971611023, "learning_rate": 9.695433377270291e-05, "loss": 0.0132, "step": 15610 }, { "epoch": 23.418290854572714, "grad_norm": 0.4542728662490845, "learning_rate": 9.694864855372824e-05, "loss": 0.0174, "step": 15620 }, { "epoch": 23.43328335832084, "grad_norm": 0.28771960735321045, "learning_rate": 9.694295820051995e-05, "loss": 0.0154, "step": 15630 }, { "epoch": 23.448275862068964, "grad_norm": 0.314432829618454, "learning_rate": 9.693726271370032e-05, "loss": 0.0185, "step": 15640 }, { "epoch": 23.46326836581709, "grad_norm": 0.4444047808647156, "learning_rate": 9.693156209389221e-05, "loss": 0.0246, "step": 15650 }, { "epoch": 23.47826086956522, "grad_norm": 0.34087318181991577, "learning_rate": 9.692585634171905e-05, "loss": 0.0153, "step": 15660 }, { "epoch": 23.493253373313344, "grad_norm": 0.28885939717292786, "learning_rate": 9.692014545780476e-05, "loss": 0.0232, "step": 15670 }, { "epoch": 23.50824587706147, "grad_norm": 0.36830565333366394, "learning_rate": 9.691442944277393e-05, "loss": 0.0193, "step": 15680 }, { "epoch": 23.523238380809595, "grad_norm": 0.34082475304603577, "learning_rate": 9.690870829725162e-05, "loss": 0.0173, "step": 15690 }, { "epoch": 23.53823088455772, "grad_norm": 0.318684458732605, "learning_rate": 9.69029820218635e-05, "loss": 0.0179, "step": 15700 }, { "epoch": 23.553223388305845, "grad_norm": 0.23913995921611786, "learning_rate": 9.689725061723579e-05, "loss": 0.02, "step": 15710 }, { "epoch": 23.568215892053974, "grad_norm": 0.31614890694618225, "learning_rate": 9.689151408399527e-05, "loss": 0.0192, "step": 15720 }, { "epoch": 23.5832083958021, "grad_norm": 0.26689955592155457, "learning_rate": 9.688577242276924e-05, "loss": 0.0166, "step": 15730 }, { "epoch": 23.598200899550225, "grad_norm": 0.21682462096214294, "learning_rate": 9.688002563418566e-05, "loss": 0.02, "step": 15740 }, { "epoch": 23.61319340329835, "grad_norm": 0.2660944163799286, "learning_rate": 9.687427371887293e-05, "loss": 0.018, "step": 15750 }, { "epoch": 23.628185907046475, "grad_norm": 0.3004331588745117, "learning_rate": 9.686851667746012e-05, "loss": 0.0177, "step": 15760 }, { "epoch": 23.643178410794604, "grad_norm": 0.5085901618003845, "learning_rate": 9.686275451057677e-05, "loss": 0.0205, "step": 15770 }, { "epoch": 23.65817091454273, "grad_norm": 0.31811007857322693, "learning_rate": 9.685698721885308e-05, "loss": 0.0203, "step": 15780 }, { "epoch": 23.673163418290855, "grad_norm": 0.3845685124397278, "learning_rate": 9.68512148029197e-05, "loss": 0.0206, "step": 15790 }, { "epoch": 23.68815592203898, "grad_norm": 0.361156165599823, "learning_rate": 9.684543726340791e-05, "loss": 0.0176, "step": 15800 }, { "epoch": 23.703148425787106, "grad_norm": 0.24667680263519287, "learning_rate": 9.683965460094952e-05, "loss": 0.0172, "step": 15810 }, { "epoch": 23.71814092953523, "grad_norm": 0.224639430642128, "learning_rate": 9.683386681617694e-05, "loss": 0.0204, "step": 15820 }, { "epoch": 23.73313343328336, "grad_norm": 0.1994694620370865, "learning_rate": 9.68280739097231e-05, "loss": 0.017, "step": 15830 }, { "epoch": 23.748125937031485, "grad_norm": 0.21795304119586945, "learning_rate": 9.682227588222148e-05, "loss": 0.0178, "step": 15840 }, { "epoch": 23.76311844077961, "grad_norm": 0.4566425383090973, "learning_rate": 9.681647273430618e-05, "loss": 0.0193, "step": 15850 }, { "epoch": 23.778110944527736, "grad_norm": 0.25690802931785583, "learning_rate": 9.681066446661182e-05, "loss": 0.0145, "step": 15860 }, { "epoch": 23.79310344827586, "grad_norm": 0.39689901471138, "learning_rate": 9.680485107977357e-05, "loss": 0.0173, "step": 15870 }, { "epoch": 23.808095952023987, "grad_norm": 0.2851690649986267, "learning_rate": 9.679903257442716e-05, "loss": 0.0206, "step": 15880 }, { "epoch": 23.823088455772115, "grad_norm": 0.4355381429195404, "learning_rate": 9.679320895120891e-05, "loss": 0.0165, "step": 15890 }, { "epoch": 23.83808095952024, "grad_norm": 0.207406684756279, "learning_rate": 9.67873802107557e-05, "loss": 0.0187, "step": 15900 }, { "epoch": 23.853073463268366, "grad_norm": 0.19680802524089813, "learning_rate": 9.67815463537049e-05, "loss": 0.0193, "step": 15910 }, { "epoch": 23.86806596701649, "grad_norm": 0.3487270772457123, "learning_rate": 9.677570738069457e-05, "loss": 0.017, "step": 15920 }, { "epoch": 23.883058470764617, "grad_norm": 0.2879174053668976, "learning_rate": 9.676986329236318e-05, "loss": 0.0231, "step": 15930 }, { "epoch": 23.898050974512742, "grad_norm": 0.2379566878080368, "learning_rate": 9.676401408934987e-05, "loss": 0.014, "step": 15940 }, { "epoch": 23.91304347826087, "grad_norm": 0.1888645738363266, "learning_rate": 9.675815977229428e-05, "loss": 0.0185, "step": 15950 }, { "epoch": 23.928035982008996, "grad_norm": 0.45383405685424805, "learning_rate": 9.675230034183664e-05, "loss": 0.0209, "step": 15960 }, { "epoch": 23.94302848575712, "grad_norm": 0.19634421169757843, "learning_rate": 9.674643579861773e-05, "loss": 0.0167, "step": 15970 }, { "epoch": 23.958020989505247, "grad_norm": 0.27054744958877563, "learning_rate": 9.674056614327886e-05, "loss": 0.0154, "step": 15980 }, { "epoch": 23.973013493253372, "grad_norm": 0.25521624088287354, "learning_rate": 9.673469137646198e-05, "loss": 0.0175, "step": 15990 }, { "epoch": 23.988005997001498, "grad_norm": 0.26965928077697754, "learning_rate": 9.67288114988095e-05, "loss": 0.0195, "step": 16000 }, { "epoch": 24.002998500749626, "grad_norm": 0.18608985841274261, "learning_rate": 9.672292651096447e-05, "loss": 0.013, "step": 16010 }, { "epoch": 24.017991004497752, "grad_norm": 0.23396731913089752, "learning_rate": 9.671703641357042e-05, "loss": 0.0121, "step": 16020 }, { "epoch": 24.032983508245877, "grad_norm": 0.2008184939622879, "learning_rate": 9.67111412072715e-05, "loss": 0.0152, "step": 16030 }, { "epoch": 24.047976011994002, "grad_norm": 0.21556802093982697, "learning_rate": 9.670524089271242e-05, "loss": 0.0164, "step": 16040 }, { "epoch": 24.062968515742128, "grad_norm": 0.22226805984973907, "learning_rate": 9.669933547053842e-05, "loss": 0.0192, "step": 16050 }, { "epoch": 24.077961019490253, "grad_norm": 0.19826366007328033, "learning_rate": 9.669342494139531e-05, "loss": 0.0153, "step": 16060 }, { "epoch": 24.092953523238382, "grad_norm": 0.26619336009025574, "learning_rate": 9.668750930592943e-05, "loss": 0.0133, "step": 16070 }, { "epoch": 24.107946026986507, "grad_norm": 0.2506621181964874, "learning_rate": 9.668158856478775e-05, "loss": 0.0186, "step": 16080 }, { "epoch": 24.122938530734633, "grad_norm": 0.35555723309516907, "learning_rate": 9.66756627186177e-05, "loss": 0.0204, "step": 16090 }, { "epoch": 24.137931034482758, "grad_norm": 0.3598989248275757, "learning_rate": 9.666973176806737e-05, "loss": 0.0171, "step": 16100 }, { "epoch": 24.152923538230883, "grad_norm": 0.2826334834098816, "learning_rate": 9.666379571378534e-05, "loss": 0.0162, "step": 16110 }, { "epoch": 24.167916041979012, "grad_norm": 0.21515752375125885, "learning_rate": 9.665785455642076e-05, "loss": 0.0171, "step": 16120 }, { "epoch": 24.182908545727138, "grad_norm": 0.21776248514652252, "learning_rate": 9.665190829662337e-05, "loss": 0.0163, "step": 16130 }, { "epoch": 24.197901049475263, "grad_norm": 0.19597242772579193, "learning_rate": 9.664595693504342e-05, "loss": 0.0158, "step": 16140 }, { "epoch": 24.212893553223388, "grad_norm": 0.3022514283657074, "learning_rate": 9.664000047233175e-05, "loss": 0.0176, "step": 16150 }, { "epoch": 24.227886056971514, "grad_norm": 0.3122614324092865, "learning_rate": 9.663403890913976e-05, "loss": 0.0207, "step": 16160 }, { "epoch": 24.24287856071964, "grad_norm": 0.22304651141166687, "learning_rate": 9.662807224611938e-05, "loss": 0.0152, "step": 16170 }, { "epoch": 24.257871064467768, "grad_norm": 0.2598601281642914, "learning_rate": 9.662210048392311e-05, "loss": 0.0213, "step": 16180 }, { "epoch": 24.272863568215893, "grad_norm": 0.23717263340950012, "learning_rate": 9.661612362320405e-05, "loss": 0.0189, "step": 16190 }, { "epoch": 24.28785607196402, "grad_norm": 0.3700011670589447, "learning_rate": 9.661014166461579e-05, "loss": 0.0174, "step": 16200 }, { "epoch": 24.302848575712144, "grad_norm": 0.39524805545806885, "learning_rate": 9.66041546088125e-05, "loss": 0.0145, "step": 16210 }, { "epoch": 24.31784107946027, "grad_norm": 0.2565551698207855, "learning_rate": 9.659816245644895e-05, "loss": 0.0164, "step": 16220 }, { "epoch": 24.332833583208394, "grad_norm": 0.2930096685886383, "learning_rate": 9.65921652081804e-05, "loss": 0.016, "step": 16230 }, { "epoch": 24.347826086956523, "grad_norm": 0.17281733453273773, "learning_rate": 9.658616286466271e-05, "loss": 0.0151, "step": 16240 }, { "epoch": 24.36281859070465, "grad_norm": 0.19418902695178986, "learning_rate": 9.65801554265523e-05, "loss": 0.019, "step": 16250 }, { "epoch": 24.377811094452774, "grad_norm": 0.2729834318161011, "learning_rate": 9.657414289450612e-05, "loss": 0.0181, "step": 16260 }, { "epoch": 24.3928035982009, "grad_norm": 0.19745275378227234, "learning_rate": 9.656812526918171e-05, "loss": 0.0174, "step": 16270 }, { "epoch": 24.407796101949025, "grad_norm": 0.21674062311649323, "learning_rate": 9.656210255123712e-05, "loss": 0.0155, "step": 16280 }, { "epoch": 24.42278860569715, "grad_norm": 0.26436877250671387, "learning_rate": 9.6556074741331e-05, "loss": 0.0186, "step": 16290 }, { "epoch": 24.43778110944528, "grad_norm": 0.2538345754146576, "learning_rate": 9.655004184012256e-05, "loss": 0.0151, "step": 16300 }, { "epoch": 24.452773613193404, "grad_norm": 0.367780476808548, "learning_rate": 9.654400384827152e-05, "loss": 0.0206, "step": 16310 }, { "epoch": 24.46776611694153, "grad_norm": 0.27762919664382935, "learning_rate": 9.653796076643818e-05, "loss": 0.0183, "step": 16320 }, { "epoch": 24.482758620689655, "grad_norm": 0.20597130060195923, "learning_rate": 9.653191259528344e-05, "loss": 0.0145, "step": 16330 }, { "epoch": 24.49775112443778, "grad_norm": 0.202681303024292, "learning_rate": 9.65258593354687e-05, "loss": 0.0152, "step": 16340 }, { "epoch": 24.512743628185905, "grad_norm": 0.21724268794059753, "learning_rate": 9.651980098765591e-05, "loss": 0.0163, "step": 16350 }, { "epoch": 24.527736131934034, "grad_norm": 0.2596499025821686, "learning_rate": 9.651373755250765e-05, "loss": 0.0176, "step": 16360 }, { "epoch": 24.54272863568216, "grad_norm": 0.18383923172950745, "learning_rate": 9.650766903068697e-05, "loss": 0.0162, "step": 16370 }, { "epoch": 24.557721139430285, "grad_norm": 0.3290175497531891, "learning_rate": 9.650159542285753e-05, "loss": 0.0146, "step": 16380 }, { "epoch": 24.57271364317841, "grad_norm": 0.23251283168792725, "learning_rate": 9.649551672968353e-05, "loss": 0.0191, "step": 16390 }, { "epoch": 24.587706146926536, "grad_norm": 0.1931799203157425, "learning_rate": 9.648943295182973e-05, "loss": 0.0174, "step": 16400 }, { "epoch": 24.60269865067466, "grad_norm": 0.19005024433135986, "learning_rate": 9.648334408996144e-05, "loss": 0.0177, "step": 16410 }, { "epoch": 24.61769115442279, "grad_norm": 0.2806815505027771, "learning_rate": 9.647725014474452e-05, "loss": 0.013, "step": 16420 }, { "epoch": 24.632683658170915, "grad_norm": 0.2758657932281494, "learning_rate": 9.64711511168454e-05, "loss": 0.0165, "step": 16430 }, { "epoch": 24.64767616191904, "grad_norm": 0.27268752455711365, "learning_rate": 9.646504700693108e-05, "loss": 0.0166, "step": 16440 }, { "epoch": 24.662668665667166, "grad_norm": 0.35480818152427673, "learning_rate": 9.645893781566907e-05, "loss": 0.0207, "step": 16450 }, { "epoch": 24.67766116941529, "grad_norm": 0.27300652861595154, "learning_rate": 9.645282354372744e-05, "loss": 0.014, "step": 16460 }, { "epoch": 24.69265367316342, "grad_norm": 0.2660861015319824, "learning_rate": 9.644670419177491e-05, "loss": 0.0165, "step": 16470 }, { "epoch": 24.707646176911545, "grad_norm": 0.2382700890302658, "learning_rate": 9.644057976048062e-05, "loss": 0.0163, "step": 16480 }, { "epoch": 24.72263868065967, "grad_norm": 0.23581403493881226, "learning_rate": 9.643445025051435e-05, "loss": 0.0185, "step": 16490 }, { "epoch": 24.737631184407796, "grad_norm": 0.30917081236839294, "learning_rate": 9.642831566254641e-05, "loss": 0.0165, "step": 16500 }, { "epoch": 24.75262368815592, "grad_norm": 0.25301629304885864, "learning_rate": 9.642217599724769e-05, "loss": 0.0143, "step": 16510 }, { "epoch": 24.767616191904047, "grad_norm": 0.2734927833080292, "learning_rate": 9.64160312552896e-05, "loss": 0.0159, "step": 16520 }, { "epoch": 24.782608695652176, "grad_norm": 0.22296151518821716, "learning_rate": 9.64098814373441e-05, "loss": 0.0162, "step": 16530 }, { "epoch": 24.7976011994003, "grad_norm": 0.4266456365585327, "learning_rate": 9.640372654408374e-05, "loss": 0.0149, "step": 16540 }, { "epoch": 24.812593703148426, "grad_norm": 0.18085350096225739, "learning_rate": 9.639756657618162e-05, "loss": 0.0154, "step": 16550 }, { "epoch": 24.82758620689655, "grad_norm": 0.2336369752883911, "learning_rate": 9.639140153431138e-05, "loss": 0.0157, "step": 16560 }, { "epoch": 24.842578710644677, "grad_norm": 0.18631073832511902, "learning_rate": 9.638523141914721e-05, "loss": 0.0156, "step": 16570 }, { "epoch": 24.857571214392802, "grad_norm": 0.18723343312740326, "learning_rate": 9.637905623136388e-05, "loss": 0.0187, "step": 16580 }, { "epoch": 24.87256371814093, "grad_norm": 0.25452277064323425, "learning_rate": 9.637287597163669e-05, "loss": 0.0161, "step": 16590 }, { "epoch": 24.887556221889056, "grad_norm": 0.6754015684127808, "learning_rate": 9.63666906406415e-05, "loss": 0.0168, "step": 16600 }, { "epoch": 24.902548725637182, "grad_norm": 0.3297923803329468, "learning_rate": 9.636050023905473e-05, "loss": 0.0166, "step": 16610 }, { "epoch": 24.917541229385307, "grad_norm": 0.3428608179092407, "learning_rate": 9.635430476755336e-05, "loss": 0.0179, "step": 16620 }, { "epoch": 24.932533733133432, "grad_norm": 0.3172890245914459, "learning_rate": 9.63481042268149e-05, "loss": 0.0175, "step": 16630 }, { "epoch": 24.947526236881558, "grad_norm": 0.35026565194129944, "learning_rate": 9.634189861751745e-05, "loss": 0.0139, "step": 16640 }, { "epoch": 24.962518740629687, "grad_norm": 0.31871381402015686, "learning_rate": 9.633568794033967e-05, "loss": 0.016, "step": 16650 }, { "epoch": 24.977511244377812, "grad_norm": 0.25669366121292114, "learning_rate": 9.63294721959607e-05, "loss": 0.0158, "step": 16660 }, { "epoch": 24.992503748125937, "grad_norm": 0.21319712698459625, "learning_rate": 9.63232513850603e-05, "loss": 0.0123, "step": 16670 }, { "epoch": 25.007496251874063, "grad_norm": 0.25843116641044617, "learning_rate": 9.631702550831878e-05, "loss": 0.0163, "step": 16680 }, { "epoch": 25.022488755622188, "grad_norm": 0.22633597254753113, "learning_rate": 9.631079456641698e-05, "loss": 0.0151, "step": 16690 }, { "epoch": 25.037481259370313, "grad_norm": 0.29395055770874023, "learning_rate": 9.630455856003632e-05, "loss": 0.0151, "step": 16700 }, { "epoch": 25.052473763118442, "grad_norm": 0.32743579149246216, "learning_rate": 9.629831748985876e-05, "loss": 0.0218, "step": 16710 }, { "epoch": 25.067466266866568, "grad_norm": 0.48872438073158264, "learning_rate": 9.629207135656679e-05, "loss": 0.0165, "step": 16720 }, { "epoch": 25.082458770614693, "grad_norm": 0.2936735451221466, "learning_rate": 9.628582016084353e-05, "loss": 0.0157, "step": 16730 }, { "epoch": 25.097451274362818, "grad_norm": 0.31423693895339966, "learning_rate": 9.627956390337254e-05, "loss": 0.0156, "step": 16740 }, { "epoch": 25.112443778110944, "grad_norm": 0.2869270145893097, "learning_rate": 9.627330258483802e-05, "loss": 0.0129, "step": 16750 }, { "epoch": 25.12743628185907, "grad_norm": 0.21500924229621887, "learning_rate": 9.62670362059247e-05, "loss": 0.0176, "step": 16760 }, { "epoch": 25.142428785607198, "grad_norm": 0.3560444414615631, "learning_rate": 9.626076476731786e-05, "loss": 0.0181, "step": 16770 }, { "epoch": 25.157421289355323, "grad_norm": 0.2312375009059906, "learning_rate": 9.625448826970336e-05, "loss": 0.0124, "step": 16780 }, { "epoch": 25.17241379310345, "grad_norm": 0.16860458254814148, "learning_rate": 9.624820671376755e-05, "loss": 0.0149, "step": 16790 }, { "epoch": 25.187406296851574, "grad_norm": 0.3198331892490387, "learning_rate": 9.62419201001974e-05, "loss": 0.0188, "step": 16800 }, { "epoch": 25.2023988005997, "grad_norm": 0.2784753441810608, "learning_rate": 9.623562842968037e-05, "loss": 0.0186, "step": 16810 }, { "epoch": 25.217391304347824, "grad_norm": 0.2226734459400177, "learning_rate": 9.622933170290454e-05, "loss": 0.0148, "step": 16820 }, { "epoch": 25.232383808095953, "grad_norm": 0.3540462553501129, "learning_rate": 9.622302992055849e-05, "loss": 0.0151, "step": 16830 }, { "epoch": 25.24737631184408, "grad_norm": 0.2637062668800354, "learning_rate": 9.62167230833314e-05, "loss": 0.0174, "step": 16840 }, { "epoch": 25.262368815592204, "grad_norm": 0.14033789932727814, "learning_rate": 9.621041119191295e-05, "loss": 0.0143, "step": 16850 }, { "epoch": 25.27736131934033, "grad_norm": 0.31234997510910034, "learning_rate": 9.620409424699342e-05, "loss": 0.0177, "step": 16860 }, { "epoch": 25.292353823088455, "grad_norm": 0.39086228609085083, "learning_rate": 9.619777224926359e-05, "loss": 0.0158, "step": 16870 }, { "epoch": 25.30734632683658, "grad_norm": 0.2822529375553131, "learning_rate": 9.619144519941485e-05, "loss": 0.0178, "step": 16880 }, { "epoch": 25.32233883058471, "grad_norm": 0.32736775279045105, "learning_rate": 9.618511309813912e-05, "loss": 0.0175, "step": 16890 }, { "epoch": 25.337331334332834, "grad_norm": 0.2141961306333542, "learning_rate": 9.617877594612886e-05, "loss": 0.0224, "step": 16900 }, { "epoch": 25.35232383808096, "grad_norm": 0.37148821353912354, "learning_rate": 9.617243374407707e-05, "loss": 0.0189, "step": 16910 }, { "epoch": 25.367316341829085, "grad_norm": 0.3081977069377899, "learning_rate": 9.616608649267736e-05, "loss": 0.0166, "step": 16920 }, { "epoch": 25.38230884557721, "grad_norm": 0.2081092745065689, "learning_rate": 9.615973419262385e-05, "loss": 0.0147, "step": 16930 }, { "epoch": 25.39730134932534, "grad_norm": 0.24659062922000885, "learning_rate": 9.615337684461119e-05, "loss": 0.0177, "step": 16940 }, { "epoch": 25.412293853073464, "grad_norm": 0.1886141449213028, "learning_rate": 9.614701444933465e-05, "loss": 0.0138, "step": 16950 }, { "epoch": 25.42728635682159, "grad_norm": 0.30563870072364807, "learning_rate": 9.614064700748997e-05, "loss": 0.0187, "step": 16960 }, { "epoch": 25.442278860569715, "grad_norm": 0.1978403627872467, "learning_rate": 9.613427451977352e-05, "loss": 0.0196, "step": 16970 }, { "epoch": 25.45727136431784, "grad_norm": 0.2286282479763031, "learning_rate": 9.612789698688216e-05, "loss": 0.0159, "step": 16980 }, { "epoch": 25.472263868065966, "grad_norm": 0.2668491303920746, "learning_rate": 9.612151440951334e-05, "loss": 0.0162, "step": 16990 }, { "epoch": 25.487256371814095, "grad_norm": 0.2694700360298157, "learning_rate": 9.611512678836506e-05, "loss": 0.0156, "step": 17000 }, { "epoch": 25.50224887556222, "grad_norm": 0.2875704765319824, "learning_rate": 9.610873412413584e-05, "loss": 0.0174, "step": 17010 }, { "epoch": 25.517241379310345, "grad_norm": 0.22902347147464752, "learning_rate": 9.610233641752476e-05, "loss": 0.0155, "step": 17020 }, { "epoch": 25.53223388305847, "grad_norm": 0.22966624796390533, "learning_rate": 9.609593366923151e-05, "loss": 0.0141, "step": 17030 }, { "epoch": 25.547226386806596, "grad_norm": 0.3581661581993103, "learning_rate": 9.608952587995625e-05, "loss": 0.0136, "step": 17040 }, { "epoch": 25.56221889055472, "grad_norm": 0.22339680790901184, "learning_rate": 9.608311305039972e-05, "loss": 0.0131, "step": 17050 }, { "epoch": 25.57721139430285, "grad_norm": 0.37441569566726685, "learning_rate": 9.607669518126326e-05, "loss": 0.0167, "step": 17060 }, { "epoch": 25.592203898050975, "grad_norm": 0.39361435174942017, "learning_rate": 9.607027227324866e-05, "loss": 0.0156, "step": 17070 }, { "epoch": 25.6071964017991, "grad_norm": 0.28686392307281494, "learning_rate": 9.606384432705837e-05, "loss": 0.019, "step": 17080 }, { "epoch": 25.622188905547226, "grad_norm": 0.25164762139320374, "learning_rate": 9.60574113433953e-05, "loss": 0.0153, "step": 17090 }, { "epoch": 25.63718140929535, "grad_norm": 0.14113062620162964, "learning_rate": 9.6050973322963e-05, "loss": 0.0175, "step": 17100 }, { "epoch": 25.652173913043477, "grad_norm": 0.35547104477882385, "learning_rate": 9.604453026646547e-05, "loss": 0.0169, "step": 17110 }, { "epoch": 25.667166416791606, "grad_norm": 0.27170172333717346, "learning_rate": 9.603808217460735e-05, "loss": 0.0167, "step": 17120 }, { "epoch": 25.68215892053973, "grad_norm": 0.31726765632629395, "learning_rate": 9.603162904809377e-05, "loss": 0.018, "step": 17130 }, { "epoch": 25.697151424287856, "grad_norm": 0.3590933680534363, "learning_rate": 9.602517088763045e-05, "loss": 0.0231, "step": 17140 }, { "epoch": 25.71214392803598, "grad_norm": 0.23990964889526367, "learning_rate": 9.601870769392365e-05, "loss": 0.0164, "step": 17150 }, { "epoch": 25.727136431784107, "grad_norm": 0.29001113772392273, "learning_rate": 9.601223946768017e-05, "loss": 0.0153, "step": 17160 }, { "epoch": 25.742128935532232, "grad_norm": 0.2032664567232132, "learning_rate": 9.600576620960734e-05, "loss": 0.015, "step": 17170 }, { "epoch": 25.75712143928036, "grad_norm": 0.38009002804756165, "learning_rate": 9.599928792041308e-05, "loss": 0.0141, "step": 17180 }, { "epoch": 25.772113943028486, "grad_norm": 0.4226874113082886, "learning_rate": 9.599280460080587e-05, "loss": 0.0157, "step": 17190 }, { "epoch": 25.787106446776612, "grad_norm": 0.27872294187545776, "learning_rate": 9.59863162514947e-05, "loss": 0.0169, "step": 17200 }, { "epoch": 25.802098950524737, "grad_norm": 0.2554077208042145, "learning_rate": 9.597982287318911e-05, "loss": 0.0179, "step": 17210 }, { "epoch": 25.817091454272862, "grad_norm": 0.31824615597724915, "learning_rate": 9.597332446659923e-05, "loss": 0.0163, "step": 17220 }, { "epoch": 25.832083958020988, "grad_norm": 0.37412434816360474, "learning_rate": 9.59668210324357e-05, "loss": 0.0139, "step": 17230 }, { "epoch": 25.847076461769117, "grad_norm": 0.29070255160331726, "learning_rate": 9.596031257140974e-05, "loss": 0.0157, "step": 17240 }, { "epoch": 25.862068965517242, "grad_norm": 0.33027684688568115, "learning_rate": 9.59537990842331e-05, "loss": 0.0221, "step": 17250 }, { "epoch": 25.877061469265367, "grad_norm": 0.316023588180542, "learning_rate": 9.594728057161806e-05, "loss": 0.0156, "step": 17260 }, { "epoch": 25.892053973013493, "grad_norm": 0.20579221844673157, "learning_rate": 9.594075703427752e-05, "loss": 0.0165, "step": 17270 }, { "epoch": 25.907046476761618, "grad_norm": 0.39505499601364136, "learning_rate": 9.593422847292486e-05, "loss": 0.0143, "step": 17280 }, { "epoch": 25.922038980509747, "grad_norm": 0.26262810826301575, "learning_rate": 9.592769488827402e-05, "loss": 0.0174, "step": 17290 }, { "epoch": 25.937031484257872, "grad_norm": 0.29777035117149353, "learning_rate": 9.592115628103952e-05, "loss": 0.0169, "step": 17300 }, { "epoch": 25.952023988005998, "grad_norm": 0.2727007269859314, "learning_rate": 9.591461265193643e-05, "loss": 0.0162, "step": 17310 }, { "epoch": 25.967016491754123, "grad_norm": 0.39860430359840393, "learning_rate": 9.590806400168032e-05, "loss": 0.0167, "step": 17320 }, { "epoch": 25.982008995502248, "grad_norm": 0.232103630900383, "learning_rate": 9.590151033098735e-05, "loss": 0.0148, "step": 17330 }, { "epoch": 25.997001499250374, "grad_norm": 0.2336079627275467, "learning_rate": 9.589495164057423e-05, "loss": 0.0198, "step": 17340 }, { "epoch": 26.011994002998502, "grad_norm": 0.24314431846141815, "learning_rate": 9.58883879311582e-05, "loss": 0.0165, "step": 17350 }, { "epoch": 26.026986506746628, "grad_norm": 0.20637205243110657, "learning_rate": 9.588181920345705e-05, "loss": 0.0158, "step": 17360 }, { "epoch": 26.041979010494753, "grad_norm": 0.33854639530181885, "learning_rate": 9.587524545818913e-05, "loss": 0.0158, "step": 17370 }, { "epoch": 26.05697151424288, "grad_norm": 0.23306649923324585, "learning_rate": 9.586866669607335e-05, "loss": 0.0127, "step": 17380 }, { "epoch": 26.071964017991004, "grad_norm": 0.12467658519744873, "learning_rate": 9.586208291782915e-05, "loss": 0.0117, "step": 17390 }, { "epoch": 26.08695652173913, "grad_norm": 0.2665659189224243, "learning_rate": 9.58554941241765e-05, "loss": 0.0137, "step": 17400 }, { "epoch": 26.101949025487258, "grad_norm": 0.22792629897594452, "learning_rate": 9.584890031583596e-05, "loss": 0.016, "step": 17410 }, { "epoch": 26.116941529235383, "grad_norm": 0.26000329852104187, "learning_rate": 9.584230149352861e-05, "loss": 0.0183, "step": 17420 }, { "epoch": 26.13193403298351, "grad_norm": 0.177031010389328, "learning_rate": 9.58356976579761e-05, "loss": 0.0153, "step": 17430 }, { "epoch": 26.146926536731634, "grad_norm": 0.2459612488746643, "learning_rate": 9.58290888099006e-05, "loss": 0.0157, "step": 17440 }, { "epoch": 26.16191904047976, "grad_norm": 0.26499441266059875, "learning_rate": 9.582247495002486e-05, "loss": 0.0158, "step": 17450 }, { "epoch": 26.176911544227885, "grad_norm": 0.2685220241546631, "learning_rate": 9.581585607907214e-05, "loss": 0.014, "step": 17460 }, { "epoch": 26.191904047976013, "grad_norm": 0.36087334156036377, "learning_rate": 9.580923219776628e-05, "loss": 0.0179, "step": 17470 }, { "epoch": 26.20689655172414, "grad_norm": 0.27580374479293823, "learning_rate": 9.580260330683167e-05, "loss": 0.0154, "step": 17480 }, { "epoch": 26.221889055472264, "grad_norm": 0.2622172236442566, "learning_rate": 9.579596940699322e-05, "loss": 0.0159, "step": 17490 }, { "epoch": 26.23688155922039, "grad_norm": 0.3389027416706085, "learning_rate": 9.578933049897643e-05, "loss": 0.0139, "step": 17500 }, { "epoch": 26.251874062968515, "grad_norm": 0.1521281749010086, "learning_rate": 9.578268658350728e-05, "loss": 0.0126, "step": 17510 }, { "epoch": 26.26686656671664, "grad_norm": 0.22758585214614868, "learning_rate": 9.577603766131235e-05, "loss": 0.0137, "step": 17520 }, { "epoch": 26.28185907046477, "grad_norm": 0.2101256102323532, "learning_rate": 9.576938373311878e-05, "loss": 0.0129, "step": 17530 }, { "epoch": 26.296851574212894, "grad_norm": 0.3010301887989044, "learning_rate": 9.576272479965421e-05, "loss": 0.0216, "step": 17540 }, { "epoch": 26.31184407796102, "grad_norm": 0.2541378438472748, "learning_rate": 9.575606086164687e-05, "loss": 0.0173, "step": 17550 }, { "epoch": 26.326836581709145, "grad_norm": 0.2357034534215927, "learning_rate": 9.57493919198255e-05, "loss": 0.0144, "step": 17560 }, { "epoch": 26.34182908545727, "grad_norm": 0.3167750835418701, "learning_rate": 9.57427179749194e-05, "loss": 0.0146, "step": 17570 }, { "epoch": 26.356821589205396, "grad_norm": 0.286638081073761, "learning_rate": 9.573603902765846e-05, "loss": 0.0174, "step": 17580 }, { "epoch": 26.371814092953525, "grad_norm": 0.2334517389535904, "learning_rate": 9.572935507877304e-05, "loss": 0.0144, "step": 17590 }, { "epoch": 26.38680659670165, "grad_norm": 0.24350781738758087, "learning_rate": 9.57226661289941e-05, "loss": 0.0113, "step": 17600 }, { "epoch": 26.401799100449775, "grad_norm": 0.23723573982715607, "learning_rate": 9.571597217905315e-05, "loss": 0.0125, "step": 17610 }, { "epoch": 26.4167916041979, "grad_norm": 0.21148446202278137, "learning_rate": 9.57092732296822e-05, "loss": 0.0165, "step": 17620 }, { "epoch": 26.431784107946026, "grad_norm": 0.20931804180145264, "learning_rate": 9.570256928161385e-05, "loss": 0.0138, "step": 17630 }, { "epoch": 26.446776611694155, "grad_norm": 0.33122727274894714, "learning_rate": 9.569586033558126e-05, "loss": 0.0171, "step": 17640 }, { "epoch": 26.46176911544228, "grad_norm": 0.2538944482803345, "learning_rate": 9.568914639231807e-05, "loss": 0.016, "step": 17650 }, { "epoch": 26.476761619190405, "grad_norm": 0.29483720660209656, "learning_rate": 9.568242745255852e-05, "loss": 0.0167, "step": 17660 }, { "epoch": 26.49175412293853, "grad_norm": 0.23687565326690674, "learning_rate": 9.567570351703739e-05, "loss": 0.0223, "step": 17670 }, { "epoch": 26.506746626686656, "grad_norm": 0.18198709189891815, "learning_rate": 9.566897458649001e-05, "loss": 0.0137, "step": 17680 }, { "epoch": 26.52173913043478, "grad_norm": 0.2791576683521271, "learning_rate": 9.566224066165221e-05, "loss": 0.014, "step": 17690 }, { "epoch": 26.53673163418291, "grad_norm": 0.1991434395313263, "learning_rate": 9.565550174326043e-05, "loss": 0.0198, "step": 17700 }, { "epoch": 26.551724137931036, "grad_norm": 0.18631641566753387, "learning_rate": 9.564875783205162e-05, "loss": 0.016, "step": 17710 }, { "epoch": 26.56671664167916, "grad_norm": 0.36200112104415894, "learning_rate": 9.564200892876328e-05, "loss": 0.0147, "step": 17720 }, { "epoch": 26.581709145427286, "grad_norm": 0.28180381655693054, "learning_rate": 9.563525503413348e-05, "loss": 0.0165, "step": 17730 }, { "epoch": 26.59670164917541, "grad_norm": 0.2915526032447815, "learning_rate": 9.562849614890079e-05, "loss": 0.0176, "step": 17740 }, { "epoch": 26.611694152923537, "grad_norm": 0.28993964195251465, "learning_rate": 9.562173227380436e-05, "loss": 0.0159, "step": 17750 }, { "epoch": 26.626686656671666, "grad_norm": 0.28307071328163147, "learning_rate": 9.561496340958389e-05, "loss": 0.0139, "step": 17760 }, { "epoch": 26.64167916041979, "grad_norm": 0.23630495369434357, "learning_rate": 9.560818955697959e-05, "loss": 0.0211, "step": 17770 }, { "epoch": 26.656671664167916, "grad_norm": 0.49825093150138855, "learning_rate": 9.560141071673228e-05, "loss": 0.0163, "step": 17780 }, { "epoch": 26.671664167916042, "grad_norm": 0.1976112425327301, "learning_rate": 9.559462688958323e-05, "loss": 0.016, "step": 17790 }, { "epoch": 26.686656671664167, "grad_norm": 0.29444751143455505, "learning_rate": 9.558783807627434e-05, "loss": 0.0145, "step": 17800 }, { "epoch": 26.701649175412292, "grad_norm": 0.2186099886894226, "learning_rate": 9.558104427754801e-05, "loss": 0.0175, "step": 17810 }, { "epoch": 26.71664167916042, "grad_norm": 0.2812463939189911, "learning_rate": 9.557424549414722e-05, "loss": 0.0147, "step": 17820 }, { "epoch": 26.731634182908547, "grad_norm": 0.2766430974006653, "learning_rate": 9.556744172681546e-05, "loss": 0.0183, "step": 17830 }, { "epoch": 26.746626686656672, "grad_norm": 0.3375700116157532, "learning_rate": 9.556063297629677e-05, "loss": 0.018, "step": 17840 }, { "epoch": 26.761619190404797, "grad_norm": 0.18366053700447083, "learning_rate": 9.555381924333578e-05, "loss": 0.0247, "step": 17850 }, { "epoch": 26.776611694152923, "grad_norm": 0.3786552846431732, "learning_rate": 9.554700052867758e-05, "loss": 0.0174, "step": 17860 }, { "epoch": 26.791604197901048, "grad_norm": 0.22787223756313324, "learning_rate": 9.554017683306789e-05, "loss": 0.0151, "step": 17870 }, { "epoch": 26.806596701649177, "grad_norm": 0.25769081711769104, "learning_rate": 9.553334815725294e-05, "loss": 0.0198, "step": 17880 }, { "epoch": 26.821589205397302, "grad_norm": 0.31787896156311035, "learning_rate": 9.552651450197949e-05, "loss": 0.0188, "step": 17890 }, { "epoch": 26.836581709145428, "grad_norm": 0.2157890796661377, "learning_rate": 9.551967586799486e-05, "loss": 0.0143, "step": 17900 }, { "epoch": 26.851574212893553, "grad_norm": 0.2317812293767929, "learning_rate": 9.551283225604692e-05, "loss": 0.0147, "step": 17910 }, { "epoch": 26.866566716641678, "grad_norm": 0.261169970035553, "learning_rate": 9.550598366688406e-05, "loss": 0.0178, "step": 17920 }, { "epoch": 26.881559220389803, "grad_norm": 0.19480012357234955, "learning_rate": 9.549913010125526e-05, "loss": 0.0148, "step": 17930 }, { "epoch": 26.896551724137932, "grad_norm": 0.28428325057029724, "learning_rate": 9.549227155990999e-05, "loss": 0.0149, "step": 17940 }, { "epoch": 26.911544227886058, "grad_norm": 0.21445241570472717, "learning_rate": 9.548540804359828e-05, "loss": 0.0123, "step": 17950 }, { "epoch": 26.926536731634183, "grad_norm": 0.17504440248012543, "learning_rate": 9.547853955307077e-05, "loss": 0.0139, "step": 17960 }, { "epoch": 26.94152923538231, "grad_norm": 0.3731864094734192, "learning_rate": 9.547166608907853e-05, "loss": 0.0193, "step": 17970 }, { "epoch": 26.956521739130434, "grad_norm": 0.348638653755188, "learning_rate": 9.546478765237326e-05, "loss": 0.017, "step": 17980 }, { "epoch": 26.97151424287856, "grad_norm": 0.35886162519454956, "learning_rate": 9.545790424370715e-05, "loss": 0.0209, "step": 17990 }, { "epoch": 26.986506746626688, "grad_norm": 0.2713591754436493, "learning_rate": 9.5451015863833e-05, "loss": 0.0195, "step": 18000 }, { "epoch": 27.001499250374813, "grad_norm": 0.143751859664917, "learning_rate": 9.544412251350408e-05, "loss": 0.0199, "step": 18010 }, { "epoch": 27.01649175412294, "grad_norm": 0.2598904073238373, "learning_rate": 9.543722419347422e-05, "loss": 0.018, "step": 18020 }, { "epoch": 27.031484257871064, "grad_norm": 0.23424355685710907, "learning_rate": 9.543032090449788e-05, "loss": 0.019, "step": 18030 }, { "epoch": 27.04647676161919, "grad_norm": 0.29150909185409546, "learning_rate": 9.542341264732992e-05, "loss": 0.0143, "step": 18040 }, { "epoch": 27.061469265367315, "grad_norm": 0.31425485014915466, "learning_rate": 9.541649942272585e-05, "loss": 0.022, "step": 18050 }, { "epoch": 27.076461769115443, "grad_norm": 0.2868637144565582, "learning_rate": 9.54095812314417e-05, "loss": 0.0186, "step": 18060 }, { "epoch": 27.09145427286357, "grad_norm": 0.2422032356262207, "learning_rate": 9.540265807423401e-05, "loss": 0.0153, "step": 18070 }, { "epoch": 27.106446776611694, "grad_norm": 0.4035455584526062, "learning_rate": 9.53957299518599e-05, "loss": 0.0153, "step": 18080 }, { "epoch": 27.12143928035982, "grad_norm": 0.2693590521812439, "learning_rate": 9.5388796865077e-05, "loss": 0.0161, "step": 18090 }, { "epoch": 27.136431784107945, "grad_norm": 0.35320621728897095, "learning_rate": 9.538185881464353e-05, "loss": 0.0186, "step": 18100 }, { "epoch": 27.151424287856074, "grad_norm": 0.19450201094150543, "learning_rate": 9.537491580131821e-05, "loss": 0.0167, "step": 18110 }, { "epoch": 27.1664167916042, "grad_norm": 0.21928350627422333, "learning_rate": 9.53679678258603e-05, "loss": 0.0144, "step": 18120 }, { "epoch": 27.181409295352324, "grad_norm": 0.22411389648914337, "learning_rate": 9.536101488902966e-05, "loss": 0.0141, "step": 18130 }, { "epoch": 27.19640179910045, "grad_norm": 0.3054937422275543, "learning_rate": 9.535405699158663e-05, "loss": 0.016, "step": 18140 }, { "epoch": 27.211394302848575, "grad_norm": 0.2137681245803833, "learning_rate": 9.53470941342921e-05, "loss": 0.018, "step": 18150 }, { "epoch": 27.2263868065967, "grad_norm": 0.32138556241989136, "learning_rate": 9.534012631790756e-05, "loss": 0.0162, "step": 18160 }, { "epoch": 27.24137931034483, "grad_norm": 0.2695384919643402, "learning_rate": 9.533315354319494e-05, "loss": 0.0139, "step": 18170 }, { "epoch": 27.256371814092955, "grad_norm": 0.24612028896808624, "learning_rate": 9.532617581091682e-05, "loss": 0.0164, "step": 18180 }, { "epoch": 27.27136431784108, "grad_norm": 0.2575179934501648, "learning_rate": 9.531919312183629e-05, "loss": 0.0179, "step": 18190 }, { "epoch": 27.286356821589205, "grad_norm": 0.20441889762878418, "learning_rate": 9.531220547671688e-05, "loss": 0.014, "step": 18200 }, { "epoch": 27.30134932533733, "grad_norm": 0.25195547938346863, "learning_rate": 9.530521287632285e-05, "loss": 0.0125, "step": 18210 }, { "epoch": 27.316341829085456, "grad_norm": 0.34186193346977234, "learning_rate": 9.529821532141884e-05, "loss": 0.0144, "step": 18220 }, { "epoch": 27.331334332833585, "grad_norm": 0.19388052821159363, "learning_rate": 9.52912128127701e-05, "loss": 0.0155, "step": 18230 }, { "epoch": 27.34632683658171, "grad_norm": 0.14908066391944885, "learning_rate": 9.528420535114244e-05, "loss": 0.019, "step": 18240 }, { "epoch": 27.361319340329835, "grad_norm": 0.27898672223091125, "learning_rate": 9.527719293730215e-05, "loss": 0.0158, "step": 18250 }, { "epoch": 27.37631184407796, "grad_norm": 0.5725167989730835, "learning_rate": 9.527017557201611e-05, "loss": 0.019, "step": 18260 }, { "epoch": 27.391304347826086, "grad_norm": 0.2728968560695648, "learning_rate": 9.526315325605176e-05, "loss": 0.0166, "step": 18270 }, { "epoch": 27.40629685157421, "grad_norm": 0.2821887135505676, "learning_rate": 9.525612599017699e-05, "loss": 0.0156, "step": 18280 }, { "epoch": 27.42128935532234, "grad_norm": 0.367112398147583, "learning_rate": 9.524909377516033e-05, "loss": 0.0151, "step": 18290 }, { "epoch": 27.436281859070466, "grad_norm": 0.19477957487106323, "learning_rate": 9.524205661177081e-05, "loss": 0.0188, "step": 18300 }, { "epoch": 27.45127436281859, "grad_norm": 0.2586708664894104, "learning_rate": 9.523501450077801e-05, "loss": 0.0168, "step": 18310 }, { "epoch": 27.466266866566716, "grad_norm": 0.1993657350540161, "learning_rate": 9.522796744295202e-05, "loss": 0.0138, "step": 18320 }, { "epoch": 27.48125937031484, "grad_norm": 0.29566332697868347, "learning_rate": 9.522091543906352e-05, "loss": 0.0125, "step": 18330 }, { "epoch": 27.496251874062967, "grad_norm": 0.23672866821289062, "learning_rate": 9.521385848988369e-05, "loss": 0.017, "step": 18340 }, { "epoch": 27.511244377811096, "grad_norm": 0.3396475911140442, "learning_rate": 9.520679659618428e-05, "loss": 0.0148, "step": 18350 }, { "epoch": 27.52623688155922, "grad_norm": 0.2678010165691376, "learning_rate": 9.519972975873754e-05, "loss": 0.0135, "step": 18360 }, { "epoch": 27.541229385307346, "grad_norm": 0.2329280972480774, "learning_rate": 9.519265797831633e-05, "loss": 0.0151, "step": 18370 }, { "epoch": 27.55622188905547, "grad_norm": 0.18938995897769928, "learning_rate": 9.518558125569399e-05, "loss": 0.0162, "step": 18380 }, { "epoch": 27.571214392803597, "grad_norm": 0.23345668613910675, "learning_rate": 9.517849959164442e-05, "loss": 0.0174, "step": 18390 }, { "epoch": 27.586206896551722, "grad_norm": 0.35736578702926636, "learning_rate": 9.517141298694205e-05, "loss": 0.02, "step": 18400 }, { "epoch": 27.60119940029985, "grad_norm": 0.31691601872444153, "learning_rate": 9.516432144236188e-05, "loss": 0.0169, "step": 18410 }, { "epoch": 27.616191904047977, "grad_norm": 0.2389357089996338, "learning_rate": 9.515722495867941e-05, "loss": 0.0175, "step": 18420 }, { "epoch": 27.631184407796102, "grad_norm": 0.3690081536769867, "learning_rate": 9.515012353667072e-05, "loss": 0.0133, "step": 18430 }, { "epoch": 27.646176911544227, "grad_norm": 0.40137991309165955, "learning_rate": 9.51430171771124e-05, "loss": 0.0138, "step": 18440 }, { "epoch": 27.661169415292353, "grad_norm": 0.1581800878047943, "learning_rate": 9.513590588078159e-05, "loss": 0.0156, "step": 18450 }, { "epoch": 27.67616191904048, "grad_norm": 0.2513771951198578, "learning_rate": 9.512878964845597e-05, "loss": 0.0159, "step": 18460 }, { "epoch": 27.691154422788607, "grad_norm": 0.3255137503147125, "learning_rate": 9.512166848091377e-05, "loss": 0.0154, "step": 18470 }, { "epoch": 27.706146926536732, "grad_norm": 0.27594268321990967, "learning_rate": 9.511454237893376e-05, "loss": 0.0144, "step": 18480 }, { "epoch": 27.721139430284857, "grad_norm": 0.2595321536064148, "learning_rate": 9.51074113432952e-05, "loss": 0.0178, "step": 18490 }, { "epoch": 27.736131934032983, "grad_norm": 0.3238576650619507, "learning_rate": 9.510027537477797e-05, "loss": 0.0173, "step": 18500 }, { "epoch": 27.751124437781108, "grad_norm": 0.1894141286611557, "learning_rate": 9.509313447416242e-05, "loss": 0.0172, "step": 18510 }, { "epoch": 27.766116941529237, "grad_norm": 0.39427369832992554, "learning_rate": 9.508598864222949e-05, "loss": 0.0176, "step": 18520 }, { "epoch": 27.781109445277362, "grad_norm": 0.31981605291366577, "learning_rate": 9.507883787976062e-05, "loss": 0.0165, "step": 18530 }, { "epoch": 27.796101949025488, "grad_norm": 0.17099714279174805, "learning_rate": 9.507168218753781e-05, "loss": 0.024, "step": 18540 }, { "epoch": 27.811094452773613, "grad_norm": 0.34799253940582275, "learning_rate": 9.506452156634362e-05, "loss": 0.0189, "step": 18550 }, { "epoch": 27.82608695652174, "grad_norm": 0.2394268661737442, "learning_rate": 9.505735601696109e-05, "loss": 0.0194, "step": 18560 }, { "epoch": 27.841079460269864, "grad_norm": 0.1895424872636795, "learning_rate": 9.505018554017385e-05, "loss": 0.0176, "step": 18570 }, { "epoch": 27.856071964017993, "grad_norm": 0.2507805824279785, "learning_rate": 9.504301013676604e-05, "loss": 0.0143, "step": 18580 }, { "epoch": 27.871064467766118, "grad_norm": 0.20449091494083405, "learning_rate": 9.503582980752238e-05, "loss": 0.0181, "step": 18590 }, { "epoch": 27.886056971514243, "grad_norm": 0.311708927154541, "learning_rate": 9.502864455322809e-05, "loss": 0.0171, "step": 18600 }, { "epoch": 27.90104947526237, "grad_norm": 0.2379022240638733, "learning_rate": 9.502145437466891e-05, "loss": 0.017, "step": 18610 }, { "epoch": 27.916041979010494, "grad_norm": 0.21960076689720154, "learning_rate": 9.501425927263116e-05, "loss": 0.0144, "step": 18620 }, { "epoch": 27.93103448275862, "grad_norm": 0.23702161014080048, "learning_rate": 9.500705924790172e-05, "loss": 0.0171, "step": 18630 }, { "epoch": 27.946026986506748, "grad_norm": 0.3499869108200073, "learning_rate": 9.499985430126794e-05, "loss": 0.0128, "step": 18640 }, { "epoch": 27.961019490254873, "grad_norm": 0.2809913158416748, "learning_rate": 9.499264443351775e-05, "loss": 0.0156, "step": 18650 }, { "epoch": 27.976011994003, "grad_norm": 0.214985653758049, "learning_rate": 9.498542964543961e-05, "loss": 0.015, "step": 18660 }, { "epoch": 27.991004497751124, "grad_norm": 0.18161669373512268, "learning_rate": 9.497820993782252e-05, "loss": 0.0184, "step": 18670 }, { "epoch": 28.00599700149925, "grad_norm": 0.22511842846870422, "learning_rate": 9.497098531145601e-05, "loss": 0.0236, "step": 18680 }, { "epoch": 28.020989505247375, "grad_norm": 0.20628708600997925, "learning_rate": 9.496375576713017e-05, "loss": 0.0151, "step": 18690 }, { "epoch": 28.035982008995504, "grad_norm": 0.281920850276947, "learning_rate": 9.49565213056356e-05, "loss": 0.0174, "step": 18700 }, { "epoch": 28.05097451274363, "grad_norm": 0.3313135504722595, "learning_rate": 9.494928192776342e-05, "loss": 0.0174, "step": 18710 }, { "epoch": 28.065967016491754, "grad_norm": 0.3156997263431549, "learning_rate": 9.494203763430538e-05, "loss": 0.0132, "step": 18720 }, { "epoch": 28.08095952023988, "grad_norm": 0.2759210765361786, "learning_rate": 9.493478842605366e-05, "loss": 0.0127, "step": 18730 }, { "epoch": 28.095952023988005, "grad_norm": 0.21781107783317566, "learning_rate": 9.492753430380105e-05, "loss": 0.0154, "step": 18740 }, { "epoch": 28.11094452773613, "grad_norm": 0.3504922688007355, "learning_rate": 9.492027526834083e-05, "loss": 0.0163, "step": 18750 }, { "epoch": 28.12593703148426, "grad_norm": 0.456049382686615, "learning_rate": 9.491301132046684e-05, "loss": 0.0145, "step": 18760 }, { "epoch": 28.140929535232384, "grad_norm": 0.3473648428916931, "learning_rate": 9.490574246097345e-05, "loss": 0.0162, "step": 18770 }, { "epoch": 28.15592203898051, "grad_norm": 0.21006329357624054, "learning_rate": 9.48984686906556e-05, "loss": 0.0131, "step": 18780 }, { "epoch": 28.170914542728635, "grad_norm": 0.2651069462299347, "learning_rate": 9.489119001030871e-05, "loss": 0.0172, "step": 18790 }, { "epoch": 28.18590704647676, "grad_norm": 0.2846440076828003, "learning_rate": 9.488390642072878e-05, "loss": 0.0169, "step": 18800 }, { "epoch": 28.200899550224886, "grad_norm": 0.31189876794815063, "learning_rate": 9.48766179227123e-05, "loss": 0.016, "step": 18810 }, { "epoch": 28.215892053973015, "grad_norm": 0.2955370843410492, "learning_rate": 9.486932451705636e-05, "loss": 0.0148, "step": 18820 }, { "epoch": 28.23088455772114, "grad_norm": 0.1678425818681717, "learning_rate": 9.486202620455857e-05, "loss": 0.0152, "step": 18830 }, { "epoch": 28.245877061469265, "grad_norm": 0.2465660274028778, "learning_rate": 9.485472298601704e-05, "loss": 0.015, "step": 18840 }, { "epoch": 28.26086956521739, "grad_norm": 0.2875027060508728, "learning_rate": 9.484741486223043e-05, "loss": 0.0175, "step": 18850 }, { "epoch": 28.275862068965516, "grad_norm": 0.1809883415699005, "learning_rate": 9.484010183399797e-05, "loss": 0.0133, "step": 18860 }, { "epoch": 28.290854572713645, "grad_norm": 0.3163459599018097, "learning_rate": 9.483278390211938e-05, "loss": 0.0139, "step": 18870 }, { "epoch": 28.30584707646177, "grad_norm": 0.17372839152812958, "learning_rate": 9.482546106739496e-05, "loss": 0.0153, "step": 18880 }, { "epoch": 28.320839580209896, "grad_norm": 0.24919690191745758, "learning_rate": 9.48181333306255e-05, "loss": 0.0156, "step": 18890 }, { "epoch": 28.33583208395802, "grad_norm": 0.2389836311340332, "learning_rate": 9.481080069261237e-05, "loss": 0.013, "step": 18900 }, { "epoch": 28.350824587706146, "grad_norm": 0.2737343907356262, "learning_rate": 9.480346315415745e-05, "loss": 0.0174, "step": 18910 }, { "epoch": 28.36581709145427, "grad_norm": 0.22619594633579254, "learning_rate": 9.479612071606314e-05, "loss": 0.0173, "step": 18920 }, { "epoch": 28.3808095952024, "grad_norm": 0.1923779398202896, "learning_rate": 9.478877337913244e-05, "loss": 0.0201, "step": 18930 }, { "epoch": 28.395802098950526, "grad_norm": 0.3811130225658417, "learning_rate": 9.478142114416881e-05, "loss": 0.0158, "step": 18940 }, { "epoch": 28.41079460269865, "grad_norm": 0.2360740453004837, "learning_rate": 9.47740640119763e-05, "loss": 0.0156, "step": 18950 }, { "epoch": 28.425787106446776, "grad_norm": 0.2593648433685303, "learning_rate": 9.476670198335947e-05, "loss": 0.0156, "step": 18960 }, { "epoch": 28.4407796101949, "grad_norm": 0.31986358761787415, "learning_rate": 9.47593350591234e-05, "loss": 0.0188, "step": 18970 }, { "epoch": 28.455772113943027, "grad_norm": 0.3785470128059387, "learning_rate": 9.475196324007376e-05, "loss": 0.0155, "step": 18980 }, { "epoch": 28.470764617691156, "grad_norm": 0.22158871591091156, "learning_rate": 9.474458652701669e-05, "loss": 0.0144, "step": 18990 }, { "epoch": 28.48575712143928, "grad_norm": 0.20457442104816437, "learning_rate": 9.473720492075892e-05, "loss": 0.0164, "step": 19000 }, { "epoch": 28.500749625187407, "grad_norm": 0.21339185535907745, "learning_rate": 9.472981842210768e-05, "loss": 0.0132, "step": 19010 }, { "epoch": 28.515742128935532, "grad_norm": 0.2080898880958557, "learning_rate": 9.472242703187074e-05, "loss": 0.0167, "step": 19020 }, { "epoch": 28.530734632683657, "grad_norm": 0.208204984664917, "learning_rate": 9.471503075085643e-05, "loss": 0.0162, "step": 19030 }, { "epoch": 28.545727136431783, "grad_norm": 0.3563164174556732, "learning_rate": 9.470762957987359e-05, "loss": 0.0164, "step": 19040 }, { "epoch": 28.56071964017991, "grad_norm": 0.1990954875946045, "learning_rate": 9.470022351973158e-05, "loss": 0.0126, "step": 19050 }, { "epoch": 28.575712143928037, "grad_norm": 0.414098858833313, "learning_rate": 9.469281257124034e-05, "loss": 0.0131, "step": 19060 }, { "epoch": 28.590704647676162, "grad_norm": 0.37009698152542114, "learning_rate": 9.46853967352103e-05, "loss": 0.016, "step": 19070 }, { "epoch": 28.605697151424287, "grad_norm": 0.32325035333633423, "learning_rate": 9.467797601245246e-05, "loss": 0.0182, "step": 19080 }, { "epoch": 28.620689655172413, "grad_norm": 0.36337631940841675, "learning_rate": 9.467055040377834e-05, "loss": 0.0149, "step": 19090 }, { "epoch": 28.635682158920538, "grad_norm": 0.21612800657749176, "learning_rate": 9.466311990999999e-05, "loss": 0.0147, "step": 19100 }, { "epoch": 28.650674662668667, "grad_norm": 0.25336936116218567, "learning_rate": 9.465568453193e-05, "loss": 0.0181, "step": 19110 }, { "epoch": 28.665667166416792, "grad_norm": 0.22389930486679077, "learning_rate": 9.464824427038148e-05, "loss": 0.0141, "step": 19120 }, { "epoch": 28.680659670164918, "grad_norm": 0.3924027383327484, "learning_rate": 9.46407991261681e-05, "loss": 0.0161, "step": 19130 }, { "epoch": 28.695652173913043, "grad_norm": 0.29785454273223877, "learning_rate": 9.463334910010404e-05, "loss": 0.0183, "step": 19140 }, { "epoch": 28.71064467766117, "grad_norm": 0.23324573040008545, "learning_rate": 9.462589419300403e-05, "loss": 0.0149, "step": 19150 }, { "epoch": 28.725637181409294, "grad_norm": 0.34842684864997864, "learning_rate": 9.461843440568333e-05, "loss": 0.0148, "step": 19160 }, { "epoch": 28.740629685157423, "grad_norm": 0.2472863644361496, "learning_rate": 9.461096973895773e-05, "loss": 0.0183, "step": 19170 }, { "epoch": 28.755622188905548, "grad_norm": 0.18401460349559784, "learning_rate": 9.460350019364355e-05, "loss": 0.0138, "step": 19180 }, { "epoch": 28.770614692653673, "grad_norm": 0.17249062657356262, "learning_rate": 9.459602577055764e-05, "loss": 0.0135, "step": 19190 }, { "epoch": 28.7856071964018, "grad_norm": 0.16306817531585693, "learning_rate": 9.45885464705174e-05, "loss": 0.0159, "step": 19200 }, { "epoch": 28.800599700149924, "grad_norm": 0.23125426471233368, "learning_rate": 9.458106229434076e-05, "loss": 0.0104, "step": 19210 }, { "epoch": 28.81559220389805, "grad_norm": 0.32991689443588257, "learning_rate": 9.457357324284617e-05, "loss": 0.0144, "step": 19220 }, { "epoch": 28.830584707646178, "grad_norm": 0.2892981171607971, "learning_rate": 9.456607931685262e-05, "loss": 0.0175, "step": 19230 }, { "epoch": 28.845577211394303, "grad_norm": 0.23903140425682068, "learning_rate": 9.455858051717965e-05, "loss": 0.0143, "step": 19240 }, { "epoch": 28.86056971514243, "grad_norm": 0.2169184684753418, "learning_rate": 9.45510768446473e-05, "loss": 0.0144, "step": 19250 }, { "epoch": 28.875562218890554, "grad_norm": 0.3816428780555725, "learning_rate": 9.454356830007618e-05, "loss": 0.0159, "step": 19260 }, { "epoch": 28.89055472263868, "grad_norm": 0.32295629382133484, "learning_rate": 9.45360548842874e-05, "loss": 0.0156, "step": 19270 }, { "epoch": 28.90554722638681, "grad_norm": 0.2481907606124878, "learning_rate": 9.452853659810261e-05, "loss": 0.0161, "step": 19280 }, { "epoch": 28.920539730134934, "grad_norm": 0.17330127954483032, "learning_rate": 9.452101344234401e-05, "loss": 0.0169, "step": 19290 }, { "epoch": 28.93553223388306, "grad_norm": 0.15486085414886475, "learning_rate": 9.451348541783431e-05, "loss": 0.0153, "step": 19300 }, { "epoch": 28.950524737631184, "grad_norm": 0.4146181344985962, "learning_rate": 9.450595252539678e-05, "loss": 0.0137, "step": 19310 }, { "epoch": 28.96551724137931, "grad_norm": 0.2854381799697876, "learning_rate": 9.449841476585518e-05, "loss": 0.014, "step": 19320 }, { "epoch": 28.980509745127435, "grad_norm": 0.1901637315750122, "learning_rate": 9.449087214003384e-05, "loss": 0.0145, "step": 19330 }, { "epoch": 28.995502248875564, "grad_norm": 0.370717316865921, "learning_rate": 9.448332464875765e-05, "loss": 0.0158, "step": 19340 }, { "epoch": 29.01049475262369, "grad_norm": 0.2381884604692459, "learning_rate": 9.447577229285192e-05, "loss": 0.0139, "step": 19350 }, { "epoch": 29.025487256371814, "grad_norm": 0.3403952717781067, "learning_rate": 9.446821507314261e-05, "loss": 0.0168, "step": 19360 }, { "epoch": 29.04047976011994, "grad_norm": 0.21853236854076385, "learning_rate": 9.446065299045617e-05, "loss": 0.0124, "step": 19370 }, { "epoch": 29.055472263868065, "grad_norm": 0.16855213046073914, "learning_rate": 9.445308604561955e-05, "loss": 0.0139, "step": 19380 }, { "epoch": 29.07046476761619, "grad_norm": 0.2385929822921753, "learning_rate": 9.444551423946028e-05, "loss": 0.0121, "step": 19390 }, { "epoch": 29.08545727136432, "grad_norm": 0.24691292643547058, "learning_rate": 9.443793757280638e-05, "loss": 0.0152, "step": 19400 }, { "epoch": 29.100449775112445, "grad_norm": 0.17007894814014435, "learning_rate": 9.443035604648646e-05, "loss": 0.0158, "step": 19410 }, { "epoch": 29.11544227886057, "grad_norm": 0.239970400929451, "learning_rate": 9.44227696613296e-05, "loss": 0.0155, "step": 19420 }, { "epoch": 29.130434782608695, "grad_norm": 0.1799054890871048, "learning_rate": 9.441517841816542e-05, "loss": 0.0134, "step": 19430 }, { "epoch": 29.14542728635682, "grad_norm": 0.21216215193271637, "learning_rate": 9.440758231782413e-05, "loss": 0.015, "step": 19440 }, { "epoch": 29.160419790104946, "grad_norm": 0.25788936018943787, "learning_rate": 9.439998136113639e-05, "loss": 0.0167, "step": 19450 }, { "epoch": 29.175412293853075, "grad_norm": 0.27357882261276245, "learning_rate": 9.439237554893344e-05, "loss": 0.0122, "step": 19460 }, { "epoch": 29.1904047976012, "grad_norm": 0.19578711688518524, "learning_rate": 9.438476488204705e-05, "loss": 0.0147, "step": 19470 }, { "epoch": 29.205397301349326, "grad_norm": 0.5664684772491455, "learning_rate": 9.43771493613095e-05, "loss": 0.0193, "step": 19480 }, { "epoch": 29.22038980509745, "grad_norm": 0.2273656129837036, "learning_rate": 9.436952898755362e-05, "loss": 0.0178, "step": 19490 }, { "epoch": 29.235382308845576, "grad_norm": 0.20356690883636475, "learning_rate": 9.436190376161276e-05, "loss": 0.0151, "step": 19500 }, { "epoch": 29.2503748125937, "grad_norm": 0.2519472539424896, "learning_rate": 9.43542736843208e-05, "loss": 0.0158, "step": 19510 }, { "epoch": 29.26536731634183, "grad_norm": 0.29709291458129883, "learning_rate": 9.434663875651216e-05, "loss": 0.0172, "step": 19520 }, { "epoch": 29.280359820089956, "grad_norm": 0.2756016254425049, "learning_rate": 9.433899897902177e-05, "loss": 0.0149, "step": 19530 }, { "epoch": 29.29535232383808, "grad_norm": 0.3035442531108856, "learning_rate": 9.433135435268511e-05, "loss": 0.0149, "step": 19540 }, { "epoch": 29.310344827586206, "grad_norm": 0.19090315699577332, "learning_rate": 9.432370487833819e-05, "loss": 0.0158, "step": 19550 }, { "epoch": 29.32533733133433, "grad_norm": 0.2784755825996399, "learning_rate": 9.431605055681756e-05, "loss": 0.0218, "step": 19560 }, { "epoch": 29.340329835082457, "grad_norm": 0.22825181484222412, "learning_rate": 9.430839138896026e-05, "loss": 0.0181, "step": 19570 }, { "epoch": 29.355322338830586, "grad_norm": 0.25627386569976807, "learning_rate": 9.43007273756039e-05, "loss": 0.014, "step": 19580 }, { "epoch": 29.37031484257871, "grad_norm": 0.3844562768936157, "learning_rate": 9.429305851758658e-05, "loss": 0.0149, "step": 19590 }, { "epoch": 29.385307346326837, "grad_norm": 0.24233874678611755, "learning_rate": 9.428538481574699e-05, "loss": 0.015, "step": 19600 }, { "epoch": 29.400299850074962, "grad_norm": 0.22669416666030884, "learning_rate": 9.42777062709243e-05, "loss": 0.0135, "step": 19610 }, { "epoch": 29.415292353823087, "grad_norm": 0.32502511143684387, "learning_rate": 9.427002288395821e-05, "loss": 0.0122, "step": 19620 }, { "epoch": 29.430284857571216, "grad_norm": 0.15465137362480164, "learning_rate": 9.426233465568898e-05, "loss": 0.0148, "step": 19630 }, { "epoch": 29.44527736131934, "grad_norm": 0.25101131200790405, "learning_rate": 9.42546415869574e-05, "loss": 0.0134, "step": 19640 }, { "epoch": 29.460269865067467, "grad_norm": 0.23014716804027557, "learning_rate": 9.424694367860473e-05, "loss": 0.0118, "step": 19650 }, { "epoch": 29.475262368815592, "grad_norm": 0.2823716104030609, "learning_rate": 9.423924093147284e-05, "loss": 0.0134, "step": 19660 }, { "epoch": 29.490254872563717, "grad_norm": 0.21573889255523682, "learning_rate": 9.423153334640407e-05, "loss": 0.0165, "step": 19670 }, { "epoch": 29.505247376311843, "grad_norm": 0.2727203667163849, "learning_rate": 9.42238209242413e-05, "loss": 0.0139, "step": 19680 }, { "epoch": 29.52023988005997, "grad_norm": 0.12736409902572632, "learning_rate": 9.421610366582798e-05, "loss": 0.0126, "step": 19690 }, { "epoch": 29.535232383808097, "grad_norm": 0.18257556855678558, "learning_rate": 9.420838157200803e-05, "loss": 0.0185, "step": 19700 }, { "epoch": 29.550224887556222, "grad_norm": 0.2450694739818573, "learning_rate": 9.420065464362594e-05, "loss": 0.0172, "step": 19710 }, { "epoch": 29.565217391304348, "grad_norm": 0.2921430170536041, "learning_rate": 9.419292288152673e-05, "loss": 0.0159, "step": 19720 }, { "epoch": 29.580209895052473, "grad_norm": 0.2636406719684601, "learning_rate": 9.418518628655588e-05, "loss": 0.0163, "step": 19730 }, { "epoch": 29.5952023988006, "grad_norm": 0.3380866050720215, "learning_rate": 9.417744485955951e-05, "loss": 0.0137, "step": 19740 }, { "epoch": 29.610194902548727, "grad_norm": 0.23915371298789978, "learning_rate": 9.41696986013842e-05, "loss": 0.0189, "step": 19750 }, { "epoch": 29.625187406296853, "grad_norm": 0.280657023191452, "learning_rate": 9.416194751287705e-05, "loss": 0.0171, "step": 19760 }, { "epoch": 29.640179910044978, "grad_norm": 0.2677460312843323, "learning_rate": 9.415419159488572e-05, "loss": 0.0163, "step": 19770 }, { "epoch": 29.655172413793103, "grad_norm": 0.5469359755516052, "learning_rate": 9.414643084825837e-05, "loss": 0.0141, "step": 19780 }, { "epoch": 29.67016491754123, "grad_norm": 0.7974211573600769, "learning_rate": 9.413866527384372e-05, "loss": 0.0207, "step": 19790 }, { "epoch": 29.685157421289354, "grad_norm": 0.27656203508377075, "learning_rate": 9.4130894872491e-05, "loss": 0.0153, "step": 19800 }, { "epoch": 29.700149925037483, "grad_norm": 0.3540825843811035, "learning_rate": 9.412311964504998e-05, "loss": 0.0148, "step": 19810 }, { "epoch": 29.715142428785608, "grad_norm": 0.3274586796760559, "learning_rate": 9.411533959237091e-05, "loss": 0.0157, "step": 19820 }, { "epoch": 29.730134932533733, "grad_norm": 0.3610801696777344, "learning_rate": 9.410755471530464e-05, "loss": 0.0216, "step": 19830 }, { "epoch": 29.74512743628186, "grad_norm": 0.26267102360725403, "learning_rate": 9.40997650147025e-05, "loss": 0.0193, "step": 19840 }, { "epoch": 29.760119940029984, "grad_norm": 0.3192768096923828, "learning_rate": 9.409197049141637e-05, "loss": 0.0165, "step": 19850 }, { "epoch": 29.77511244377811, "grad_norm": 0.2716461420059204, "learning_rate": 9.408417114629863e-05, "loss": 0.0213, "step": 19860 }, { "epoch": 29.79010494752624, "grad_norm": 0.2229275107383728, "learning_rate": 9.40763669802022e-05, "loss": 0.0171, "step": 19870 }, { "epoch": 29.805097451274364, "grad_norm": 0.17609237134456635, "learning_rate": 9.406855799398056e-05, "loss": 0.0178, "step": 19880 }, { "epoch": 29.82008995502249, "grad_norm": 0.18964430689811707, "learning_rate": 9.406074418848767e-05, "loss": 0.0166, "step": 19890 }, { "epoch": 29.835082458770614, "grad_norm": 1.121917486190796, "learning_rate": 9.405292556457805e-05, "loss": 0.0156, "step": 19900 }, { "epoch": 29.85007496251874, "grad_norm": 0.24551701545715332, "learning_rate": 9.404510212310671e-05, "loss": 0.0146, "step": 19910 }, { "epoch": 29.865067466266865, "grad_norm": 0.2454705685377121, "learning_rate": 9.403727386492924e-05, "loss": 0.0137, "step": 19920 }, { "epoch": 29.880059970014994, "grad_norm": 0.25363922119140625, "learning_rate": 9.40294407909017e-05, "loss": 0.0135, "step": 19930 }, { "epoch": 29.89505247376312, "grad_norm": 0.33303219079971313, "learning_rate": 9.40216029018807e-05, "loss": 0.0154, "step": 19940 }, { "epoch": 29.910044977511244, "grad_norm": 0.28461775183677673, "learning_rate": 9.401376019872338e-05, "loss": 0.016, "step": 19950 }, { "epoch": 29.92503748125937, "grad_norm": 0.3151218593120575, "learning_rate": 9.400591268228746e-05, "loss": 0.0141, "step": 19960 }, { "epoch": 29.940029985007495, "grad_norm": 0.23029442131519318, "learning_rate": 9.399806035343106e-05, "loss": 0.0166, "step": 19970 }, { "epoch": 29.955022488755624, "grad_norm": 0.3082249164581299, "learning_rate": 9.399020321301294e-05, "loss": 0.0119, "step": 19980 }, { "epoch": 29.97001499250375, "grad_norm": 0.6488118767738342, "learning_rate": 9.398234126189234e-05, "loss": 0.0149, "step": 19990 }, { "epoch": 29.985007496251875, "grad_norm": 0.38223886489868164, "learning_rate": 9.397447450092902e-05, "loss": 0.0146, "step": 20000 }, { "epoch": 30.0, "grad_norm": 0.2232271432876587, "learning_rate": 9.39666029309833e-05, "loss": 0.0122, "step": 20010 }, { "epoch": 30.014992503748125, "grad_norm": 0.16814270615577698, "learning_rate": 9.395872655291596e-05, "loss": 0.0155, "step": 20020 }, { "epoch": 30.02998500749625, "grad_norm": 0.2320227026939392, "learning_rate": 9.395084536758838e-05, "loss": 0.0155, "step": 20030 }, { "epoch": 30.04497751124438, "grad_norm": 0.19831401109695435, "learning_rate": 9.394295937586243e-05, "loss": 0.0135, "step": 20040 }, { "epoch": 30.059970014992505, "grad_norm": 0.23671407997608185, "learning_rate": 9.393506857860052e-05, "loss": 0.0148, "step": 20050 }, { "epoch": 30.07496251874063, "grad_norm": 0.16802482306957245, "learning_rate": 9.392717297666555e-05, "loss": 0.0169, "step": 20060 }, { "epoch": 30.089955022488756, "grad_norm": 0.3212326765060425, "learning_rate": 9.391927257092101e-05, "loss": 0.0142, "step": 20070 }, { "epoch": 30.10494752623688, "grad_norm": 0.17184066772460938, "learning_rate": 9.391136736223085e-05, "loss": 0.0133, "step": 20080 }, { "epoch": 30.119940029985006, "grad_norm": 0.39205217361450195, "learning_rate": 9.390345735145956e-05, "loss": 0.015, "step": 20090 }, { "epoch": 30.134932533733135, "grad_norm": 0.27692940831184387, "learning_rate": 9.389554253947219e-05, "loss": 0.0149, "step": 20100 }, { "epoch": 30.14992503748126, "grad_norm": 0.27580124139785767, "learning_rate": 9.388762292713428e-05, "loss": 0.0131, "step": 20110 }, { "epoch": 30.164917541229386, "grad_norm": 0.3287576735019684, "learning_rate": 9.38796985153119e-05, "loss": 0.0108, "step": 20120 }, { "epoch": 30.17991004497751, "grad_norm": 0.3141285181045532, "learning_rate": 9.387176930487169e-05, "loss": 0.0148, "step": 20130 }, { "epoch": 30.194902548725636, "grad_norm": 0.38589808344841003, "learning_rate": 9.386383529668072e-05, "loss": 0.0163, "step": 20140 }, { "epoch": 30.20989505247376, "grad_norm": 0.19814063608646393, "learning_rate": 9.385589649160669e-05, "loss": 0.0177, "step": 20150 }, { "epoch": 30.22488755622189, "grad_norm": 0.3905055820941925, "learning_rate": 9.384795289051775e-05, "loss": 0.0178, "step": 20160 }, { "epoch": 30.239880059970016, "grad_norm": 0.25960609316825867, "learning_rate": 9.384000449428261e-05, "loss": 0.0137, "step": 20170 }, { "epoch": 30.25487256371814, "grad_norm": 0.21550627052783966, "learning_rate": 9.383205130377048e-05, "loss": 0.0162, "step": 20180 }, { "epoch": 30.269865067466267, "grad_norm": 0.25606057047843933, "learning_rate": 9.382409331985114e-05, "loss": 0.0145, "step": 20190 }, { "epoch": 30.284857571214392, "grad_norm": 0.28323987126350403, "learning_rate": 9.381613054339482e-05, "loss": 0.0175, "step": 20200 }, { "epoch": 30.299850074962517, "grad_norm": 0.23045532405376434, "learning_rate": 9.380816297527235e-05, "loss": 0.0158, "step": 20210 }, { "epoch": 30.314842578710646, "grad_norm": 0.32412001490592957, "learning_rate": 9.380019061635506e-05, "loss": 0.0142, "step": 20220 }, { "epoch": 30.32983508245877, "grad_norm": 0.29235973954200745, "learning_rate": 9.379221346751474e-05, "loss": 0.016, "step": 20230 }, { "epoch": 30.344827586206897, "grad_norm": 0.4213528335094452, "learning_rate": 9.378423152962382e-05, "loss": 0.0169, "step": 20240 }, { "epoch": 30.359820089955022, "grad_norm": 0.23183973133563995, "learning_rate": 9.377624480355517e-05, "loss": 0.0159, "step": 20250 }, { "epoch": 30.374812593703147, "grad_norm": 0.23353639245033264, "learning_rate": 9.376825329018219e-05, "loss": 0.0141, "step": 20260 }, { "epoch": 30.389805097451273, "grad_norm": 0.23771852254867554, "learning_rate": 9.376025699037884e-05, "loss": 0.0154, "step": 20270 }, { "epoch": 30.4047976011994, "grad_norm": 0.27314722537994385, "learning_rate": 9.37522559050196e-05, "loss": 0.0148, "step": 20280 }, { "epoch": 30.419790104947527, "grad_norm": 0.14226357638835907, "learning_rate": 9.37442500349794e-05, "loss": 0.0107, "step": 20290 }, { "epoch": 30.434782608695652, "grad_norm": 0.17318257689476013, "learning_rate": 9.373623938113381e-05, "loss": 0.0144, "step": 20300 }, { "epoch": 30.449775112443778, "grad_norm": 0.2917690873146057, "learning_rate": 9.372822394435883e-05, "loss": 0.0125, "step": 20310 }, { "epoch": 30.464767616191903, "grad_norm": 0.29010048508644104, "learning_rate": 9.372020372553102e-05, "loss": 0.0149, "step": 20320 }, { "epoch": 30.47976011994003, "grad_norm": 0.24295437335968018, "learning_rate": 9.371217872552746e-05, "loss": 0.013, "step": 20330 }, { "epoch": 30.494752623688157, "grad_norm": 0.28364574909210205, "learning_rate": 9.370414894522576e-05, "loss": 0.0159, "step": 20340 }, { "epoch": 30.509745127436283, "grad_norm": 0.24444831907749176, "learning_rate": 9.369611438550406e-05, "loss": 0.0158, "step": 20350 }, { "epoch": 30.524737631184408, "grad_norm": 0.28877344727516174, "learning_rate": 9.368807504724095e-05, "loss": 0.0147, "step": 20360 }, { "epoch": 30.539730134932533, "grad_norm": 0.28681305050849915, "learning_rate": 9.368003093131565e-05, "loss": 0.0149, "step": 20370 }, { "epoch": 30.55472263868066, "grad_norm": 0.12283913791179657, "learning_rate": 9.367198203860785e-05, "loss": 0.0123, "step": 20380 }, { "epoch": 30.569715142428784, "grad_norm": 0.364122599363327, "learning_rate": 9.366392836999774e-05, "loss": 0.0139, "step": 20390 }, { "epoch": 30.584707646176913, "grad_norm": 0.15671540796756744, "learning_rate": 9.365586992636607e-05, "loss": 0.0104, "step": 20400 }, { "epoch": 30.599700149925038, "grad_norm": 0.28880181908607483, "learning_rate": 9.364780670859412e-05, "loss": 0.0122, "step": 20410 }, { "epoch": 30.614692653673163, "grad_norm": 0.2654559910297394, "learning_rate": 9.363973871756364e-05, "loss": 0.0164, "step": 20420 }, { "epoch": 30.62968515742129, "grad_norm": 0.3332326114177704, "learning_rate": 9.363166595415696e-05, "loss": 0.0162, "step": 20430 }, { "epoch": 30.644677661169414, "grad_norm": 0.31065934896469116, "learning_rate": 9.362358841925686e-05, "loss": 0.0149, "step": 20440 }, { "epoch": 30.659670164917543, "grad_norm": 0.18330857157707214, "learning_rate": 9.361550611374674e-05, "loss": 0.0122, "step": 20450 }, { "epoch": 30.67466266866567, "grad_norm": 0.2290225625038147, "learning_rate": 9.360741903851043e-05, "loss": 0.0147, "step": 20460 }, { "epoch": 30.689655172413794, "grad_norm": 0.16620483994483948, "learning_rate": 9.359932719443236e-05, "loss": 0.015, "step": 20470 }, { "epoch": 30.70464767616192, "grad_norm": 0.18551257252693176, "learning_rate": 9.35912305823974e-05, "loss": 0.0127, "step": 20480 }, { "epoch": 30.719640179910044, "grad_norm": 0.19506807625293732, "learning_rate": 9.358312920329101e-05, "loss": 0.0131, "step": 20490 }, { "epoch": 30.73463268365817, "grad_norm": 0.21485044062137604, "learning_rate": 9.357502305799914e-05, "loss": 0.0139, "step": 20500 }, { "epoch": 30.7496251874063, "grad_norm": 0.3029332160949707, "learning_rate": 9.356691214740824e-05, "loss": 0.0159, "step": 20510 }, { "epoch": 30.764617691154424, "grad_norm": 0.24619899690151215, "learning_rate": 9.355879647240535e-05, "loss": 0.0154, "step": 20520 }, { "epoch": 30.77961019490255, "grad_norm": 0.9591369032859802, "learning_rate": 9.355067603387798e-05, "loss": 0.0156, "step": 20530 }, { "epoch": 30.794602698650674, "grad_norm": 0.24135154485702515, "learning_rate": 9.354255083271412e-05, "loss": 0.0156, "step": 20540 }, { "epoch": 30.8095952023988, "grad_norm": 0.3707614839076996, "learning_rate": 9.353442086980239e-05, "loss": 0.0145, "step": 20550 }, { "epoch": 30.824587706146925, "grad_norm": 0.2067548930644989, "learning_rate": 9.352628614603185e-05, "loss": 0.0127, "step": 20560 }, { "epoch": 30.839580209895054, "grad_norm": 0.1394432634115219, "learning_rate": 9.351814666229209e-05, "loss": 0.012, "step": 20570 }, { "epoch": 30.85457271364318, "grad_norm": 0.282042920589447, "learning_rate": 9.351000241947324e-05, "loss": 0.0185, "step": 20580 }, { "epoch": 30.869565217391305, "grad_norm": 0.28007951378822327, "learning_rate": 9.350185341846594e-05, "loss": 0.0145, "step": 20590 }, { "epoch": 30.88455772113943, "grad_norm": 0.3427363932132721, "learning_rate": 9.349369966016134e-05, "loss": 0.0153, "step": 20600 }, { "epoch": 30.899550224887555, "grad_norm": 0.2905445098876953, "learning_rate": 9.348554114545117e-05, "loss": 0.0145, "step": 20610 }, { "epoch": 30.91454272863568, "grad_norm": 0.2004663497209549, "learning_rate": 9.347737787522758e-05, "loss": 0.0144, "step": 20620 }, { "epoch": 30.92953523238381, "grad_norm": 0.2943611145019531, "learning_rate": 9.346920985038332e-05, "loss": 0.0126, "step": 20630 }, { "epoch": 30.944527736131935, "grad_norm": 0.2049584686756134, "learning_rate": 9.346103707181162e-05, "loss": 0.0143, "step": 20640 }, { "epoch": 30.95952023988006, "grad_norm": 0.32908856868743896, "learning_rate": 9.345285954040626e-05, "loss": 0.0146, "step": 20650 }, { "epoch": 30.974512743628186, "grad_norm": 0.2740759253501892, "learning_rate": 9.34446772570615e-05, "loss": 0.0208, "step": 20660 }, { "epoch": 30.98950524737631, "grad_norm": 0.4523284137248993, "learning_rate": 9.343649022267214e-05, "loss": 0.0169, "step": 20670 }, { "epoch": 31.004497751124436, "grad_norm": 0.2556588053703308, "learning_rate": 9.342829843813353e-05, "loss": 0.0154, "step": 20680 }, { "epoch": 31.019490254872565, "grad_norm": 0.27959778904914856, "learning_rate": 9.342010190434149e-05, "loss": 0.0141, "step": 20690 }, { "epoch": 31.03448275862069, "grad_norm": 0.28039437532424927, "learning_rate": 9.34119006221924e-05, "loss": 0.014, "step": 20700 }, { "epoch": 31.049475262368816, "grad_norm": 0.33223554491996765, "learning_rate": 9.340369459258313e-05, "loss": 0.0149, "step": 20710 }, { "epoch": 31.06446776611694, "grad_norm": 0.3622635006904602, "learning_rate": 9.339548381641106e-05, "loss": 0.017, "step": 20720 }, { "epoch": 31.079460269865066, "grad_norm": 0.2693498134613037, "learning_rate": 9.338726829457413e-05, "loss": 0.0178, "step": 20730 }, { "epoch": 31.09445277361319, "grad_norm": 0.2627866268157959, "learning_rate": 9.337904802797078e-05, "loss": 0.0145, "step": 20740 }, { "epoch": 31.10944527736132, "grad_norm": 0.19844269752502441, "learning_rate": 9.337082301749993e-05, "loss": 0.014, "step": 20750 }, { "epoch": 31.124437781109446, "grad_norm": 0.29066792130470276, "learning_rate": 9.336259326406109e-05, "loss": 0.0123, "step": 20760 }, { "epoch": 31.13943028485757, "grad_norm": 0.24023739993572235, "learning_rate": 9.335435876855427e-05, "loss": 0.0148, "step": 20770 }, { "epoch": 31.154422788605697, "grad_norm": 0.19487126171588898, "learning_rate": 9.334611953187994e-05, "loss": 0.0151, "step": 20780 }, { "epoch": 31.169415292353822, "grad_norm": 0.3417710065841675, "learning_rate": 9.333787555493914e-05, "loss": 0.0153, "step": 20790 }, { "epoch": 31.18440779610195, "grad_norm": 0.2002650499343872, "learning_rate": 9.332962683863345e-05, "loss": 0.0147, "step": 20800 }, { "epoch": 31.199400299850076, "grad_norm": 0.24837848544120789, "learning_rate": 9.332137338386489e-05, "loss": 0.0127, "step": 20810 }, { "epoch": 31.2143928035982, "grad_norm": 0.3051658570766449, "learning_rate": 9.33131151915361e-05, "loss": 0.0155, "step": 20820 }, { "epoch": 31.229385307346327, "grad_norm": 0.24748535454273224, "learning_rate": 9.330485226255012e-05, "loss": 0.0153, "step": 20830 }, { "epoch": 31.244377811094452, "grad_norm": 0.23401746153831482, "learning_rate": 9.329658459781061e-05, "loss": 0.0163, "step": 20840 }, { "epoch": 31.259370314842577, "grad_norm": 0.2458677589893341, "learning_rate": 9.328831219822172e-05, "loss": 0.0158, "step": 20850 }, { "epoch": 31.274362818590706, "grad_norm": 0.31267407536506653, "learning_rate": 9.328003506468808e-05, "loss": 0.0155, "step": 20860 }, { "epoch": 31.28935532233883, "grad_norm": 0.2262892872095108, "learning_rate": 9.327175319811488e-05, "loss": 0.0122, "step": 20870 }, { "epoch": 31.304347826086957, "grad_norm": 0.2664051055908203, "learning_rate": 9.326346659940781e-05, "loss": 0.015, "step": 20880 }, { "epoch": 31.319340329835082, "grad_norm": 0.28827643394470215, "learning_rate": 9.325517526947308e-05, "loss": 0.0122, "step": 20890 }, { "epoch": 31.334332833583208, "grad_norm": 0.2804287374019623, "learning_rate": 9.32468792092174e-05, "loss": 0.0162, "step": 20900 }, { "epoch": 31.349325337331333, "grad_norm": 0.264845609664917, "learning_rate": 9.323857841954803e-05, "loss": 0.0143, "step": 20910 }, { "epoch": 31.364317841079462, "grad_norm": 0.2299884706735611, "learning_rate": 9.323027290137276e-05, "loss": 0.0171, "step": 20920 }, { "epoch": 31.379310344827587, "grad_norm": 0.15119668841362, "learning_rate": 9.322196265559981e-05, "loss": 0.015, "step": 20930 }, { "epoch": 31.394302848575713, "grad_norm": 0.2503393888473511, "learning_rate": 9.321364768313803e-05, "loss": 0.0121, "step": 20940 }, { "epoch": 31.409295352323838, "grad_norm": 0.12440554797649384, "learning_rate": 9.32053279848967e-05, "loss": 0.0131, "step": 20950 }, { "epoch": 31.424287856071963, "grad_norm": 0.29621556401252747, "learning_rate": 9.319700356178567e-05, "loss": 0.0152, "step": 20960 }, { "epoch": 31.43928035982009, "grad_norm": 0.2280733287334442, "learning_rate": 9.318867441471527e-05, "loss": 0.0177, "step": 20970 }, { "epoch": 31.454272863568217, "grad_norm": 0.17591223120689392, "learning_rate": 9.318034054459637e-05, "loss": 0.0143, "step": 20980 }, { "epoch": 31.469265367316343, "grad_norm": 0.3161684274673462, "learning_rate": 9.317200195234034e-05, "loss": 0.0159, "step": 20990 }, { "epoch": 31.484257871064468, "grad_norm": 0.21330860257148743, "learning_rate": 9.316365863885909e-05, "loss": 0.0213, "step": 21000 }, { "epoch": 31.499250374812593, "grad_norm": 0.23898138105869293, "learning_rate": 9.315531060506502e-05, "loss": 0.0126, "step": 21010 }, { "epoch": 31.51424287856072, "grad_norm": 0.24094676971435547, "learning_rate": 9.314695785187108e-05, "loss": 0.0141, "step": 21020 }, { "epoch": 31.529235382308844, "grad_norm": 0.40383297204971313, "learning_rate": 9.313860038019069e-05, "loss": 0.0156, "step": 21030 }, { "epoch": 31.544227886056973, "grad_norm": 0.29007041454315186, "learning_rate": 9.313023819093782e-05, "loss": 0.0131, "step": 21040 }, { "epoch": 31.5592203898051, "grad_norm": 0.2636885643005371, "learning_rate": 9.312187128502695e-05, "loss": 0.0125, "step": 21050 }, { "epoch": 31.574212893553224, "grad_norm": 0.2743169665336609, "learning_rate": 9.311349966337307e-05, "loss": 0.0159, "step": 21060 }, { "epoch": 31.58920539730135, "grad_norm": 0.2732585668563843, "learning_rate": 9.310512332689169e-05, "loss": 0.0146, "step": 21070 }, { "epoch": 31.604197901049474, "grad_norm": 0.2112400233745575, "learning_rate": 9.309674227649883e-05, "loss": 0.0171, "step": 21080 }, { "epoch": 31.6191904047976, "grad_norm": 0.2830733358860016, "learning_rate": 9.308835651311103e-05, "loss": 0.0163, "step": 21090 }, { "epoch": 31.63418290854573, "grad_norm": 0.27816373109817505, "learning_rate": 9.307996603764533e-05, "loss": 0.0153, "step": 21100 }, { "epoch": 31.649175412293854, "grad_norm": 0.3664487898349762, "learning_rate": 9.307157085101932e-05, "loss": 0.0169, "step": 21110 }, { "epoch": 31.66416791604198, "grad_norm": 0.3455115854740143, "learning_rate": 9.306317095415109e-05, "loss": 0.0168, "step": 21120 }, { "epoch": 31.679160419790104, "grad_norm": 0.21494567394256592, "learning_rate": 9.305476634795922e-05, "loss": 0.0117, "step": 21130 }, { "epoch": 31.69415292353823, "grad_norm": 0.2706649601459503, "learning_rate": 9.304635703336284e-05, "loss": 0.0197, "step": 21140 }, { "epoch": 31.709145427286355, "grad_norm": 0.24316807091236115, "learning_rate": 9.303794301128157e-05, "loss": 0.014, "step": 21150 }, { "epoch": 31.724137931034484, "grad_norm": 0.23859646916389465, "learning_rate": 9.302952428263555e-05, "loss": 0.0129, "step": 21160 }, { "epoch": 31.73913043478261, "grad_norm": 0.3559103310108185, "learning_rate": 9.302110084834545e-05, "loss": 0.0143, "step": 21170 }, { "epoch": 31.754122938530735, "grad_norm": 0.2041262984275818, "learning_rate": 9.301267270933245e-05, "loss": 0.0124, "step": 21180 }, { "epoch": 31.76911544227886, "grad_norm": 0.1911216825246811, "learning_rate": 9.300423986651823e-05, "loss": 0.0129, "step": 21190 }, { "epoch": 31.784107946026985, "grad_norm": 0.19302161037921906, "learning_rate": 9.299580232082501e-05, "loss": 0.0164, "step": 21200 }, { "epoch": 31.79910044977511, "grad_norm": 0.20254601538181305, "learning_rate": 9.298736007317547e-05, "loss": 0.0119, "step": 21210 }, { "epoch": 31.81409295352324, "grad_norm": 0.27370303869247437, "learning_rate": 9.297891312449288e-05, "loss": 0.0144, "step": 21220 }, { "epoch": 31.829085457271365, "grad_norm": 0.18020716309547424, "learning_rate": 9.297046147570094e-05, "loss": 0.0131, "step": 21230 }, { "epoch": 31.84407796101949, "grad_norm": 0.3146991431713104, "learning_rate": 9.296200512772396e-05, "loss": 0.0155, "step": 21240 }, { "epoch": 31.859070464767616, "grad_norm": 0.18307504057884216, "learning_rate": 9.295354408148668e-05, "loss": 0.014, "step": 21250 }, { "epoch": 31.87406296851574, "grad_norm": 0.4450148642063141, "learning_rate": 9.294507833791441e-05, "loss": 0.0161, "step": 21260 }, { "epoch": 31.88905547226387, "grad_norm": 0.36209481954574585, "learning_rate": 9.293660789793295e-05, "loss": 0.0178, "step": 21270 }, { "epoch": 31.904047976011995, "grad_norm": 0.174624964594841, "learning_rate": 9.292813276246858e-05, "loss": 0.0197, "step": 21280 }, { "epoch": 31.91904047976012, "grad_norm": 0.21671070158481598, "learning_rate": 9.291965293244816e-05, "loss": 0.0133, "step": 21290 }, { "epoch": 31.934032983508246, "grad_norm": 0.30515891313552856, "learning_rate": 9.291116840879904e-05, "loss": 0.0173, "step": 21300 }, { "epoch": 31.94902548725637, "grad_norm": 0.20120038092136383, "learning_rate": 9.290267919244904e-05, "loss": 0.0134, "step": 21310 }, { "epoch": 31.964017991004496, "grad_norm": 0.3249635398387909, "learning_rate": 9.289418528432655e-05, "loss": 0.0137, "step": 21320 }, { "epoch": 31.979010494752625, "grad_norm": 0.26615655422210693, "learning_rate": 9.288568668536045e-05, "loss": 0.0123, "step": 21330 }, { "epoch": 31.99400299850075, "grad_norm": 0.1654936671257019, "learning_rate": 9.287718339648013e-05, "loss": 0.0133, "step": 21340 }, { "epoch": 32.00899550224887, "grad_norm": 0.27006128430366516, "learning_rate": 9.28686754186155e-05, "loss": 0.0137, "step": 21350 }, { "epoch": 32.023988005997005, "grad_norm": 0.3023955225944519, "learning_rate": 9.286016275269698e-05, "loss": 0.0175, "step": 21360 }, { "epoch": 32.03898050974513, "grad_norm": 0.3322297930717468, "learning_rate": 9.285164539965551e-05, "loss": 0.0157, "step": 21370 }, { "epoch": 32.053973013493255, "grad_norm": 0.24985229969024658, "learning_rate": 9.284312336042251e-05, "loss": 0.0133, "step": 21380 }, { "epoch": 32.06896551724138, "grad_norm": 0.19389529526233673, "learning_rate": 9.283459663592996e-05, "loss": 0.0129, "step": 21390 }, { "epoch": 32.083958020989506, "grad_norm": 1.1417351961135864, "learning_rate": 9.282606522711033e-05, "loss": 0.019, "step": 21400 }, { "epoch": 32.09895052473763, "grad_norm": 0.2556653618812561, "learning_rate": 9.281752913489657e-05, "loss": 0.0144, "step": 21410 }, { "epoch": 32.11394302848576, "grad_norm": 0.2953743040561676, "learning_rate": 9.280898836022222e-05, "loss": 0.0153, "step": 21420 }, { "epoch": 32.12893553223388, "grad_norm": 0.4186094105243683, "learning_rate": 9.280044290402126e-05, "loss": 0.0157, "step": 21430 }, { "epoch": 32.14392803598201, "grad_norm": 0.20383980870246887, "learning_rate": 9.279189276722821e-05, "loss": 0.0157, "step": 21440 }, { "epoch": 32.15892053973013, "grad_norm": 0.22407802939414978, "learning_rate": 9.278333795077812e-05, "loss": 0.0182, "step": 21450 }, { "epoch": 32.17391304347826, "grad_norm": 0.25097256898880005, "learning_rate": 9.27747784556065e-05, "loss": 0.0153, "step": 21460 }, { "epoch": 32.18890554722638, "grad_norm": 0.25974467396736145, "learning_rate": 9.276621428264942e-05, "loss": 0.0154, "step": 21470 }, { "epoch": 32.203898050974516, "grad_norm": 0.46097224950790405, "learning_rate": 9.275764543284345e-05, "loss": 0.016, "step": 21480 }, { "epoch": 32.21889055472264, "grad_norm": 0.28840893507003784, "learning_rate": 9.274907190712566e-05, "loss": 0.0162, "step": 21490 }, { "epoch": 32.23388305847077, "grad_norm": 0.31512391567230225, "learning_rate": 9.274049370643363e-05, "loss": 0.0165, "step": 21500 }, { "epoch": 32.24887556221889, "grad_norm": 0.199310302734375, "learning_rate": 9.273191083170547e-05, "loss": 0.0133, "step": 21510 }, { "epoch": 32.26386806596702, "grad_norm": 0.3926694691181183, "learning_rate": 9.27233232838798e-05, "loss": 0.017, "step": 21520 }, { "epoch": 32.27886056971514, "grad_norm": 0.39207908511161804, "learning_rate": 9.27147310638957e-05, "loss": 0.0114, "step": 21530 }, { "epoch": 32.29385307346327, "grad_norm": 0.25025320053100586, "learning_rate": 9.270613417269286e-05, "loss": 0.0152, "step": 21540 }, { "epoch": 32.30884557721139, "grad_norm": 0.2149893045425415, "learning_rate": 9.269753261121138e-05, "loss": 0.0113, "step": 21550 }, { "epoch": 32.32383808095952, "grad_norm": 0.21415743231773376, "learning_rate": 9.268892638039194e-05, "loss": 0.013, "step": 21560 }, { "epoch": 32.338830584707644, "grad_norm": 0.35327771306037903, "learning_rate": 9.268031548117569e-05, "loss": 0.014, "step": 21570 }, { "epoch": 32.35382308845577, "grad_norm": 0.2793293595314026, "learning_rate": 9.26716999145043e-05, "loss": 0.0153, "step": 21580 }, { "epoch": 32.3688155922039, "grad_norm": 0.2118748426437378, "learning_rate": 9.266307968131998e-05, "loss": 0.0159, "step": 21590 }, { "epoch": 32.38380809595203, "grad_norm": 0.3700208365917206, "learning_rate": 9.26544547825654e-05, "loss": 0.0153, "step": 21600 }, { "epoch": 32.39880059970015, "grad_norm": 0.2980888783931732, "learning_rate": 9.264582521918376e-05, "loss": 0.014, "step": 21610 }, { "epoch": 32.41379310344828, "grad_norm": 0.2776355743408203, "learning_rate": 9.263719099211881e-05, "loss": 0.0145, "step": 21620 }, { "epoch": 32.4287856071964, "grad_norm": 0.30916064977645874, "learning_rate": 9.262855210231476e-05, "loss": 0.0169, "step": 21630 }, { "epoch": 32.44377811094453, "grad_norm": 0.25076714158058167, "learning_rate": 9.261990855071633e-05, "loss": 0.011, "step": 21640 }, { "epoch": 32.458770614692654, "grad_norm": 0.21997793018817902, "learning_rate": 9.261126033826878e-05, "loss": 0.0147, "step": 21650 }, { "epoch": 32.47376311844078, "grad_norm": 0.19300848245620728, "learning_rate": 9.260260746591786e-05, "loss": 0.0131, "step": 21660 }, { "epoch": 32.488755622188904, "grad_norm": 0.3270876109600067, "learning_rate": 9.259394993460985e-05, "loss": 0.0122, "step": 21670 }, { "epoch": 32.50374812593703, "grad_norm": 0.2583938539028168, "learning_rate": 9.258528774529151e-05, "loss": 0.0153, "step": 21680 }, { "epoch": 32.518740629685155, "grad_norm": 0.22532446682453156, "learning_rate": 9.257662089891013e-05, "loss": 0.0142, "step": 21690 }, { "epoch": 32.53373313343328, "grad_norm": 0.24661293625831604, "learning_rate": 9.25679493964135e-05, "loss": 0.0155, "step": 21700 }, { "epoch": 32.54872563718141, "grad_norm": 0.24356015026569366, "learning_rate": 9.255927323874994e-05, "loss": 0.0152, "step": 21710 }, { "epoch": 32.56371814092954, "grad_norm": 0.2683451771736145, "learning_rate": 9.255059242686822e-05, "loss": 0.0136, "step": 21720 }, { "epoch": 32.57871064467766, "grad_norm": 0.2564743459224701, "learning_rate": 9.254190696171769e-05, "loss": 0.0136, "step": 21730 }, { "epoch": 32.59370314842579, "grad_norm": 0.2508116066455841, "learning_rate": 9.25332168442482e-05, "loss": 0.0149, "step": 21740 }, { "epoch": 32.608695652173914, "grad_norm": 0.32268592715263367, "learning_rate": 9.252452207541004e-05, "loss": 0.0139, "step": 21750 }, { "epoch": 32.62368815592204, "grad_norm": 0.25752389430999756, "learning_rate": 9.251582265615409e-05, "loss": 0.0135, "step": 21760 }, { "epoch": 32.638680659670165, "grad_norm": 0.28622370958328247, "learning_rate": 9.250711858743169e-05, "loss": 0.0153, "step": 21770 }, { "epoch": 32.65367316341829, "grad_norm": 0.214075967669487, "learning_rate": 9.24984098701947e-05, "loss": 0.0132, "step": 21780 }, { "epoch": 32.668665667166415, "grad_norm": 0.20579813420772552, "learning_rate": 9.248969650539552e-05, "loss": 0.0142, "step": 21790 }, { "epoch": 32.68365817091454, "grad_norm": 0.2582528293132782, "learning_rate": 9.2480978493987e-05, "loss": 0.0143, "step": 21800 }, { "epoch": 32.698650674662666, "grad_norm": 0.30952218174934387, "learning_rate": 9.247225583692256e-05, "loss": 0.0135, "step": 21810 }, { "epoch": 32.71364317841079, "grad_norm": 0.16866524517536163, "learning_rate": 9.246352853515607e-05, "loss": 0.0165, "step": 21820 }, { "epoch": 32.728635682158924, "grad_norm": 0.18141545355319977, "learning_rate": 9.245479658964194e-05, "loss": 0.0139, "step": 21830 }, { "epoch": 32.74362818590705, "grad_norm": 0.2508896589279175, "learning_rate": 9.244606000133507e-05, "loss": 0.0134, "step": 21840 }, { "epoch": 32.758620689655174, "grad_norm": 0.29886677861213684, "learning_rate": 9.24373187711909e-05, "loss": 0.0156, "step": 21850 }, { "epoch": 32.7736131934033, "grad_norm": 0.2609637677669525, "learning_rate": 9.242857290016537e-05, "loss": 0.0193, "step": 21860 }, { "epoch": 32.788605697151425, "grad_norm": 0.22851724922657013, "learning_rate": 9.241982238921488e-05, "loss": 0.0154, "step": 21870 }, { "epoch": 32.80359820089955, "grad_norm": 0.3232591152191162, "learning_rate": 9.24110672392964e-05, "loss": 0.0148, "step": 21880 }, { "epoch": 32.818590704647676, "grad_norm": 0.3157676160335541, "learning_rate": 9.240230745136737e-05, "loss": 0.0146, "step": 21890 }, { "epoch": 32.8335832083958, "grad_norm": 0.3187699615955353, "learning_rate": 9.239354302638575e-05, "loss": 0.014, "step": 21900 }, { "epoch": 32.848575712143926, "grad_norm": 0.42403683066368103, "learning_rate": 9.238477396531e-05, "loss": 0.0134, "step": 21910 }, { "epoch": 32.86356821589205, "grad_norm": 0.20912344753742218, "learning_rate": 9.23760002690991e-05, "loss": 0.0133, "step": 21920 }, { "epoch": 32.87856071964018, "grad_norm": 0.29506736993789673, "learning_rate": 9.236722193871252e-05, "loss": 0.0164, "step": 21930 }, { "epoch": 32.89355322338831, "grad_norm": 0.3282971978187561, "learning_rate": 9.235843897511023e-05, "loss": 0.0193, "step": 21940 }, { "epoch": 32.908545727136435, "grad_norm": 0.18273265659809113, "learning_rate": 9.234965137925276e-05, "loss": 0.0128, "step": 21950 }, { "epoch": 32.92353823088456, "grad_norm": 0.26477280259132385, "learning_rate": 9.234085915210108e-05, "loss": 0.0137, "step": 21960 }, { "epoch": 32.938530734632685, "grad_norm": 0.29100003838539124, "learning_rate": 9.23320622946167e-05, "loss": 0.0105, "step": 21970 }, { "epoch": 32.95352323838081, "grad_norm": 0.1901889592409134, "learning_rate": 9.232326080776163e-05, "loss": 0.0156, "step": 21980 }, { "epoch": 32.968515742128936, "grad_norm": 0.18862409889698029, "learning_rate": 9.23144546924984e-05, "loss": 0.0196, "step": 21990 }, { "epoch": 32.98350824587706, "grad_norm": 0.2167569100856781, "learning_rate": 9.230564394979e-05, "loss": 0.0131, "step": 22000 }, { "epoch": 32.99850074962519, "grad_norm": 0.1820726990699768, "learning_rate": 9.22968285806e-05, "loss": 0.0114, "step": 22010 }, { "epoch": 33.01349325337331, "grad_norm": 0.2666928172111511, "learning_rate": 9.228800858589242e-05, "loss": 0.0144, "step": 22020 }, { "epoch": 33.02848575712144, "grad_norm": 0.2403814047574997, "learning_rate": 9.227918396663179e-05, "loss": 0.0114, "step": 22030 }, { "epoch": 33.04347826086956, "grad_norm": 0.15739773213863373, "learning_rate": 9.227035472378319e-05, "loss": 0.0122, "step": 22040 }, { "epoch": 33.05847076461769, "grad_norm": 0.23782110214233398, "learning_rate": 9.226152085831213e-05, "loss": 0.0138, "step": 22050 }, { "epoch": 33.07346326836582, "grad_norm": 0.4731338322162628, "learning_rate": 9.22526823711847e-05, "loss": 0.0133, "step": 22060 }, { "epoch": 33.088455772113946, "grad_norm": 0.22667983174324036, "learning_rate": 9.224383926336745e-05, "loss": 0.0116, "step": 22070 }, { "epoch": 33.10344827586207, "grad_norm": 0.36452603340148926, "learning_rate": 9.223499153582744e-05, "loss": 0.0157, "step": 22080 }, { "epoch": 33.1184407796102, "grad_norm": 0.20756281912326813, "learning_rate": 9.222613918953226e-05, "loss": 0.0141, "step": 22090 }, { "epoch": 33.13343328335832, "grad_norm": 0.30587393045425415, "learning_rate": 9.221728222544999e-05, "loss": 0.0145, "step": 22100 }, { "epoch": 33.14842578710645, "grad_norm": 0.40793734788894653, "learning_rate": 9.22084206445492e-05, "loss": 0.0153, "step": 22110 }, { "epoch": 33.16341829085457, "grad_norm": 0.33670860528945923, "learning_rate": 9.2199554447799e-05, "loss": 0.0178, "step": 22120 }, { "epoch": 33.1784107946027, "grad_norm": 0.22486726939678192, "learning_rate": 9.219068363616897e-05, "loss": 0.0139, "step": 22130 }, { "epoch": 33.19340329835082, "grad_norm": 0.29524871706962585, "learning_rate": 9.218180821062919e-05, "loss": 0.0125, "step": 22140 }, { "epoch": 33.20839580209895, "grad_norm": 0.3754793405532837, "learning_rate": 9.21729281721503e-05, "loss": 0.0151, "step": 22150 }, { "epoch": 33.223388305847074, "grad_norm": 0.39471906423568726, "learning_rate": 9.216404352170339e-05, "loss": 0.0139, "step": 22160 }, { "epoch": 33.2383808095952, "grad_norm": 0.3987620174884796, "learning_rate": 9.215515426026007e-05, "loss": 0.014, "step": 22170 }, { "epoch": 33.25337331334333, "grad_norm": 0.18401968479156494, "learning_rate": 9.214626038879246e-05, "loss": 0.0127, "step": 22180 }, { "epoch": 33.26836581709146, "grad_norm": 0.2150288075208664, "learning_rate": 9.21373619082732e-05, "loss": 0.0149, "step": 22190 }, { "epoch": 33.28335832083958, "grad_norm": 0.2796476185321808, "learning_rate": 9.212845881967535e-05, "loss": 0.0107, "step": 22200 }, { "epoch": 33.29835082458771, "grad_norm": 0.23591002821922302, "learning_rate": 9.211955112397262e-05, "loss": 0.0137, "step": 22210 }, { "epoch": 33.31334332833583, "grad_norm": 0.3122240900993347, "learning_rate": 9.211063882213909e-05, "loss": 0.0148, "step": 22220 }, { "epoch": 33.32833583208396, "grad_norm": 0.25683730840682983, "learning_rate": 9.210172191514942e-05, "loss": 0.0152, "step": 22230 }, { "epoch": 33.343328335832084, "grad_norm": 0.2510616183280945, "learning_rate": 9.209280040397874e-05, "loss": 0.0124, "step": 22240 }, { "epoch": 33.35832083958021, "grad_norm": 0.20770539343357086, "learning_rate": 9.208387428960268e-05, "loss": 0.0174, "step": 22250 }, { "epoch": 33.373313343328334, "grad_norm": 0.48765867948532104, "learning_rate": 9.20749435729974e-05, "loss": 0.0174, "step": 22260 }, { "epoch": 33.38830584707646, "grad_norm": 0.2565407454967499, "learning_rate": 9.206600825513957e-05, "loss": 0.0109, "step": 22270 }, { "epoch": 33.403298350824585, "grad_norm": 0.22248028218746185, "learning_rate": 9.20570683370063e-05, "loss": 0.0119, "step": 22280 }, { "epoch": 33.41829085457271, "grad_norm": 0.19186460971832275, "learning_rate": 9.204812381957528e-05, "loss": 0.0109, "step": 22290 }, { "epoch": 33.43328335832084, "grad_norm": 0.15625715255737305, "learning_rate": 9.203917470382465e-05, "loss": 0.0103, "step": 22300 }, { "epoch": 33.44827586206897, "grad_norm": 0.2590790092945099, "learning_rate": 9.203022099073309e-05, "loss": 0.0116, "step": 22310 }, { "epoch": 33.46326836581709, "grad_norm": 0.31984224915504456, "learning_rate": 9.202126268127976e-05, "loss": 0.0137, "step": 22320 }, { "epoch": 33.47826086956522, "grad_norm": 0.2862773835659027, "learning_rate": 9.20122997764443e-05, "loss": 0.015, "step": 22330 }, { "epoch": 33.493253373313344, "grad_norm": 0.14361536502838135, "learning_rate": 9.200333227720692e-05, "loss": 0.0146, "step": 22340 }, { "epoch": 33.50824587706147, "grad_norm": 0.24370957911014557, "learning_rate": 9.199436018454826e-05, "loss": 0.0152, "step": 22350 }, { "epoch": 33.523238380809595, "grad_norm": 0.24788504838943481, "learning_rate": 9.198538349944952e-05, "loss": 0.0129, "step": 22360 }, { "epoch": 33.53823088455772, "grad_norm": 0.3455914855003357, "learning_rate": 9.197640222289234e-05, "loss": 0.0157, "step": 22370 }, { "epoch": 33.553223388305845, "grad_norm": 0.40168440341949463, "learning_rate": 9.196741635585895e-05, "loss": 0.0281, "step": 22380 }, { "epoch": 33.56821589205397, "grad_norm": 0.31366434693336487, "learning_rate": 9.195842589933199e-05, "loss": 0.0164, "step": 22390 }, { "epoch": 33.583208395802096, "grad_norm": 0.2961229681968689, "learning_rate": 9.194943085429466e-05, "loss": 0.0186, "step": 22400 }, { "epoch": 33.59820089955023, "grad_norm": 0.2654397785663605, "learning_rate": 9.194043122173065e-05, "loss": 0.0164, "step": 22410 }, { "epoch": 33.613193403298354, "grad_norm": 0.2985329329967499, "learning_rate": 9.193142700262413e-05, "loss": 0.0174, "step": 22420 }, { "epoch": 33.62818590704648, "grad_norm": 0.2916295528411865, "learning_rate": 9.192241819795979e-05, "loss": 0.0138, "step": 22430 }, { "epoch": 33.643178410794604, "grad_norm": 0.37119612097740173, "learning_rate": 9.191340480872284e-05, "loss": 0.014, "step": 22440 }, { "epoch": 33.65817091454273, "grad_norm": 0.32538801431655884, "learning_rate": 9.190438683589895e-05, "loss": 0.0152, "step": 22450 }, { "epoch": 33.673163418290855, "grad_norm": 0.31000784039497375, "learning_rate": 9.189536428047432e-05, "loss": 0.0169, "step": 22460 }, { "epoch": 33.68815592203898, "grad_norm": 0.2422192096710205, "learning_rate": 9.188633714343564e-05, "loss": 0.0164, "step": 22470 }, { "epoch": 33.703148425787106, "grad_norm": 0.3004416525363922, "learning_rate": 9.18773054257701e-05, "loss": 0.0148, "step": 22480 }, { "epoch": 33.71814092953523, "grad_norm": 0.23395416140556335, "learning_rate": 9.18682691284654e-05, "loss": 0.0142, "step": 22490 }, { "epoch": 33.733133433283356, "grad_norm": 0.28191035985946655, "learning_rate": 9.185922825250974e-05, "loss": 0.0159, "step": 22500 }, { "epoch": 33.74812593703148, "grad_norm": 0.24629315733909607, "learning_rate": 9.185018279889181e-05, "loss": 0.0146, "step": 22510 }, { "epoch": 33.76311844077961, "grad_norm": 0.34393247961997986, "learning_rate": 9.184113276860082e-05, "loss": 0.0157, "step": 22520 }, { "epoch": 33.77811094452774, "grad_norm": 0.41636115312576294, "learning_rate": 9.183207816262645e-05, "loss": 0.0171, "step": 22530 }, { "epoch": 33.793103448275865, "grad_norm": 0.18739153444766998, "learning_rate": 9.182301898195891e-05, "loss": 0.0126, "step": 22540 }, { "epoch": 33.80809595202399, "grad_norm": 0.3889821469783783, "learning_rate": 9.181395522758889e-05, "loss": 0.0142, "step": 22550 }, { "epoch": 33.823088455772115, "grad_norm": 0.3210555911064148, "learning_rate": 9.180488690050759e-05, "loss": 0.0131, "step": 22560 }, { "epoch": 33.83808095952024, "grad_norm": 0.17395684123039246, "learning_rate": 9.179581400170671e-05, "loss": 0.0111, "step": 22570 }, { "epoch": 33.853073463268366, "grad_norm": 0.3876434862613678, "learning_rate": 9.178673653217845e-05, "loss": 0.0148, "step": 22580 }, { "epoch": 33.86806596701649, "grad_norm": 0.32890281081199646, "learning_rate": 9.177765449291551e-05, "loss": 0.0138, "step": 22590 }, { "epoch": 33.88305847076462, "grad_norm": 0.27116549015045166, "learning_rate": 9.176856788491109e-05, "loss": 0.0121, "step": 22600 }, { "epoch": 33.89805097451274, "grad_norm": 0.21647906303405762, "learning_rate": 9.175947670915887e-05, "loss": 0.013, "step": 22610 }, { "epoch": 33.91304347826087, "grad_norm": 0.2637392580509186, "learning_rate": 9.175038096665309e-05, "loss": 0.0148, "step": 22620 }, { "epoch": 33.92803598200899, "grad_norm": 0.22881734371185303, "learning_rate": 9.17412806583884e-05, "loss": 0.0127, "step": 22630 }, { "epoch": 33.94302848575712, "grad_norm": 0.23943664133548737, "learning_rate": 9.173217578536002e-05, "loss": 0.0132, "step": 22640 }, { "epoch": 33.95802098950525, "grad_norm": 0.3080226480960846, "learning_rate": 9.172306634856362e-05, "loss": 0.0142, "step": 22650 }, { "epoch": 33.973013493253376, "grad_norm": 0.1594795435667038, "learning_rate": 9.171395234899545e-05, "loss": 0.0146, "step": 22660 }, { "epoch": 33.9880059970015, "grad_norm": 0.2838643193244934, "learning_rate": 9.170483378765214e-05, "loss": 0.0145, "step": 22670 }, { "epoch": 34.00299850074963, "grad_norm": 0.16348323225975037, "learning_rate": 9.169571066553091e-05, "loss": 0.0117, "step": 22680 }, { "epoch": 34.01799100449775, "grad_norm": 0.20411929488182068, "learning_rate": 9.168658298362946e-05, "loss": 0.0182, "step": 22690 }, { "epoch": 34.03298350824588, "grad_norm": 0.24929560720920563, "learning_rate": 9.167745074294598e-05, "loss": 0.0144, "step": 22700 }, { "epoch": 34.047976011994, "grad_norm": 0.2522962689399719, "learning_rate": 9.166831394447913e-05, "loss": 0.0135, "step": 22710 }, { "epoch": 34.06296851574213, "grad_norm": 0.1816224902868271, "learning_rate": 9.165917258922812e-05, "loss": 0.0125, "step": 22720 }, { "epoch": 34.07796101949025, "grad_norm": 0.16227637231349945, "learning_rate": 9.165002667819262e-05, "loss": 0.0157, "step": 22730 }, { "epoch": 34.09295352323838, "grad_norm": 0.27460724115371704, "learning_rate": 9.164087621237282e-05, "loss": 0.0155, "step": 22740 }, { "epoch": 34.107946026986504, "grad_norm": 0.2785874307155609, "learning_rate": 9.163172119276942e-05, "loss": 0.0137, "step": 22750 }, { "epoch": 34.12293853073463, "grad_norm": 0.17491593956947327, "learning_rate": 9.162256162038358e-05, "loss": 0.0139, "step": 22760 }, { "epoch": 34.13793103448276, "grad_norm": 0.12556712329387665, "learning_rate": 9.161339749621698e-05, "loss": 0.0126, "step": 22770 }, { "epoch": 34.15292353823089, "grad_norm": 0.19028663635253906, "learning_rate": 9.160422882127177e-05, "loss": 0.0152, "step": 22780 }, { "epoch": 34.16791604197901, "grad_norm": 0.3906174302101135, "learning_rate": 9.159505559655069e-05, "loss": 0.0189, "step": 22790 }, { "epoch": 34.18290854572714, "grad_norm": 0.28675463795661926, "learning_rate": 9.158587782305684e-05, "loss": 0.0115, "step": 22800 }, { "epoch": 34.19790104947526, "grad_norm": 0.19635002315044403, "learning_rate": 9.157669550179391e-05, "loss": 0.0137, "step": 22810 }, { "epoch": 34.21289355322339, "grad_norm": 0.2687790095806122, "learning_rate": 9.156750863376609e-05, "loss": 0.0133, "step": 22820 }, { "epoch": 34.22788605697151, "grad_norm": 0.21515877544879913, "learning_rate": 9.155831721997801e-05, "loss": 0.0118, "step": 22830 }, { "epoch": 34.24287856071964, "grad_norm": 0.22461390495300293, "learning_rate": 9.154912126143484e-05, "loss": 0.0135, "step": 22840 }, { "epoch": 34.257871064467764, "grad_norm": 0.2347741425037384, "learning_rate": 9.153992075914224e-05, "loss": 0.0137, "step": 22850 }, { "epoch": 34.27286356821589, "grad_norm": 0.38432490825653076, "learning_rate": 9.153071571410635e-05, "loss": 0.0149, "step": 22860 }, { "epoch": 34.287856071964015, "grad_norm": 0.2008311152458191, "learning_rate": 9.152150612733384e-05, "loss": 0.0141, "step": 22870 }, { "epoch": 34.30284857571215, "grad_norm": 0.2001478523015976, "learning_rate": 9.151229199983184e-05, "loss": 0.0131, "step": 22880 }, { "epoch": 34.31784107946027, "grad_norm": 0.25902700424194336, "learning_rate": 9.150307333260802e-05, "loss": 0.0143, "step": 22890 }, { "epoch": 34.3328335832084, "grad_norm": 0.2797527611255646, "learning_rate": 9.149385012667048e-05, "loss": 0.0139, "step": 22900 }, { "epoch": 34.34782608695652, "grad_norm": 0.2926182746887207, "learning_rate": 9.148462238302788e-05, "loss": 0.0144, "step": 22910 }, { "epoch": 34.36281859070465, "grad_norm": 0.23028913140296936, "learning_rate": 9.147539010268936e-05, "loss": 0.0165, "step": 22920 }, { "epoch": 34.377811094452774, "grad_norm": 0.2799278497695923, "learning_rate": 9.14661532866645e-05, "loss": 0.0122, "step": 22930 }, { "epoch": 34.3928035982009, "grad_norm": 0.22470368444919586, "learning_rate": 9.145691193596348e-05, "loss": 0.0129, "step": 22940 }, { "epoch": 34.407796101949025, "grad_norm": 0.33247002959251404, "learning_rate": 9.144766605159691e-05, "loss": 0.0134, "step": 22950 }, { "epoch": 34.42278860569715, "grad_norm": 0.4708809554576874, "learning_rate": 9.14384156345759e-05, "loss": 0.0134, "step": 22960 }, { "epoch": 34.437781109445275, "grad_norm": 0.15432365238666534, "learning_rate": 9.142916068591204e-05, "loss": 0.0118, "step": 22970 }, { "epoch": 34.4527736131934, "grad_norm": 0.22369587421417236, "learning_rate": 9.141990120661746e-05, "loss": 0.0131, "step": 22980 }, { "epoch": 34.467766116941526, "grad_norm": 0.2621956169605255, "learning_rate": 9.141063719770475e-05, "loss": 0.0121, "step": 22990 }, { "epoch": 34.48275862068966, "grad_norm": 0.33331382274627686, "learning_rate": 9.140136866018704e-05, "loss": 0.0141, "step": 23000 }, { "epoch": 34.497751124437784, "grad_norm": 0.3060976564884186, "learning_rate": 9.139209559507788e-05, "loss": 0.0127, "step": 23010 }, { "epoch": 34.51274362818591, "grad_norm": 0.31281405687332153, "learning_rate": 9.13828180033914e-05, "loss": 0.0149, "step": 23020 }, { "epoch": 34.527736131934034, "grad_norm": 0.1873794049024582, "learning_rate": 9.137353588614212e-05, "loss": 0.0135, "step": 23030 }, { "epoch": 34.54272863568216, "grad_norm": 0.2146487534046173, "learning_rate": 9.136424924434519e-05, "loss": 0.0159, "step": 23040 }, { "epoch": 34.557721139430285, "grad_norm": 0.29048627614974976, "learning_rate": 9.135495807901615e-05, "loss": 0.0155, "step": 23050 }, { "epoch": 34.57271364317841, "grad_norm": 0.2783236503601074, "learning_rate": 9.134566239117108e-05, "loss": 0.0161, "step": 23060 }, { "epoch": 34.587706146926536, "grad_norm": 0.2388877272605896, "learning_rate": 9.13363621818265e-05, "loss": 0.0162, "step": 23070 }, { "epoch": 34.60269865067466, "grad_norm": 0.2871883809566498, "learning_rate": 9.132705745199953e-05, "loss": 0.0174, "step": 23080 }, { "epoch": 34.617691154422786, "grad_norm": 0.3249244689941406, "learning_rate": 9.131774820270768e-05, "loss": 0.0129, "step": 23090 }, { "epoch": 34.63268365817091, "grad_norm": 0.27217021584510803, "learning_rate": 9.130843443496901e-05, "loss": 0.0147, "step": 23100 }, { "epoch": 34.64767616191904, "grad_norm": 0.33604705333709717, "learning_rate": 9.129911614980206e-05, "loss": 0.0159, "step": 23110 }, { "epoch": 34.66266866566717, "grad_norm": 0.27714625000953674, "learning_rate": 9.128979334822584e-05, "loss": 0.0158, "step": 23120 }, { "epoch": 34.677661169415295, "grad_norm": 0.24401092529296875, "learning_rate": 9.128046603125992e-05, "loss": 0.0167, "step": 23130 }, { "epoch": 34.69265367316342, "grad_norm": 0.2894132733345032, "learning_rate": 9.12711341999243e-05, "loss": 0.0144, "step": 23140 }, { "epoch": 34.707646176911545, "grad_norm": 0.17271684110164642, "learning_rate": 9.12617978552395e-05, "loss": 0.0163, "step": 23150 }, { "epoch": 34.72263868065967, "grad_norm": 0.4071822762489319, "learning_rate": 9.12524569982265e-05, "loss": 0.016, "step": 23160 }, { "epoch": 34.737631184407796, "grad_norm": 0.2689719796180725, "learning_rate": 9.124311162990684e-05, "loss": 0.0142, "step": 23170 }, { "epoch": 34.75262368815592, "grad_norm": 0.26617321372032166, "learning_rate": 9.12337617513025e-05, "loss": 0.0124, "step": 23180 }, { "epoch": 34.76761619190405, "grad_norm": 0.23251719772815704, "learning_rate": 9.122440736343596e-05, "loss": 0.0188, "step": 23190 }, { "epoch": 34.78260869565217, "grad_norm": 0.22327986359596252, "learning_rate": 9.12150484673302e-05, "loss": 0.0119, "step": 23200 }, { "epoch": 34.7976011994003, "grad_norm": 0.24503985047340393, "learning_rate": 9.120568506400873e-05, "loss": 0.015, "step": 23210 }, { "epoch": 34.81259370314842, "grad_norm": 0.2180107682943344, "learning_rate": 9.119631715449548e-05, "loss": 0.018, "step": 23220 }, { "epoch": 34.827586206896555, "grad_norm": 0.1977195143699646, "learning_rate": 9.118694473981493e-05, "loss": 0.0185, "step": 23230 }, { "epoch": 34.84257871064468, "grad_norm": 0.3255189061164856, "learning_rate": 9.117756782099203e-05, "loss": 0.0165, "step": 23240 }, { "epoch": 34.857571214392806, "grad_norm": 0.22454829514026642, "learning_rate": 9.11681863990522e-05, "loss": 0.0154, "step": 23250 }, { "epoch": 34.87256371814093, "grad_norm": 0.3825650215148926, "learning_rate": 9.115880047502142e-05, "loss": 0.0147, "step": 23260 }, { "epoch": 34.88755622188906, "grad_norm": 0.24720455706119537, "learning_rate": 9.114941004992609e-05, "loss": 0.0177, "step": 23270 }, { "epoch": 34.90254872563718, "grad_norm": 0.29082927107810974, "learning_rate": 9.114001512479317e-05, "loss": 0.0154, "step": 23280 }, { "epoch": 34.91754122938531, "grad_norm": 0.18865415453910828, "learning_rate": 9.113061570065003e-05, "loss": 0.0158, "step": 23290 }, { "epoch": 34.93253373313343, "grad_norm": 0.3353174030780792, "learning_rate": 9.112121177852459e-05, "loss": 0.0154, "step": 23300 }, { "epoch": 34.94752623688156, "grad_norm": 0.29195499420166016, "learning_rate": 9.111180335944527e-05, "loss": 0.0131, "step": 23310 }, { "epoch": 34.96251874062968, "grad_norm": 0.20840847492218018, "learning_rate": 9.110239044444093e-05, "loss": 0.0147, "step": 23320 }, { "epoch": 34.97751124437781, "grad_norm": 0.23486073315143585, "learning_rate": 9.109297303454099e-05, "loss": 0.0119, "step": 23330 }, { "epoch": 34.992503748125934, "grad_norm": 0.30323663353919983, "learning_rate": 9.108355113077526e-05, "loss": 0.0125, "step": 23340 }, { "epoch": 35.007496251874066, "grad_norm": 0.1790703386068344, "learning_rate": 9.107412473417419e-05, "loss": 0.0138, "step": 23350 }, { "epoch": 35.02248875562219, "grad_norm": 0.2269752323627472, "learning_rate": 9.106469384576858e-05, "loss": 0.0168, "step": 23360 }, { "epoch": 35.03748125937032, "grad_norm": 0.20295722782611847, "learning_rate": 9.105525846658978e-05, "loss": 0.0108, "step": 23370 }, { "epoch": 35.05247376311844, "grad_norm": 0.2401728630065918, "learning_rate": 9.104581859766965e-05, "loss": 0.0128, "step": 23380 }, { "epoch": 35.06746626686657, "grad_norm": 0.5042163729667664, "learning_rate": 9.10363742400405e-05, "loss": 0.0152, "step": 23390 }, { "epoch": 35.08245877061469, "grad_norm": 0.24820755422115326, "learning_rate": 9.102692539473518e-05, "loss": 0.0151, "step": 23400 }, { "epoch": 35.09745127436282, "grad_norm": 0.292920857667923, "learning_rate": 9.101747206278697e-05, "loss": 0.0127, "step": 23410 }, { "epoch": 35.11244377811094, "grad_norm": 0.19080547988414764, "learning_rate": 9.100801424522968e-05, "loss": 0.0129, "step": 23420 }, { "epoch": 35.12743628185907, "grad_norm": 0.20932000875473022, "learning_rate": 9.099855194309762e-05, "loss": 0.0151, "step": 23430 }, { "epoch": 35.142428785607194, "grad_norm": 0.21370962262153625, "learning_rate": 9.098908515742554e-05, "loss": 0.0133, "step": 23440 }, { "epoch": 35.15742128935532, "grad_norm": 0.2080465853214264, "learning_rate": 9.097961388924873e-05, "loss": 0.0122, "step": 23450 }, { "epoch": 35.172413793103445, "grad_norm": 0.19874395430088043, "learning_rate": 9.097013813960298e-05, "loss": 0.0151, "step": 23460 }, { "epoch": 35.18740629685158, "grad_norm": 0.20973706245422363, "learning_rate": 9.09606579095245e-05, "loss": 0.0136, "step": 23470 }, { "epoch": 35.2023988005997, "grad_norm": 0.4248502850532532, "learning_rate": 9.095117320005008e-05, "loss": 0.0163, "step": 23480 }, { "epoch": 35.21739130434783, "grad_norm": 0.26758071780204773, "learning_rate": 9.094168401221691e-05, "loss": 0.0145, "step": 23490 }, { "epoch": 35.23238380809595, "grad_norm": 0.2759811580181122, "learning_rate": 9.093219034706273e-05, "loss": 0.0157, "step": 23500 }, { "epoch": 35.24737631184408, "grad_norm": 0.2000948190689087, "learning_rate": 9.092269220562577e-05, "loss": 0.0134, "step": 23510 }, { "epoch": 35.262368815592204, "grad_norm": 0.28931036591529846, "learning_rate": 9.09131895889447e-05, "loss": 0.0133, "step": 23520 }, { "epoch": 35.27736131934033, "grad_norm": 0.2633744776248932, "learning_rate": 9.090368249805873e-05, "loss": 0.0154, "step": 23530 }, { "epoch": 35.292353823088455, "grad_norm": 0.20662817358970642, "learning_rate": 9.089417093400754e-05, "loss": 0.0123, "step": 23540 }, { "epoch": 35.30734632683658, "grad_norm": 0.16710135340690613, "learning_rate": 9.088465489783131e-05, "loss": 0.0141, "step": 23550 }, { "epoch": 35.322338830584705, "grad_norm": 0.5329375267028809, "learning_rate": 9.087513439057068e-05, "loss": 0.0126, "step": 23560 }, { "epoch": 35.33733133433283, "grad_norm": 0.2719194293022156, "learning_rate": 9.08656094132668e-05, "loss": 0.0147, "step": 23570 }, { "epoch": 35.35232383808096, "grad_norm": 0.22924134135246277, "learning_rate": 9.085607996696134e-05, "loss": 0.0141, "step": 23580 }, { "epoch": 35.36731634182909, "grad_norm": 0.2599416673183441, "learning_rate": 9.084654605269639e-05, "loss": 0.0126, "step": 23590 }, { "epoch": 35.382308845577214, "grad_norm": 0.3832169473171234, "learning_rate": 9.083700767151457e-05, "loss": 0.0166, "step": 23600 }, { "epoch": 35.39730134932534, "grad_norm": 0.21651233732700348, "learning_rate": 9.082746482445898e-05, "loss": 0.014, "step": 23610 }, { "epoch": 35.412293853073464, "grad_norm": 0.2457912564277649, "learning_rate": 9.081791751257325e-05, "loss": 0.016, "step": 23620 }, { "epoch": 35.42728635682159, "grad_norm": 0.2269251048564911, "learning_rate": 9.080836573690142e-05, "loss": 0.015, "step": 23630 }, { "epoch": 35.442278860569715, "grad_norm": 0.18501313030719757, "learning_rate": 9.079880949848805e-05, "loss": 0.0113, "step": 23640 }, { "epoch": 35.45727136431784, "grad_norm": 0.27640262246131897, "learning_rate": 9.078924879837822e-05, "loss": 0.0161, "step": 23650 }, { "epoch": 35.472263868065966, "grad_norm": 0.28311243653297424, "learning_rate": 9.077968363761747e-05, "loss": 0.0167, "step": 23660 }, { "epoch": 35.48725637181409, "grad_norm": 0.2773045003414154, "learning_rate": 9.077011401725182e-05, "loss": 0.0147, "step": 23670 }, { "epoch": 35.502248875562216, "grad_norm": 0.2571597695350647, "learning_rate": 9.07605399383278e-05, "loss": 0.0135, "step": 23680 }, { "epoch": 35.51724137931034, "grad_norm": 0.3859371244907379, "learning_rate": 9.075096140189243e-05, "loss": 0.0158, "step": 23690 }, { "epoch": 35.532233883058474, "grad_norm": 0.3734884262084961, "learning_rate": 9.074137840899318e-05, "loss": 0.0139, "step": 23700 }, { "epoch": 35.5472263868066, "grad_norm": 0.28267577290534973, "learning_rate": 9.073179096067804e-05, "loss": 0.0143, "step": 23710 }, { "epoch": 35.562218890554725, "grad_norm": 0.28667280077934265, "learning_rate": 9.072219905799549e-05, "loss": 0.0151, "step": 23720 }, { "epoch": 35.57721139430285, "grad_norm": 0.3707549273967743, "learning_rate": 9.071260270199447e-05, "loss": 0.0198, "step": 23730 }, { "epoch": 35.592203898050975, "grad_norm": 0.16367319226264954, "learning_rate": 9.070300189372441e-05, "loss": 0.0116, "step": 23740 }, { "epoch": 35.6071964017991, "grad_norm": 0.4911201596260071, "learning_rate": 9.069339663423528e-05, "loss": 0.0122, "step": 23750 }, { "epoch": 35.622188905547226, "grad_norm": 0.2599828839302063, "learning_rate": 9.068378692457747e-05, "loss": 0.0164, "step": 23760 }, { "epoch": 35.63718140929535, "grad_norm": 0.36340007185935974, "learning_rate": 9.067417276580189e-05, "loss": 0.017, "step": 23770 }, { "epoch": 35.65217391304348, "grad_norm": 0.2985913157463074, "learning_rate": 9.066455415895993e-05, "loss": 0.013, "step": 23780 }, { "epoch": 35.6671664167916, "grad_norm": 0.2966776490211487, "learning_rate": 9.065493110510346e-05, "loss": 0.0162, "step": 23790 }, { "epoch": 35.68215892053973, "grad_norm": 0.28087371587753296, "learning_rate": 9.064530360528484e-05, "loss": 0.0146, "step": 23800 }, { "epoch": 35.69715142428785, "grad_norm": 0.2599206864833832, "learning_rate": 9.063567166055695e-05, "loss": 0.0121, "step": 23810 }, { "epoch": 35.712143928035985, "grad_norm": 0.2190220057964325, "learning_rate": 9.062603527197308e-05, "loss": 0.0125, "step": 23820 }, { "epoch": 35.72713643178411, "grad_norm": 0.26992562413215637, "learning_rate": 9.06163944405871e-05, "loss": 0.0132, "step": 23830 }, { "epoch": 35.742128935532236, "grad_norm": 0.33794113993644714, "learning_rate": 9.060674916745327e-05, "loss": 0.0149, "step": 23840 }, { "epoch": 35.75712143928036, "grad_norm": 0.35257604718208313, "learning_rate": 9.05970994536264e-05, "loss": 0.0191, "step": 23850 }, { "epoch": 35.77211394302849, "grad_norm": 0.24351367354393005, "learning_rate": 9.05874453001618e-05, "loss": 0.0143, "step": 23860 }, { "epoch": 35.78710644677661, "grad_norm": 0.21362578868865967, "learning_rate": 9.057778670811517e-05, "loss": 0.0144, "step": 23870 }, { "epoch": 35.80209895052474, "grad_norm": 0.3051263988018036, "learning_rate": 9.056812367854281e-05, "loss": 0.016, "step": 23880 }, { "epoch": 35.81709145427286, "grad_norm": 0.1789083182811737, "learning_rate": 9.055845621250143e-05, "loss": 0.0174, "step": 23890 }, { "epoch": 35.83208395802099, "grad_norm": 0.2236717939376831, "learning_rate": 9.054878431104825e-05, "loss": 0.0137, "step": 23900 }, { "epoch": 35.84707646176911, "grad_norm": 0.27406179904937744, "learning_rate": 9.0539107975241e-05, "loss": 0.0145, "step": 23910 }, { "epoch": 35.86206896551724, "grad_norm": 0.20404039323329926, "learning_rate": 9.052942720613784e-05, "loss": 0.0146, "step": 23920 }, { "epoch": 35.87706146926537, "grad_norm": 0.21307101845741272, "learning_rate": 9.051974200479745e-05, "loss": 0.0148, "step": 23930 }, { "epoch": 35.892053973013496, "grad_norm": 0.2538624405860901, "learning_rate": 9.051005237227901e-05, "loss": 0.0142, "step": 23940 }, { "epoch": 35.90704647676162, "grad_norm": 0.3614160120487213, "learning_rate": 9.050035830964215e-05, "loss": 0.014, "step": 23950 }, { "epoch": 35.92203898050975, "grad_norm": 0.21284830570220947, "learning_rate": 9.049065981794698e-05, "loss": 0.0141, "step": 23960 }, { "epoch": 35.93703148425787, "grad_norm": 0.17853540182113647, "learning_rate": 9.048095689825414e-05, "loss": 0.0144, "step": 23970 }, { "epoch": 35.952023988006, "grad_norm": 0.269807368516922, "learning_rate": 9.047124955162472e-05, "loss": 0.0142, "step": 23980 }, { "epoch": 35.96701649175412, "grad_norm": 0.24812667071819305, "learning_rate": 9.046153777912028e-05, "loss": 0.0141, "step": 23990 }, { "epoch": 35.98200899550225, "grad_norm": 0.17971083521842957, "learning_rate": 9.045182158180292e-05, "loss": 0.0174, "step": 24000 }, { "epoch": 35.99700149925037, "grad_norm": 0.2063715010881424, "learning_rate": 9.044210096073516e-05, "loss": 0.0155, "step": 24010 }, { "epoch": 36.0119940029985, "grad_norm": 0.22812406718730927, "learning_rate": 9.043237591698004e-05, "loss": 0.013, "step": 24020 }, { "epoch": 36.026986506746624, "grad_norm": 0.21251071989536285, "learning_rate": 9.04226464516011e-05, "loss": 0.019, "step": 24030 }, { "epoch": 36.04197901049475, "grad_norm": 0.21716853976249695, "learning_rate": 9.041291256566229e-05, "loss": 0.0107, "step": 24040 }, { "epoch": 36.05697151424288, "grad_norm": 0.1844199001789093, "learning_rate": 9.040317426022814e-05, "loss": 0.0127, "step": 24050 }, { "epoch": 36.07196401799101, "grad_norm": 0.2559763491153717, "learning_rate": 9.03934315363636e-05, "loss": 0.0118, "step": 24060 }, { "epoch": 36.08695652173913, "grad_norm": 0.19790565967559814, "learning_rate": 9.038368439513409e-05, "loss": 0.0105, "step": 24070 }, { "epoch": 36.10194902548726, "grad_norm": 0.2329334169626236, "learning_rate": 9.03739328376056e-05, "loss": 0.0158, "step": 24080 }, { "epoch": 36.11694152923538, "grad_norm": 0.2831892967224121, "learning_rate": 9.036417686484451e-05, "loss": 0.0157, "step": 24090 }, { "epoch": 36.13193403298351, "grad_norm": 0.23545148968696594, "learning_rate": 9.035441647791773e-05, "loss": 0.0138, "step": 24100 }, { "epoch": 36.146926536731634, "grad_norm": 0.35000044107437134, "learning_rate": 9.034465167789263e-05, "loss": 0.0132, "step": 24110 }, { "epoch": 36.16191904047976, "grad_norm": 0.2056993544101715, "learning_rate": 9.033488246583706e-05, "loss": 0.0157, "step": 24120 }, { "epoch": 36.176911544227885, "grad_norm": 0.24497276544570923, "learning_rate": 9.032510884281941e-05, "loss": 0.0117, "step": 24130 }, { "epoch": 36.19190404797601, "grad_norm": 0.28238344192504883, "learning_rate": 9.031533080990848e-05, "loss": 0.0108, "step": 24140 }, { "epoch": 36.206896551724135, "grad_norm": 0.3618822395801544, "learning_rate": 9.030554836817358e-05, "loss": 0.0104, "step": 24150 }, { "epoch": 36.22188905547226, "grad_norm": 0.49520888924598694, "learning_rate": 9.029576151868451e-05, "loss": 0.0143, "step": 24160 }, { "epoch": 36.23688155922039, "grad_norm": 0.20098376274108887, "learning_rate": 9.028597026251155e-05, "loss": 0.0115, "step": 24170 }, { "epoch": 36.25187406296852, "grad_norm": 0.14215855300426483, "learning_rate": 9.027617460072547e-05, "loss": 0.0128, "step": 24180 }, { "epoch": 36.266866566716644, "grad_norm": 0.35957634449005127, "learning_rate": 9.026637453439745e-05, "loss": 0.0131, "step": 24190 }, { "epoch": 36.28185907046477, "grad_norm": 0.2867872714996338, "learning_rate": 9.025657006459927e-05, "loss": 0.0143, "step": 24200 }, { "epoch": 36.296851574212894, "grad_norm": 0.31513792276382446, "learning_rate": 9.024676119240311e-05, "loss": 0.016, "step": 24210 }, { "epoch": 36.31184407796102, "grad_norm": 0.29322710633277893, "learning_rate": 9.023694791888166e-05, "loss": 0.0148, "step": 24220 }, { "epoch": 36.326836581709145, "grad_norm": 0.2704208493232727, "learning_rate": 9.022713024510808e-05, "loss": 0.0135, "step": 24230 }, { "epoch": 36.34182908545727, "grad_norm": 0.32917165756225586, "learning_rate": 9.021730817215601e-05, "loss": 0.0146, "step": 24240 }, { "epoch": 36.356821589205396, "grad_norm": 0.26379770040512085, "learning_rate": 9.02074817010996e-05, "loss": 0.0132, "step": 24250 }, { "epoch": 36.37181409295352, "grad_norm": 0.22453255951404572, "learning_rate": 9.019765083301342e-05, "loss": 0.0111, "step": 24260 }, { "epoch": 36.386806596701646, "grad_norm": 0.3431008756160736, "learning_rate": 9.01878155689726e-05, "loss": 0.0146, "step": 24270 }, { "epoch": 36.40179910044977, "grad_norm": 0.1501302272081375, "learning_rate": 9.017797591005268e-05, "loss": 0.0145, "step": 24280 }, { "epoch": 36.416791604197904, "grad_norm": 0.18315503001213074, "learning_rate": 9.016813185732972e-05, "loss": 0.014, "step": 24290 }, { "epoch": 36.43178410794603, "grad_norm": 0.2874588370323181, "learning_rate": 9.015828341188027e-05, "loss": 0.0103, "step": 24300 }, { "epoch": 36.446776611694155, "grad_norm": 0.37300965189933777, "learning_rate": 9.01484305747813e-05, "loss": 0.0115, "step": 24310 }, { "epoch": 36.46176911544228, "grad_norm": 0.2763119041919708, "learning_rate": 9.013857334711033e-05, "loss": 0.0138, "step": 24320 }, { "epoch": 36.476761619190405, "grad_norm": 0.3672398328781128, "learning_rate": 9.012871172994534e-05, "loss": 0.0146, "step": 24330 }, { "epoch": 36.49175412293853, "grad_norm": 0.18707489967346191, "learning_rate": 9.011884572436476e-05, "loss": 0.0131, "step": 24340 }, { "epoch": 36.506746626686656, "grad_norm": 0.18673555552959442, "learning_rate": 9.010897533144754e-05, "loss": 0.0136, "step": 24350 }, { "epoch": 36.52173913043478, "grad_norm": 0.2382739633321762, "learning_rate": 9.009910055227306e-05, "loss": 0.0155, "step": 24360 }, { "epoch": 36.53673163418291, "grad_norm": 0.2524891495704651, "learning_rate": 9.008922138792124e-05, "loss": 0.0146, "step": 24370 }, { "epoch": 36.55172413793103, "grad_norm": 0.20503801107406616, "learning_rate": 9.007933783947244e-05, "loss": 0.0126, "step": 24380 }, { "epoch": 36.56671664167916, "grad_norm": 0.24702854454517365, "learning_rate": 9.006944990800752e-05, "loss": 0.0178, "step": 24390 }, { "epoch": 36.58170914542729, "grad_norm": 0.30246254801750183, "learning_rate": 9.005955759460779e-05, "loss": 0.0138, "step": 24400 }, { "epoch": 36.596701649175415, "grad_norm": 0.33424267172813416, "learning_rate": 9.004966090035508e-05, "loss": 0.0124, "step": 24410 }, { "epoch": 36.61169415292354, "grad_norm": 0.25915059447288513, "learning_rate": 9.003975982633166e-05, "loss": 0.0143, "step": 24420 }, { "epoch": 36.626686656671666, "grad_norm": 0.279197633266449, "learning_rate": 9.00298543736203e-05, "loss": 0.0182, "step": 24430 }, { "epoch": 36.64167916041979, "grad_norm": 0.2310645431280136, "learning_rate": 9.001994454330427e-05, "loss": 0.0112, "step": 24440 }, { "epoch": 36.656671664167916, "grad_norm": 0.261368989944458, "learning_rate": 9.001003033646727e-05, "loss": 0.0118, "step": 24450 }, { "epoch": 36.67166416791604, "grad_norm": 0.2579815089702606, "learning_rate": 9.00001117541935e-05, "loss": 0.0158, "step": 24460 }, { "epoch": 36.68665667166417, "grad_norm": 0.20941707491874695, "learning_rate": 8.999018879756764e-05, "loss": 0.0141, "step": 24470 }, { "epoch": 36.70164917541229, "grad_norm": 0.2644839882850647, "learning_rate": 8.998026146767487e-05, "loss": 0.0151, "step": 24480 }, { "epoch": 36.71664167916042, "grad_norm": 0.16693805158138275, "learning_rate": 8.99703297656008e-05, "loss": 0.0151, "step": 24490 }, { "epoch": 36.73163418290854, "grad_norm": 0.20996138453483582, "learning_rate": 8.996039369243156e-05, "loss": 0.0107, "step": 24500 }, { "epoch": 36.74662668665667, "grad_norm": 0.30120331048965454, "learning_rate": 8.995045324925378e-05, "loss": 0.013, "step": 24510 }, { "epoch": 36.7616191904048, "grad_norm": 0.1515861451625824, "learning_rate": 8.994050843715448e-05, "loss": 0.0138, "step": 24520 }, { "epoch": 36.776611694152926, "grad_norm": 0.19662034511566162, "learning_rate": 8.993055925722121e-05, "loss": 0.0137, "step": 24530 }, { "epoch": 36.79160419790105, "grad_norm": 0.2692579925060272, "learning_rate": 8.992060571054202e-05, "loss": 0.0143, "step": 24540 }, { "epoch": 36.80659670164918, "grad_norm": 0.2114100307226181, "learning_rate": 8.991064779820542e-05, "loss": 0.0148, "step": 24550 }, { "epoch": 36.8215892053973, "grad_norm": 0.2612091302871704, "learning_rate": 8.990068552130036e-05, "loss": 0.0128, "step": 24560 }, { "epoch": 36.83658170914543, "grad_norm": 0.238303542137146, "learning_rate": 8.989071888091634e-05, "loss": 0.0123, "step": 24570 }, { "epoch": 36.85157421289355, "grad_norm": 0.19621655344963074, "learning_rate": 8.988074787814329e-05, "loss": 0.0122, "step": 24580 }, { "epoch": 36.86656671664168, "grad_norm": 0.22290098667144775, "learning_rate": 8.987077251407158e-05, "loss": 0.0136, "step": 24590 }, { "epoch": 36.8815592203898, "grad_norm": 0.2409212440252304, "learning_rate": 8.986079278979216e-05, "loss": 0.0164, "step": 24600 }, { "epoch": 36.89655172413793, "grad_norm": 0.2851274609565735, "learning_rate": 8.985080870639635e-05, "loss": 0.0163, "step": 24610 }, { "epoch": 36.911544227886054, "grad_norm": 0.193709135055542, "learning_rate": 8.984082026497603e-05, "loss": 0.013, "step": 24620 }, { "epoch": 36.92653673163418, "grad_norm": 0.22227071225643158, "learning_rate": 8.98308274666235e-05, "loss": 0.0127, "step": 24630 }, { "epoch": 36.94152923538231, "grad_norm": 0.34216010570526123, "learning_rate": 8.982083031243155e-05, "loss": 0.0129, "step": 24640 }, { "epoch": 36.95652173913044, "grad_norm": 0.3060927987098694, "learning_rate": 8.98108288034935e-05, "loss": 0.0128, "step": 24650 }, { "epoch": 36.97151424287856, "grad_norm": 0.24318189918994904, "learning_rate": 8.980082294090305e-05, "loss": 0.0103, "step": 24660 }, { "epoch": 36.98650674662669, "grad_norm": 0.2930755019187927, "learning_rate": 8.979081272575443e-05, "loss": 0.0132, "step": 24670 }, { "epoch": 37.00149925037481, "grad_norm": 0.14122477173805237, "learning_rate": 8.978079815914236e-05, "loss": 0.0148, "step": 24680 }, { "epoch": 37.01649175412294, "grad_norm": 0.2512231469154358, "learning_rate": 8.977077924216202e-05, "loss": 0.0149, "step": 24690 }, { "epoch": 37.031484257871064, "grad_norm": 0.24777083098888397, "learning_rate": 8.976075597590905e-05, "loss": 0.0152, "step": 24700 }, { "epoch": 37.04647676161919, "grad_norm": 0.21278032660484314, "learning_rate": 8.975072836147958e-05, "loss": 0.0093, "step": 24710 }, { "epoch": 37.061469265367315, "grad_norm": 0.2436855286359787, "learning_rate": 8.974069639997025e-05, "loss": 0.0148, "step": 24720 }, { "epoch": 37.07646176911544, "grad_norm": 0.2943479120731354, "learning_rate": 8.973066009247808e-05, "loss": 0.0128, "step": 24730 }, { "epoch": 37.091454272863565, "grad_norm": 0.2640250027179718, "learning_rate": 8.972061944010066e-05, "loss": 0.0109, "step": 24740 }, { "epoch": 37.1064467766117, "grad_norm": 0.21943572163581848, "learning_rate": 8.971057444393603e-05, "loss": 0.0123, "step": 24750 }, { "epoch": 37.12143928035982, "grad_norm": 0.16832607984542847, "learning_rate": 8.970052510508268e-05, "loss": 0.0129, "step": 24760 }, { "epoch": 37.13643178410795, "grad_norm": 0.30734509229660034, "learning_rate": 8.969047142463959e-05, "loss": 0.0139, "step": 24770 }, { "epoch": 37.151424287856074, "grad_norm": 0.20029135048389435, "learning_rate": 8.968041340370621e-05, "loss": 0.0126, "step": 24780 }, { "epoch": 37.1664167916042, "grad_norm": 0.23073871433734894, "learning_rate": 8.96703510433825e-05, "loss": 0.0144, "step": 24790 }, { "epoch": 37.181409295352324, "grad_norm": 0.23742903769016266, "learning_rate": 8.966028434476883e-05, "loss": 0.0141, "step": 24800 }, { "epoch": 37.19640179910045, "grad_norm": 0.24378979206085205, "learning_rate": 8.96502133089661e-05, "loss": 0.0141, "step": 24810 }, { "epoch": 37.211394302848575, "grad_norm": 0.4329279065132141, "learning_rate": 8.964013793707564e-05, "loss": 0.0147, "step": 24820 }, { "epoch": 37.2263868065967, "grad_norm": 0.2073388695716858, "learning_rate": 8.963005823019932e-05, "loss": 0.0127, "step": 24830 }, { "epoch": 37.241379310344826, "grad_norm": 0.26367995142936707, "learning_rate": 8.961997418943939e-05, "loss": 0.0128, "step": 24840 }, { "epoch": 37.25637181409295, "grad_norm": 0.1790989637374878, "learning_rate": 8.960988581589865e-05, "loss": 0.0144, "step": 24850 }, { "epoch": 37.271364317841076, "grad_norm": 0.25765201449394226, "learning_rate": 8.959979311068037e-05, "loss": 0.0146, "step": 24860 }, { "epoch": 37.28635682158921, "grad_norm": 0.296853631734848, "learning_rate": 8.958969607488823e-05, "loss": 0.0129, "step": 24870 }, { "epoch": 37.301349325337334, "grad_norm": 0.19799041748046875, "learning_rate": 8.957959470962647e-05, "loss": 0.0122, "step": 24880 }, { "epoch": 37.31634182908546, "grad_norm": 0.3202142119407654, "learning_rate": 8.956948901599971e-05, "loss": 0.0131, "step": 24890 }, { "epoch": 37.331334332833585, "grad_norm": 0.34943678975105286, "learning_rate": 8.955937899511315e-05, "loss": 0.0148, "step": 24900 }, { "epoch": 37.34632683658171, "grad_norm": 0.25620514154434204, "learning_rate": 8.954926464807238e-05, "loss": 0.0114, "step": 24910 }, { "epoch": 37.361319340329835, "grad_norm": 0.2467290610074997, "learning_rate": 8.953914597598347e-05, "loss": 0.013, "step": 24920 }, { "epoch": 37.37631184407796, "grad_norm": 0.17559023201465607, "learning_rate": 8.952902297995303e-05, "loss": 0.0108, "step": 24930 }, { "epoch": 37.391304347826086, "grad_norm": 0.26916590332984924, "learning_rate": 8.951889566108804e-05, "loss": 0.0133, "step": 24940 }, { "epoch": 37.40629685157421, "grad_norm": 0.1740696281194687, "learning_rate": 8.950876402049606e-05, "loss": 0.0109, "step": 24950 }, { "epoch": 37.42128935532234, "grad_norm": 0.30197077989578247, "learning_rate": 8.949862805928504e-05, "loss": 0.0168, "step": 24960 }, { "epoch": 37.43628185907046, "grad_norm": 0.183079794049263, "learning_rate": 8.948848777856343e-05, "loss": 0.0115, "step": 24970 }, { "epoch": 37.45127436281859, "grad_norm": 0.2645774781703949, "learning_rate": 8.947834317944017e-05, "loss": 0.0202, "step": 24980 }, { "epoch": 37.46626686656672, "grad_norm": 0.2971608638763428, "learning_rate": 8.946819426302466e-05, "loss": 0.0121, "step": 24990 }, { "epoch": 37.481259370314845, "grad_norm": 0.1332934945821762, "learning_rate": 8.945804103042676e-05, "loss": 0.0116, "step": 25000 }, { "epoch": 37.49625187406297, "grad_norm": 0.2649727165699005, "learning_rate": 8.944788348275681e-05, "loss": 0.0146, "step": 25010 }, { "epoch": 37.511244377811096, "grad_norm": 0.18679283559322357, "learning_rate": 8.943772162112565e-05, "loss": 0.0122, "step": 25020 }, { "epoch": 37.52623688155922, "grad_norm": 0.1209469735622406, "learning_rate": 8.942755544664454e-05, "loss": 0.0134, "step": 25030 }, { "epoch": 37.541229385307346, "grad_norm": 0.25084319710731506, "learning_rate": 8.941738496042525e-05, "loss": 0.0108, "step": 25040 }, { "epoch": 37.55622188905547, "grad_norm": 0.20870588719844818, "learning_rate": 8.940721016357999e-05, "loss": 0.0112, "step": 25050 }, { "epoch": 37.5712143928036, "grad_norm": 0.2206580489873886, "learning_rate": 8.939703105722148e-05, "loss": 0.014, "step": 25060 }, { "epoch": 37.58620689655172, "grad_norm": 0.24910373985767365, "learning_rate": 8.93868476424629e-05, "loss": 0.0146, "step": 25070 }, { "epoch": 37.60119940029985, "grad_norm": 0.2323293387889862, "learning_rate": 8.937665992041786e-05, "loss": 0.0139, "step": 25080 }, { "epoch": 37.61619190404797, "grad_norm": 0.33858397603034973, "learning_rate": 8.93664678922005e-05, "loss": 0.0125, "step": 25090 }, { "epoch": 37.6311844077961, "grad_norm": 0.14850085973739624, "learning_rate": 8.93562715589254e-05, "loss": 0.0128, "step": 25100 }, { "epoch": 37.64617691154423, "grad_norm": 0.25476720929145813, "learning_rate": 8.934607092170762e-05, "loss": 0.0144, "step": 25110 }, { "epoch": 37.661169415292356, "grad_norm": 0.27236977219581604, "learning_rate": 8.933586598166266e-05, "loss": 0.0187, "step": 25120 }, { "epoch": 37.67616191904048, "grad_norm": 0.20531877875328064, "learning_rate": 8.932565673990655e-05, "loss": 0.0151, "step": 25130 }, { "epoch": 37.69115442278861, "grad_norm": 0.2136187106370926, "learning_rate": 8.931544319755574e-05, "loss": 0.0144, "step": 25140 }, { "epoch": 37.70614692653673, "grad_norm": 0.27885550260543823, "learning_rate": 8.930522535572718e-05, "loss": 0.0152, "step": 25150 }, { "epoch": 37.72113943028486, "grad_norm": 0.17067205905914307, "learning_rate": 8.929500321553826e-05, "loss": 0.0118, "step": 25160 }, { "epoch": 37.73613193403298, "grad_norm": 0.2141813039779663, "learning_rate": 8.928477677810686e-05, "loss": 0.0158, "step": 25170 }, { "epoch": 37.75112443778111, "grad_norm": 0.23840314149856567, "learning_rate": 8.927454604455137e-05, "loss": 0.0118, "step": 25180 }, { "epoch": 37.76611694152923, "grad_norm": 0.20933622121810913, "learning_rate": 8.926431101599053e-05, "loss": 0.0124, "step": 25190 }, { "epoch": 37.78110944527736, "grad_norm": 0.3127477169036865, "learning_rate": 8.925407169354369e-05, "loss": 0.0128, "step": 25200 }, { "epoch": 37.796101949025484, "grad_norm": 0.18998217582702637, "learning_rate": 8.92438280783306e-05, "loss": 0.0121, "step": 25210 }, { "epoch": 37.81109445277362, "grad_norm": 0.17968730628490448, "learning_rate": 8.923358017147146e-05, "loss": 0.01, "step": 25220 }, { "epoch": 37.82608695652174, "grad_norm": 0.2385084629058838, "learning_rate": 8.922332797408697e-05, "loss": 0.0137, "step": 25230 }, { "epoch": 37.84107946026987, "grad_norm": 0.20746640861034393, "learning_rate": 8.921307148729831e-05, "loss": 0.0118, "step": 25240 }, { "epoch": 37.85607196401799, "grad_norm": 0.26098236441612244, "learning_rate": 8.920281071222712e-05, "loss": 0.0165, "step": 25250 }, { "epoch": 37.87106446776612, "grad_norm": 0.2349965125322342, "learning_rate": 8.919254564999548e-05, "loss": 0.0117, "step": 25260 }, { "epoch": 37.88605697151424, "grad_norm": 0.23823703825473785, "learning_rate": 8.918227630172598e-05, "loss": 0.0132, "step": 25270 }, { "epoch": 37.90104947526237, "grad_norm": 0.28185564279556274, "learning_rate": 8.917200266854165e-05, "loss": 0.0148, "step": 25280 }, { "epoch": 37.916041979010494, "grad_norm": 0.2038586139678955, "learning_rate": 8.9161724751566e-05, "loss": 0.0123, "step": 25290 }, { "epoch": 37.93103448275862, "grad_norm": 0.19944117963314056, "learning_rate": 8.915144255192302e-05, "loss": 0.0116, "step": 25300 }, { "epoch": 37.946026986506745, "grad_norm": 0.20625020563602448, "learning_rate": 8.914115607073714e-05, "loss": 0.0142, "step": 25310 }, { "epoch": 37.96101949025487, "grad_norm": 0.19435177743434906, "learning_rate": 8.913086530913327e-05, "loss": 0.0139, "step": 25320 }, { "epoch": 37.976011994002995, "grad_norm": 0.2030119001865387, "learning_rate": 8.912057026823681e-05, "loss": 0.0137, "step": 25330 }, { "epoch": 37.99100449775113, "grad_norm": 0.12596717476844788, "learning_rate": 8.91102709491736e-05, "loss": 0.0094, "step": 25340 }, { "epoch": 38.00599700149925, "grad_norm": 0.2365567535161972, "learning_rate": 8.909996735306996e-05, "loss": 0.0126, "step": 25350 }, { "epoch": 38.02098950524738, "grad_norm": 0.20404760539531708, "learning_rate": 8.908965948105268e-05, "loss": 0.0137, "step": 25360 }, { "epoch": 38.035982008995504, "grad_norm": 0.22511796653270721, "learning_rate": 8.907934733424901e-05, "loss": 0.013, "step": 25370 }, { "epoch": 38.05097451274363, "grad_norm": 0.307394802570343, "learning_rate": 8.906903091378666e-05, "loss": 0.0163, "step": 25380 }, { "epoch": 38.065967016491754, "grad_norm": 0.28235214948654175, "learning_rate": 8.905871022079384e-05, "loss": 0.0156, "step": 25390 }, { "epoch": 38.08095952023988, "grad_norm": 0.21178863942623138, "learning_rate": 8.90483852563992e-05, "loss": 0.0118, "step": 25400 }, { "epoch": 38.095952023988005, "grad_norm": 0.3841294050216675, "learning_rate": 8.903805602173185e-05, "loss": 0.0126, "step": 25410 }, { "epoch": 38.11094452773613, "grad_norm": 0.2969829738140106, "learning_rate": 8.902772251792137e-05, "loss": 0.0118, "step": 25420 }, { "epoch": 38.125937031484256, "grad_norm": 0.2362118810415268, "learning_rate": 8.901738474609786e-05, "loss": 0.0178, "step": 25430 }, { "epoch": 38.14092953523238, "grad_norm": 0.3870662450790405, "learning_rate": 8.900704270739179e-05, "loss": 0.0184, "step": 25440 }, { "epoch": 38.155922038980506, "grad_norm": 0.2331283539533615, "learning_rate": 8.89966964029342e-05, "loss": 0.0137, "step": 25450 }, { "epoch": 38.17091454272864, "grad_norm": 0.30632883310317993, "learning_rate": 8.898634583385652e-05, "loss": 0.0208, "step": 25460 }, { "epoch": 38.185907046476764, "grad_norm": 0.2212841659784317, "learning_rate": 8.897599100129065e-05, "loss": 0.0142, "step": 25470 }, { "epoch": 38.20089955022489, "grad_norm": 0.2334103286266327, "learning_rate": 8.896563190636903e-05, "loss": 0.011, "step": 25480 }, { "epoch": 38.215892053973015, "grad_norm": 0.321363627910614, "learning_rate": 8.895526855022448e-05, "loss": 0.0121, "step": 25490 }, { "epoch": 38.23088455772114, "grad_norm": 0.2930218577384949, "learning_rate": 8.894490093399033e-05, "loss": 0.015, "step": 25500 }, { "epoch": 38.245877061469265, "grad_norm": 0.2499193251132965, "learning_rate": 8.893452905880035e-05, "loss": 0.0159, "step": 25510 }, { "epoch": 38.26086956521739, "grad_norm": 0.2516862452030182, "learning_rate": 8.892415292578883e-05, "loss": 0.0147, "step": 25520 }, { "epoch": 38.275862068965516, "grad_norm": 0.3093055188655853, "learning_rate": 8.891377253609046e-05, "loss": 0.015, "step": 25530 }, { "epoch": 38.29085457271364, "grad_norm": 0.44138747453689575, "learning_rate": 8.890338789084043e-05, "loss": 0.014, "step": 25540 }, { "epoch": 38.30584707646177, "grad_norm": 0.2086765021085739, "learning_rate": 8.88929989911744e-05, "loss": 0.0129, "step": 25550 }, { "epoch": 38.32083958020989, "grad_norm": 0.3303928077220917, "learning_rate": 8.888260583822847e-05, "loss": 0.0134, "step": 25560 }, { "epoch": 38.335832083958024, "grad_norm": 0.22547532618045807, "learning_rate": 8.887220843313921e-05, "loss": 0.0181, "step": 25570 }, { "epoch": 38.35082458770615, "grad_norm": 0.21323208510875702, "learning_rate": 8.88618067770437e-05, "loss": 0.0159, "step": 25580 }, { "epoch": 38.365817091454275, "grad_norm": 0.2435837835073471, "learning_rate": 8.885140087107942e-05, "loss": 0.0142, "step": 25590 }, { "epoch": 38.3808095952024, "grad_norm": 0.2150631844997406, "learning_rate": 8.884099071638436e-05, "loss": 0.0136, "step": 25600 }, { "epoch": 38.395802098950526, "grad_norm": 0.2845923602581024, "learning_rate": 8.883057631409695e-05, "loss": 0.0151, "step": 25610 }, { "epoch": 38.41079460269865, "grad_norm": 0.19829913973808289, "learning_rate": 8.882015766535608e-05, "loss": 0.0123, "step": 25620 }, { "epoch": 38.425787106446776, "grad_norm": 0.2766244411468506, "learning_rate": 8.880973477130115e-05, "loss": 0.0133, "step": 25630 }, { "epoch": 38.4407796101949, "grad_norm": 0.17495839297771454, "learning_rate": 8.879930763307197e-05, "loss": 0.0129, "step": 25640 }, { "epoch": 38.45577211394303, "grad_norm": 0.2524137794971466, "learning_rate": 8.878887625180884e-05, "loss": 0.0134, "step": 25650 }, { "epoch": 38.47076461769115, "grad_norm": 0.4875110685825348, "learning_rate": 8.877844062865253e-05, "loss": 0.0129, "step": 25660 }, { "epoch": 38.48575712143928, "grad_norm": 0.25747546553611755, "learning_rate": 8.876800076474424e-05, "loss": 0.0114, "step": 25670 }, { "epoch": 38.5007496251874, "grad_norm": 0.3167896568775177, "learning_rate": 8.875755666122568e-05, "loss": 0.0124, "step": 25680 }, { "epoch": 38.515742128935536, "grad_norm": 0.32840290665626526, "learning_rate": 8.8747108319239e-05, "loss": 0.0138, "step": 25690 }, { "epoch": 38.53073463268366, "grad_norm": 0.5198205709457397, "learning_rate": 8.87366557399268e-05, "loss": 0.0161, "step": 25700 }, { "epoch": 38.545727136431786, "grad_norm": 0.1950361579656601, "learning_rate": 8.872619892443217e-05, "loss": 0.0144, "step": 25710 }, { "epoch": 38.56071964017991, "grad_norm": 0.20728112757205963, "learning_rate": 8.871573787389865e-05, "loss": 0.0116, "step": 25720 }, { "epoch": 38.57571214392804, "grad_norm": 0.32293206453323364, "learning_rate": 8.870527258947024e-05, "loss": 0.0181, "step": 25730 }, { "epoch": 38.59070464767616, "grad_norm": 0.23703519999980927, "learning_rate": 8.869480307229143e-05, "loss": 0.0167, "step": 25740 }, { "epoch": 38.60569715142429, "grad_norm": 0.2727242112159729, "learning_rate": 8.868432932350712e-05, "loss": 0.0104, "step": 25750 }, { "epoch": 38.62068965517241, "grad_norm": 0.20928005874156952, "learning_rate": 8.867385134426272e-05, "loss": 0.0111, "step": 25760 }, { "epoch": 38.63568215892054, "grad_norm": 0.3028228282928467, "learning_rate": 8.866336913570407e-05, "loss": 0.0126, "step": 25770 }, { "epoch": 38.65067466266866, "grad_norm": 0.21284577250480652, "learning_rate": 8.865288269897751e-05, "loss": 0.0107, "step": 25780 }, { "epoch": 38.66566716641679, "grad_norm": 0.3599012792110443, "learning_rate": 8.864239203522981e-05, "loss": 0.0176, "step": 25790 }, { "epoch": 38.680659670164914, "grad_norm": 0.2148355096578598, "learning_rate": 8.863189714560822e-05, "loss": 0.0106, "step": 25800 }, { "epoch": 38.69565217391305, "grad_norm": 0.27333176136016846, "learning_rate": 8.862139803126043e-05, "loss": 0.0132, "step": 25810 }, { "epoch": 38.71064467766117, "grad_norm": 0.37364426255226135, "learning_rate": 8.861089469333463e-05, "loss": 0.0096, "step": 25820 }, { "epoch": 38.7256371814093, "grad_norm": 0.2224944531917572, "learning_rate": 8.860038713297944e-05, "loss": 0.0125, "step": 25830 }, { "epoch": 38.74062968515742, "grad_norm": 0.3188937306404114, "learning_rate": 8.858987535134394e-05, "loss": 0.0164, "step": 25840 }, { "epoch": 38.75562218890555, "grad_norm": 0.18335846066474915, "learning_rate": 8.857935934957769e-05, "loss": 0.0159, "step": 25850 }, { "epoch": 38.77061469265367, "grad_norm": 0.26694852113723755, "learning_rate": 8.856883912883071e-05, "loss": 0.0156, "step": 25860 }, { "epoch": 38.7856071964018, "grad_norm": 0.2454465627670288, "learning_rate": 8.855831469025346e-05, "loss": 0.0122, "step": 25870 }, { "epoch": 38.800599700149924, "grad_norm": 0.17728182673454285, "learning_rate": 8.854778603499689e-05, "loss": 0.0153, "step": 25880 }, { "epoch": 38.81559220389805, "grad_norm": 0.28311625123023987, "learning_rate": 8.85372531642124e-05, "loss": 0.0171, "step": 25890 }, { "epoch": 38.830584707646175, "grad_norm": 0.3748205602169037, "learning_rate": 8.852671607905185e-05, "loss": 0.0126, "step": 25900 }, { "epoch": 38.8455772113943, "grad_norm": 0.19564373791217804, "learning_rate": 8.851617478066754e-05, "loss": 0.0149, "step": 25910 }, { "epoch": 38.86056971514243, "grad_norm": 0.20688903331756592, "learning_rate": 8.850562927021227e-05, "loss": 0.0152, "step": 25920 }, { "epoch": 38.87556221889056, "grad_norm": 0.25347191095352173, "learning_rate": 8.849507954883928e-05, "loss": 0.0132, "step": 25930 }, { "epoch": 38.89055472263868, "grad_norm": 0.19855937361717224, "learning_rate": 8.848452561770226e-05, "loss": 0.0108, "step": 25940 }, { "epoch": 38.90554722638681, "grad_norm": 0.1912848949432373, "learning_rate": 8.847396747795538e-05, "loss": 0.014, "step": 25950 }, { "epoch": 38.920539730134934, "grad_norm": 0.3025127351284027, "learning_rate": 8.846340513075327e-05, "loss": 0.0096, "step": 25960 }, { "epoch": 38.93553223388306, "grad_norm": 0.21050845086574554, "learning_rate": 8.845283857725099e-05, "loss": 0.0115, "step": 25970 }, { "epoch": 38.950524737631184, "grad_norm": 0.21657508611679077, "learning_rate": 8.844226781860409e-05, "loss": 0.0128, "step": 25980 }, { "epoch": 38.96551724137931, "grad_norm": 0.16248726844787598, "learning_rate": 8.84316928559686e-05, "loss": 0.0122, "step": 25990 }, { "epoch": 38.980509745127435, "grad_norm": 0.2700672447681427, "learning_rate": 8.842111369050094e-05, "loss": 0.0156, "step": 26000 }, { "epoch": 38.99550224887556, "grad_norm": 0.2561070919036865, "learning_rate": 8.841053032335808e-05, "loss": 0.0125, "step": 26010 }, { "epoch": 39.010494752623686, "grad_norm": 0.2778618037700653, "learning_rate": 8.839994275569735e-05, "loss": 0.0108, "step": 26020 }, { "epoch": 39.02548725637181, "grad_norm": 0.2706651985645294, "learning_rate": 8.838935098867662e-05, "loss": 0.0193, "step": 26030 }, { "epoch": 39.04047976011994, "grad_norm": 0.49721282720565796, "learning_rate": 8.837875502345418e-05, "loss": 0.0128, "step": 26040 }, { "epoch": 39.05547226386807, "grad_norm": 0.4077483117580414, "learning_rate": 8.83681548611888e-05, "loss": 0.0109, "step": 26050 }, { "epoch": 39.070464767616194, "grad_norm": 0.22999222576618195, "learning_rate": 8.835755050303969e-05, "loss": 0.0111, "step": 26060 }, { "epoch": 39.08545727136432, "grad_norm": 0.247202530503273, "learning_rate": 8.834694195016653e-05, "loss": 0.0159, "step": 26070 }, { "epoch": 39.100449775112445, "grad_norm": 0.32773858308792114, "learning_rate": 8.833632920372942e-05, "loss": 0.0136, "step": 26080 }, { "epoch": 39.11544227886057, "grad_norm": 0.20408497750759125, "learning_rate": 8.832571226488903e-05, "loss": 0.0138, "step": 26090 }, { "epoch": 39.130434782608695, "grad_norm": 0.268587589263916, "learning_rate": 8.831509113480634e-05, "loss": 0.0145, "step": 26100 }, { "epoch": 39.14542728635682, "grad_norm": 0.17258287966251373, "learning_rate": 8.83044658146429e-05, "loss": 0.0143, "step": 26110 }, { "epoch": 39.160419790104946, "grad_norm": 0.36040234565734863, "learning_rate": 8.829383630556067e-05, "loss": 0.0152, "step": 26120 }, { "epoch": 39.17541229385307, "grad_norm": 0.20612730085849762, "learning_rate": 8.828320260872207e-05, "loss": 0.0126, "step": 26130 }, { "epoch": 39.1904047976012, "grad_norm": 0.3216765820980072, "learning_rate": 8.827256472529e-05, "loss": 0.0109, "step": 26140 }, { "epoch": 39.20539730134932, "grad_norm": 0.22213657200336456, "learning_rate": 8.826192265642778e-05, "loss": 0.0148, "step": 26150 }, { "epoch": 39.220389805097454, "grad_norm": 0.17459222674369812, "learning_rate": 8.825127640329923e-05, "loss": 0.0137, "step": 26160 }, { "epoch": 39.23538230884558, "grad_norm": 0.23432591557502747, "learning_rate": 8.824062596706861e-05, "loss": 0.0117, "step": 26170 }, { "epoch": 39.250374812593705, "grad_norm": 0.2851828336715698, "learning_rate": 8.822997134890062e-05, "loss": 0.015, "step": 26180 }, { "epoch": 39.26536731634183, "grad_norm": 0.22838342189788818, "learning_rate": 8.821931254996044e-05, "loss": 0.0133, "step": 26190 }, { "epoch": 39.280359820089956, "grad_norm": 0.2869029939174652, "learning_rate": 8.82086495714137e-05, "loss": 0.012, "step": 26200 }, { "epoch": 39.29535232383808, "grad_norm": 0.18933959305286407, "learning_rate": 8.81979824144265e-05, "loss": 0.0144, "step": 26210 }, { "epoch": 39.310344827586206, "grad_norm": 0.1689004898071289, "learning_rate": 8.818731108016536e-05, "loss": 0.0124, "step": 26220 }, { "epoch": 39.32533733133433, "grad_norm": 0.24078205227851868, "learning_rate": 8.81766355697973e-05, "loss": 0.0153, "step": 26230 }, { "epoch": 39.34032983508246, "grad_norm": 0.18266606330871582, "learning_rate": 8.816595588448977e-05, "loss": 0.0125, "step": 26240 }, { "epoch": 39.35532233883058, "grad_norm": 0.21570007503032684, "learning_rate": 8.81552720254107e-05, "loss": 0.0106, "step": 26250 }, { "epoch": 39.37031484257871, "grad_norm": 0.2459530234336853, "learning_rate": 8.814458399372842e-05, "loss": 0.0135, "step": 26260 }, { "epoch": 39.38530734632684, "grad_norm": 0.19317744672298431, "learning_rate": 8.813389179061181e-05, "loss": 0.0132, "step": 26270 }, { "epoch": 39.400299850074965, "grad_norm": 0.29682132601737976, "learning_rate": 8.812319541723012e-05, "loss": 0.0112, "step": 26280 }, { "epoch": 39.41529235382309, "grad_norm": 0.19466252624988556, "learning_rate": 8.811249487475309e-05, "loss": 0.0108, "step": 26290 }, { "epoch": 39.430284857571216, "grad_norm": 0.2934055030345917, "learning_rate": 8.810179016435092e-05, "loss": 0.0136, "step": 26300 }, { "epoch": 39.44527736131934, "grad_norm": 0.2412097156047821, "learning_rate": 8.809108128719428e-05, "loss": 0.0121, "step": 26310 }, { "epoch": 39.46026986506747, "grad_norm": 0.3655935227870941, "learning_rate": 8.808036824445424e-05, "loss": 0.0125, "step": 26320 }, { "epoch": 39.47526236881559, "grad_norm": 0.2941741645336151, "learning_rate": 8.806965103730238e-05, "loss": 0.0135, "step": 26330 }, { "epoch": 39.49025487256372, "grad_norm": 0.1583876758813858, "learning_rate": 8.805892966691074e-05, "loss": 0.0143, "step": 26340 }, { "epoch": 39.50524737631184, "grad_norm": 0.320341020822525, "learning_rate": 8.804820413445175e-05, "loss": 0.0123, "step": 26350 }, { "epoch": 39.52023988005997, "grad_norm": 0.26230672001838684, "learning_rate": 8.803747444109837e-05, "loss": 0.0138, "step": 26360 }, { "epoch": 39.53523238380809, "grad_norm": 0.22190643846988678, "learning_rate": 8.802674058802399e-05, "loss": 0.0117, "step": 26370 }, { "epoch": 39.55022488755622, "grad_norm": 0.3256739377975464, "learning_rate": 8.801600257640241e-05, "loss": 0.0157, "step": 26380 }, { "epoch": 39.56521739130435, "grad_norm": 0.22371309995651245, "learning_rate": 8.800526040740795e-05, "loss": 0.0124, "step": 26390 }, { "epoch": 39.58020989505248, "grad_norm": 0.11749076098203659, "learning_rate": 8.799451408221535e-05, "loss": 0.0112, "step": 26400 }, { "epoch": 39.5952023988006, "grad_norm": 0.2716562747955322, "learning_rate": 8.798376360199982e-05, "loss": 0.0129, "step": 26410 }, { "epoch": 39.61019490254873, "grad_norm": 0.19820339977741241, "learning_rate": 8.797300896793701e-05, "loss": 0.0139, "step": 26420 }, { "epoch": 39.62518740629685, "grad_norm": 0.2791268825531006, "learning_rate": 8.796225018120302e-05, "loss": 0.0126, "step": 26430 }, { "epoch": 39.64017991004498, "grad_norm": 0.21719709038734436, "learning_rate": 8.795148724297444e-05, "loss": 0.0123, "step": 26440 }, { "epoch": 39.6551724137931, "grad_norm": 0.28768861293792725, "learning_rate": 8.794072015442825e-05, "loss": 0.0174, "step": 26450 }, { "epoch": 39.67016491754123, "grad_norm": 0.2600875794887543, "learning_rate": 8.792994891674198e-05, "loss": 0.0148, "step": 26460 }, { "epoch": 39.685157421289354, "grad_norm": 0.25838595628738403, "learning_rate": 8.79191735310935e-05, "loss": 0.0124, "step": 26470 }, { "epoch": 39.70014992503748, "grad_norm": 0.3183877170085907, "learning_rate": 8.790839399866122e-05, "loss": 0.0118, "step": 26480 }, { "epoch": 39.715142428785605, "grad_norm": 0.20300009846687317, "learning_rate": 8.789761032062397e-05, "loss": 0.012, "step": 26490 }, { "epoch": 39.73013493253373, "grad_norm": 0.30483052134513855, "learning_rate": 8.788682249816103e-05, "loss": 0.0135, "step": 26500 }, { "epoch": 39.74512743628186, "grad_norm": 0.21002189815044403, "learning_rate": 8.787603053245215e-05, "loss": 0.012, "step": 26510 }, { "epoch": 39.76011994002999, "grad_norm": 0.19679927825927734, "learning_rate": 8.78652344246775e-05, "loss": 0.0178, "step": 26520 }, { "epoch": 39.77511244377811, "grad_norm": 0.2768580913543701, "learning_rate": 8.785443417601776e-05, "loss": 0.0132, "step": 26530 }, { "epoch": 39.79010494752624, "grad_norm": 0.33208635449409485, "learning_rate": 8.784362978765401e-05, "loss": 0.016, "step": 26540 }, { "epoch": 39.805097451274364, "grad_norm": 0.2673976421356201, "learning_rate": 8.783282126076779e-05, "loss": 0.0108, "step": 26550 }, { "epoch": 39.82008995502249, "grad_norm": 0.37392517924308777, "learning_rate": 8.782200859654112e-05, "loss": 0.014, "step": 26560 }, { "epoch": 39.835082458770614, "grad_norm": 0.2720625102519989, "learning_rate": 8.781119179615646e-05, "loss": 0.0128, "step": 26570 }, { "epoch": 39.85007496251874, "grad_norm": 0.3263017535209656, "learning_rate": 8.780037086079674e-05, "loss": 0.0169, "step": 26580 }, { "epoch": 39.865067466266865, "grad_norm": 0.34187939763069153, "learning_rate": 8.778954579164527e-05, "loss": 0.0122, "step": 26590 }, { "epoch": 39.88005997001499, "grad_norm": 0.24969132244586945, "learning_rate": 8.777871658988588e-05, "loss": 0.0139, "step": 26600 }, { "epoch": 39.895052473763116, "grad_norm": 0.16623899340629578, "learning_rate": 8.776788325670285e-05, "loss": 0.015, "step": 26610 }, { "epoch": 39.91004497751124, "grad_norm": 0.2969110310077667, "learning_rate": 8.775704579328089e-05, "loss": 0.0133, "step": 26620 }, { "epoch": 39.92503748125937, "grad_norm": 0.3028049170970917, "learning_rate": 8.774620420080517e-05, "loss": 0.0146, "step": 26630 }, { "epoch": 39.9400299850075, "grad_norm": 0.3834598958492279, "learning_rate": 8.773535848046131e-05, "loss": 0.0149, "step": 26640 }, { "epoch": 39.955022488755624, "grad_norm": 0.23165267705917358, "learning_rate": 8.772450863343538e-05, "loss": 0.0231, "step": 26650 }, { "epoch": 39.97001499250375, "grad_norm": 0.2962912321090698, "learning_rate": 8.77136546609139e-05, "loss": 0.0131, "step": 26660 }, { "epoch": 39.985007496251875, "grad_norm": 0.24907052516937256, "learning_rate": 8.770279656408385e-05, "loss": 0.0149, "step": 26670 }, { "epoch": 40.0, "grad_norm": 0.15279768407344818, "learning_rate": 8.769193434413265e-05, "loss": 0.0107, "step": 26680 }, { "epoch": 40.014992503748125, "grad_norm": 0.18121187388896942, "learning_rate": 8.76810680022482e-05, "loss": 0.013, "step": 26690 }, { "epoch": 40.02998500749625, "grad_norm": 0.25122568011283875, "learning_rate": 8.767019753961878e-05, "loss": 0.0107, "step": 26700 }, { "epoch": 40.044977511244376, "grad_norm": 0.21925337612628937, "learning_rate": 8.765932295743321e-05, "loss": 0.0126, "step": 26710 }, { "epoch": 40.0599700149925, "grad_norm": 0.2500912547111511, "learning_rate": 8.764844425688068e-05, "loss": 0.0147, "step": 26720 }, { "epoch": 40.07496251874063, "grad_norm": 0.20958994328975677, "learning_rate": 8.763756143915092e-05, "loss": 0.0113, "step": 26730 }, { "epoch": 40.08995502248876, "grad_norm": 0.2539735734462738, "learning_rate": 8.7626674505434e-05, "loss": 0.014, "step": 26740 }, { "epoch": 40.104947526236884, "grad_norm": 0.1719958782196045, "learning_rate": 8.761578345692053e-05, "loss": 0.0107, "step": 26750 }, { "epoch": 40.11994002998501, "grad_norm": 0.12147624790668488, "learning_rate": 8.760488829480156e-05, "loss": 0.0133, "step": 26760 }, { "epoch": 40.134932533733135, "grad_norm": 0.3097532093524933, "learning_rate": 8.759398902026854e-05, "loss": 0.0127, "step": 26770 }, { "epoch": 40.14992503748126, "grad_norm": 0.27454012632369995, "learning_rate": 8.758308563451339e-05, "loss": 0.0128, "step": 26780 }, { "epoch": 40.164917541229386, "grad_norm": 0.22262723743915558, "learning_rate": 8.75721781387285e-05, "loss": 0.0159, "step": 26790 }, { "epoch": 40.17991004497751, "grad_norm": 0.27870985865592957, "learning_rate": 8.75612665341067e-05, "loss": 0.0144, "step": 26800 }, { "epoch": 40.194902548725636, "grad_norm": 0.2763458490371704, "learning_rate": 8.755035082184126e-05, "loss": 0.0154, "step": 26810 }, { "epoch": 40.20989505247376, "grad_norm": 0.2962442934513092, "learning_rate": 8.753943100312592e-05, "loss": 0.0118, "step": 26820 }, { "epoch": 40.22488755622189, "grad_norm": 0.3672484755516052, "learning_rate": 8.752850707915484e-05, "loss": 0.0153, "step": 26830 }, { "epoch": 40.23988005997001, "grad_norm": 0.22727182507514954, "learning_rate": 8.751757905112264e-05, "loss": 0.0171, "step": 26840 }, { "epoch": 40.25487256371814, "grad_norm": 0.29327496886253357, "learning_rate": 8.75066469202244e-05, "loss": 0.0123, "step": 26850 }, { "epoch": 40.26986506746627, "grad_norm": 0.4256095290184021, "learning_rate": 8.749571068765567e-05, "loss": 0.017, "step": 26860 }, { "epoch": 40.284857571214395, "grad_norm": 0.1817990243434906, "learning_rate": 8.748477035461238e-05, "loss": 0.0173, "step": 26870 }, { "epoch": 40.29985007496252, "grad_norm": 0.36493828892707825, "learning_rate": 8.747382592229095e-05, "loss": 0.016, "step": 26880 }, { "epoch": 40.314842578710646, "grad_norm": 0.32509568333625793, "learning_rate": 8.746287739188828e-05, "loss": 0.0139, "step": 26890 }, { "epoch": 40.32983508245877, "grad_norm": 0.2532479166984558, "learning_rate": 8.745192476460165e-05, "loss": 0.0146, "step": 26900 }, { "epoch": 40.3448275862069, "grad_norm": 0.16778305172920227, "learning_rate": 8.744096804162882e-05, "loss": 0.0132, "step": 26910 }, { "epoch": 40.35982008995502, "grad_norm": 0.19900867342948914, "learning_rate": 8.743000722416804e-05, "loss": 0.0126, "step": 26920 }, { "epoch": 40.37481259370315, "grad_norm": 0.27061188220977783, "learning_rate": 8.741904231341793e-05, "loss": 0.011, "step": 26930 }, { "epoch": 40.38980509745127, "grad_norm": 0.2928910255432129, "learning_rate": 8.740807331057762e-05, "loss": 0.0137, "step": 26940 }, { "epoch": 40.4047976011994, "grad_norm": 0.2765190303325653, "learning_rate": 8.739710021684667e-05, "loss": 0.0172, "step": 26950 }, { "epoch": 40.41979010494752, "grad_norm": 0.21860864758491516, "learning_rate": 8.738612303342503e-05, "loss": 0.0115, "step": 26960 }, { "epoch": 40.43478260869565, "grad_norm": 0.7110356092453003, "learning_rate": 8.73751417615132e-05, "loss": 0.0124, "step": 26970 }, { "epoch": 40.44977511244378, "grad_norm": 0.16269312798976898, "learning_rate": 8.736415640231208e-05, "loss": 0.0109, "step": 26980 }, { "epoch": 40.46476761619191, "grad_norm": 0.3996422588825226, "learning_rate": 8.735316695702297e-05, "loss": 0.0176, "step": 26990 }, { "epoch": 40.47976011994003, "grad_norm": 0.21582405269145966, "learning_rate": 8.734217342684769e-05, "loss": 0.0142, "step": 27000 }, { "epoch": 40.49475262368816, "grad_norm": 0.2734130024909973, "learning_rate": 8.733117581298847e-05, "loss": 0.0151, "step": 27010 }, { "epoch": 40.50974512743628, "grad_norm": 0.2763662338256836, "learning_rate": 8.732017411664796e-05, "loss": 0.0149, "step": 27020 }, { "epoch": 40.52473763118441, "grad_norm": 0.2340482771396637, "learning_rate": 8.730916833902936e-05, "loss": 0.0119, "step": 27030 }, { "epoch": 40.53973013493253, "grad_norm": 0.47447919845581055, "learning_rate": 8.729815848133618e-05, "loss": 0.0117, "step": 27040 }, { "epoch": 40.55472263868066, "grad_norm": 0.24386128783226013, "learning_rate": 8.728714454477247e-05, "loss": 0.0149, "step": 27050 }, { "epoch": 40.569715142428784, "grad_norm": 0.26198866963386536, "learning_rate": 8.727612653054269e-05, "loss": 0.0132, "step": 27060 }, { "epoch": 40.58470764617691, "grad_norm": 0.24251264333724976, "learning_rate": 8.726510443985176e-05, "loss": 0.0136, "step": 27070 }, { "epoch": 40.599700149925035, "grad_norm": 0.3183712065219879, "learning_rate": 8.725407827390503e-05, "loss": 0.0129, "step": 27080 }, { "epoch": 40.61469265367316, "grad_norm": 0.16857510805130005, "learning_rate": 8.724304803390833e-05, "loss": 0.0124, "step": 27090 }, { "epoch": 40.62968515742129, "grad_norm": 0.24205872416496277, "learning_rate": 8.723201372106788e-05, "loss": 0.0166, "step": 27100 }, { "epoch": 40.64467766116942, "grad_norm": 0.259814590215683, "learning_rate": 8.722097533659038e-05, "loss": 0.0159, "step": 27110 }, { "epoch": 40.65967016491754, "grad_norm": 0.3604113459587097, "learning_rate": 8.720993288168299e-05, "loss": 0.0129, "step": 27120 }, { "epoch": 40.67466266866567, "grad_norm": 0.15051355957984924, "learning_rate": 8.719888635755327e-05, "loss": 0.0126, "step": 27130 }, { "epoch": 40.689655172413794, "grad_norm": 0.23621919751167297, "learning_rate": 8.718783576540928e-05, "loss": 0.0143, "step": 27140 }, { "epoch": 40.70464767616192, "grad_norm": 0.3087030053138733, "learning_rate": 8.717678110645948e-05, "loss": 0.0138, "step": 27150 }, { "epoch": 40.719640179910044, "grad_norm": 0.3413284718990326, "learning_rate": 8.716572238191279e-05, "loss": 0.0129, "step": 27160 }, { "epoch": 40.73463268365817, "grad_norm": 0.27164775133132935, "learning_rate": 8.715465959297857e-05, "loss": 0.0119, "step": 27170 }, { "epoch": 40.749625187406295, "grad_norm": 0.23358722031116486, "learning_rate": 8.714359274086665e-05, "loss": 0.0143, "step": 27180 }, { "epoch": 40.76461769115442, "grad_norm": 0.2464781254529953, "learning_rate": 8.713252182678726e-05, "loss": 0.0148, "step": 27190 }, { "epoch": 40.779610194902546, "grad_norm": 0.3031567931175232, "learning_rate": 8.712144685195112e-05, "loss": 0.0145, "step": 27200 }, { "epoch": 40.79460269865068, "grad_norm": 0.2580181360244751, "learning_rate": 8.711036781756936e-05, "loss": 0.0106, "step": 27210 }, { "epoch": 40.8095952023988, "grad_norm": 0.1559525430202484, "learning_rate": 8.709928472485357e-05, "loss": 0.014, "step": 27220 }, { "epoch": 40.82458770614693, "grad_norm": 0.18279114365577698, "learning_rate": 8.708819757501579e-05, "loss": 0.011, "step": 27230 }, { "epoch": 40.839580209895054, "grad_norm": 0.2929425537586212, "learning_rate": 8.707710636926846e-05, "loss": 0.0143, "step": 27240 }, { "epoch": 40.85457271364318, "grad_norm": 0.24347756803035736, "learning_rate": 8.706601110882455e-05, "loss": 0.0161, "step": 27250 }, { "epoch": 40.869565217391305, "grad_norm": 0.32305654883384705, "learning_rate": 8.705491179489738e-05, "loss": 0.0168, "step": 27260 }, { "epoch": 40.88455772113943, "grad_norm": 0.1975625604391098, "learning_rate": 8.704380842870077e-05, "loss": 0.0206, "step": 27270 }, { "epoch": 40.899550224887555, "grad_norm": 0.25147855281829834, "learning_rate": 8.703270101144895e-05, "loss": 0.0256, "step": 27280 }, { "epoch": 40.91454272863568, "grad_norm": 0.293550580739975, "learning_rate": 8.702158954435664e-05, "loss": 0.0217, "step": 27290 }, { "epoch": 40.929535232383806, "grad_norm": 0.2459123134613037, "learning_rate": 8.701047402863896e-05, "loss": 0.0177, "step": 27300 }, { "epoch": 40.94452773613193, "grad_norm": 0.20198233425617218, "learning_rate": 8.699935446551148e-05, "loss": 0.0167, "step": 27310 }, { "epoch": 40.95952023988006, "grad_norm": 0.3882687985897064, "learning_rate": 8.698823085619022e-05, "loss": 0.0156, "step": 27320 }, { "epoch": 40.97451274362819, "grad_norm": 0.20729273557662964, "learning_rate": 8.697710320189166e-05, "loss": 0.0162, "step": 27330 }, { "epoch": 40.989505247376314, "grad_norm": 0.2877335846424103, "learning_rate": 8.696597150383268e-05, "loss": 0.0128, "step": 27340 }, { "epoch": 41.00449775112444, "grad_norm": 0.24660460650920868, "learning_rate": 8.695483576323063e-05, "loss": 0.0155, "step": 27350 }, { "epoch": 41.019490254872565, "grad_norm": 0.2747378349304199, "learning_rate": 8.69436959813033e-05, "loss": 0.016, "step": 27360 }, { "epoch": 41.03448275862069, "grad_norm": 0.22606638073921204, "learning_rate": 8.693255215926892e-05, "loss": 0.015, "step": 27370 }, { "epoch": 41.049475262368816, "grad_norm": 0.2528829872608185, "learning_rate": 8.692140429834617e-05, "loss": 0.0143, "step": 27380 }, { "epoch": 41.06446776611694, "grad_norm": 0.2972695827484131, "learning_rate": 8.691025239975415e-05, "loss": 0.0125, "step": 27390 }, { "epoch": 41.079460269865066, "grad_norm": 0.1856459081172943, "learning_rate": 8.689909646471243e-05, "loss": 0.0163, "step": 27400 }, { "epoch": 41.09445277361319, "grad_norm": 0.25419721007347107, "learning_rate": 8.688793649444099e-05, "loss": 0.0146, "step": 27410 }, { "epoch": 41.10944527736132, "grad_norm": 0.19566835463047028, "learning_rate": 8.687677249016029e-05, "loss": 0.0131, "step": 27420 }, { "epoch": 41.12443778110944, "grad_norm": 0.1900397539138794, "learning_rate": 8.686560445309118e-05, "loss": 0.0139, "step": 27430 }, { "epoch": 41.13943028485757, "grad_norm": 0.2698601186275482, "learning_rate": 8.685443238445499e-05, "loss": 0.0136, "step": 27440 }, { "epoch": 41.1544227886057, "grad_norm": 0.7523044943809509, "learning_rate": 8.68432562854735e-05, "loss": 0.0128, "step": 27450 }, { "epoch": 41.169415292353825, "grad_norm": 0.27399614453315735, "learning_rate": 8.683207615736887e-05, "loss": 0.0117, "step": 27460 }, { "epoch": 41.18440779610195, "grad_norm": 0.3642607033252716, "learning_rate": 8.682089200136379e-05, "loss": 0.0188, "step": 27470 }, { "epoch": 41.199400299850076, "grad_norm": 0.17462284862995148, "learning_rate": 8.680970381868132e-05, "loss": 0.0117, "step": 27480 }, { "epoch": 41.2143928035982, "grad_norm": 0.17657729983329773, "learning_rate": 8.679851161054498e-05, "loss": 0.0149, "step": 27490 }, { "epoch": 41.22938530734633, "grad_norm": 0.22009225189685822, "learning_rate": 8.678731537817873e-05, "loss": 0.0158, "step": 27500 }, { "epoch": 41.24437781109445, "grad_norm": 0.2934180796146393, "learning_rate": 8.677611512280697e-05, "loss": 0.014, "step": 27510 }, { "epoch": 41.25937031484258, "grad_norm": 0.3735320568084717, "learning_rate": 8.676491084565457e-05, "loss": 0.0145, "step": 27520 }, { "epoch": 41.2743628185907, "grad_norm": 0.26255959272384644, "learning_rate": 8.675370254794678e-05, "loss": 0.012, "step": 27530 }, { "epoch": 41.28935532233883, "grad_norm": 0.16842111945152283, "learning_rate": 8.674249023090935e-05, "loss": 0.0118, "step": 27540 }, { "epoch": 41.30434782608695, "grad_norm": 0.18005937337875366, "learning_rate": 8.673127389576843e-05, "loss": 0.0112, "step": 27550 }, { "epoch": 41.319340329835086, "grad_norm": 0.17633920907974243, "learning_rate": 8.67200535437506e-05, "loss": 0.0141, "step": 27560 }, { "epoch": 41.33433283358321, "grad_norm": 0.24538041651248932, "learning_rate": 8.670882917608296e-05, "loss": 0.0142, "step": 27570 }, { "epoch": 41.34932533733134, "grad_norm": 0.2883470058441162, "learning_rate": 8.669760079399292e-05, "loss": 0.0111, "step": 27580 }, { "epoch": 41.36431784107946, "grad_norm": 0.2265806347131729, "learning_rate": 8.668636839870845e-05, "loss": 0.0116, "step": 27590 }, { "epoch": 41.37931034482759, "grad_norm": 0.2644895911216736, "learning_rate": 8.667513199145789e-05, "loss": 0.0122, "step": 27600 }, { "epoch": 41.39430284857571, "grad_norm": 0.23230516910552979, "learning_rate": 8.666389157347002e-05, "loss": 0.0136, "step": 27610 }, { "epoch": 41.40929535232384, "grad_norm": 0.14090262353420258, "learning_rate": 8.66526471459741e-05, "loss": 0.0103, "step": 27620 }, { "epoch": 41.42428785607196, "grad_norm": 0.1680595427751541, "learning_rate": 8.66413987101998e-05, "loss": 0.0116, "step": 27630 }, { "epoch": 41.43928035982009, "grad_norm": 0.2005806416273117, "learning_rate": 8.663014626737723e-05, "loss": 0.0133, "step": 27640 }, { "epoch": 41.454272863568214, "grad_norm": 0.18626768887043, "learning_rate": 8.661888981873691e-05, "loss": 0.0128, "step": 27650 }, { "epoch": 41.46926536731634, "grad_norm": 0.1366668939590454, "learning_rate": 8.660762936550988e-05, "loss": 0.0127, "step": 27660 }, { "epoch": 41.484257871064464, "grad_norm": 0.23622414469718933, "learning_rate": 8.659636490892753e-05, "loss": 0.0121, "step": 27670 }, { "epoch": 41.4992503748126, "grad_norm": 0.17860503494739532, "learning_rate": 8.658509645022174e-05, "loss": 0.0148, "step": 27680 }, { "epoch": 41.51424287856072, "grad_norm": 0.3180817663669586, "learning_rate": 8.657382399062481e-05, "loss": 0.0132, "step": 27690 }, { "epoch": 41.52923538230885, "grad_norm": 0.24961136281490326, "learning_rate": 8.656254753136946e-05, "loss": 0.0151, "step": 27700 }, { "epoch": 41.54422788605697, "grad_norm": 0.26347193121910095, "learning_rate": 8.655126707368891e-05, "loss": 0.011, "step": 27710 }, { "epoch": 41.5592203898051, "grad_norm": 0.3203998804092407, "learning_rate": 8.653998261881672e-05, "loss": 0.0132, "step": 27720 }, { "epoch": 41.574212893553224, "grad_norm": 0.2288985401391983, "learning_rate": 8.652869416798699e-05, "loss": 0.0118, "step": 27730 }, { "epoch": 41.58920539730135, "grad_norm": 0.3068487048149109, "learning_rate": 8.651740172243417e-05, "loss": 0.0175, "step": 27740 }, { "epoch": 41.604197901049474, "grad_norm": 0.2166975438594818, "learning_rate": 8.65061052833932e-05, "loss": 0.0138, "step": 27750 }, { "epoch": 41.6191904047976, "grad_norm": 0.40385702252388, "learning_rate": 8.649480485209945e-05, "loss": 0.0154, "step": 27760 }, { "epoch": 41.634182908545725, "grad_norm": 0.20590876042842865, "learning_rate": 8.64835004297887e-05, "loss": 0.0137, "step": 27770 }, { "epoch": 41.64917541229385, "grad_norm": 0.18816566467285156, "learning_rate": 8.64721920176972e-05, "loss": 0.0105, "step": 27780 }, { "epoch": 41.664167916041976, "grad_norm": 0.23846139013767242, "learning_rate": 8.646087961706164e-05, "loss": 0.0124, "step": 27790 }, { "epoch": 41.67916041979011, "grad_norm": 0.20691677927970886, "learning_rate": 8.644956322911908e-05, "loss": 0.0118, "step": 27800 }, { "epoch": 41.69415292353823, "grad_norm": 0.3085325360298157, "learning_rate": 8.643824285510709e-05, "loss": 0.012, "step": 27810 }, { "epoch": 41.70914542728636, "grad_norm": 0.272038996219635, "learning_rate": 8.642691849626364e-05, "loss": 0.014, "step": 27820 }, { "epoch": 41.724137931034484, "grad_norm": 0.29167553782463074, "learning_rate": 8.641559015382717e-05, "loss": 0.0146, "step": 27830 }, { "epoch": 41.73913043478261, "grad_norm": 0.24489878118038177, "learning_rate": 8.640425782903649e-05, "loss": 0.0113, "step": 27840 }, { "epoch": 41.754122938530735, "grad_norm": 0.21801519393920898, "learning_rate": 8.639292152313091e-05, "loss": 0.0112, "step": 27850 }, { "epoch": 41.76911544227886, "grad_norm": 0.21522203087806702, "learning_rate": 8.638158123735015e-05, "loss": 0.0112, "step": 27860 }, { "epoch": 41.784107946026985, "grad_norm": 0.2128908932209015, "learning_rate": 8.637023697293436e-05, "loss": 0.0121, "step": 27870 }, { "epoch": 41.79910044977511, "grad_norm": 0.3415522277355194, "learning_rate": 8.635888873112414e-05, "loss": 0.0102, "step": 27880 }, { "epoch": 41.814092953523236, "grad_norm": 0.18791799247264862, "learning_rate": 8.634753651316052e-05, "loss": 0.0138, "step": 27890 }, { "epoch": 41.82908545727136, "grad_norm": 0.6161677241325378, "learning_rate": 8.633618032028496e-05, "loss": 0.0146, "step": 27900 }, { "epoch": 41.844077961019494, "grad_norm": 0.30070164799690247, "learning_rate": 8.632482015373934e-05, "loss": 0.0123, "step": 27910 }, { "epoch": 41.85907046476762, "grad_norm": 0.2841334342956543, "learning_rate": 8.6313456014766e-05, "loss": 0.0147, "step": 27920 }, { "epoch": 41.874062968515744, "grad_norm": 0.23490042984485626, "learning_rate": 8.630208790460771e-05, "loss": 0.0163, "step": 27930 }, { "epoch": 41.88905547226387, "grad_norm": 0.21319301426410675, "learning_rate": 8.629071582450768e-05, "loss": 0.0132, "step": 27940 }, { "epoch": 41.904047976011995, "grad_norm": 0.25247737765312195, "learning_rate": 8.62793397757095e-05, "loss": 0.0139, "step": 27950 }, { "epoch": 41.91904047976012, "grad_norm": 0.19129757583141327, "learning_rate": 8.626795975945729e-05, "loss": 0.011, "step": 27960 }, { "epoch": 41.934032983508246, "grad_norm": 0.17847681045532227, "learning_rate": 8.625657577699551e-05, "loss": 0.0106, "step": 27970 }, { "epoch": 41.94902548725637, "grad_norm": 0.15392382442951202, "learning_rate": 8.624518782956914e-05, "loss": 0.0105, "step": 27980 }, { "epoch": 41.964017991004496, "grad_norm": 0.3003459572792053, "learning_rate": 8.62337959184235e-05, "loss": 0.0115, "step": 27990 }, { "epoch": 41.97901049475262, "grad_norm": 0.2655664384365082, "learning_rate": 8.622240004480441e-05, "loss": 0.0087, "step": 28000 }, { "epoch": 41.99400299850075, "grad_norm": 0.1293507069349289, "learning_rate": 8.621100020995814e-05, "loss": 0.0105, "step": 28010 }, { "epoch": 42.00899550224887, "grad_norm": 0.16184639930725098, "learning_rate": 8.619959641513132e-05, "loss": 0.0113, "step": 28020 }, { "epoch": 42.023988005997005, "grad_norm": 0.20516256988048553, "learning_rate": 8.618818866157105e-05, "loss": 0.013, "step": 28030 }, { "epoch": 42.03898050974513, "grad_norm": 0.19495287537574768, "learning_rate": 8.617677695052487e-05, "loss": 0.0108, "step": 28040 }, { "epoch": 42.053973013493255, "grad_norm": 0.23387132585048676, "learning_rate": 8.616536128324078e-05, "loss": 0.0107, "step": 28050 }, { "epoch": 42.06896551724138, "grad_norm": 0.38783568143844604, "learning_rate": 8.615394166096712e-05, "loss": 0.0107, "step": 28060 }, { "epoch": 42.083958020989506, "grad_norm": 0.18921653926372528, "learning_rate": 8.614251808495279e-05, "loss": 0.01, "step": 28070 }, { "epoch": 42.09895052473763, "grad_norm": 0.30615952610969543, "learning_rate": 8.6131090556447e-05, "loss": 0.0125, "step": 28080 }, { "epoch": 42.11394302848576, "grad_norm": 0.21488235890865326, "learning_rate": 8.611965907669947e-05, "loss": 0.0109, "step": 28090 }, { "epoch": 42.12893553223388, "grad_norm": 0.34485480189323425, "learning_rate": 8.610822364696034e-05, "loss": 0.0145, "step": 28100 }, { "epoch": 42.14392803598201, "grad_norm": 0.2552005648612976, "learning_rate": 8.609678426848015e-05, "loss": 0.013, "step": 28110 }, { "epoch": 42.15892053973013, "grad_norm": 0.20363128185272217, "learning_rate": 8.60853409425099e-05, "loss": 0.0117, "step": 28120 }, { "epoch": 42.17391304347826, "grad_norm": 0.21641217172145844, "learning_rate": 8.607389367030104e-05, "loss": 0.0098, "step": 28130 }, { "epoch": 42.18890554722638, "grad_norm": 0.3147115409374237, "learning_rate": 8.606244245310538e-05, "loss": 0.0111, "step": 28140 }, { "epoch": 42.203898050974516, "grad_norm": 0.1285727471113205, "learning_rate": 8.605098729217525e-05, "loss": 0.0117, "step": 28150 }, { "epoch": 42.21889055472264, "grad_norm": 0.1781904399394989, "learning_rate": 8.603952818876335e-05, "loss": 0.0104, "step": 28160 }, { "epoch": 42.23388305847077, "grad_norm": 0.1572854369878769, "learning_rate": 8.602806514412281e-05, "loss": 0.0119, "step": 28170 }, { "epoch": 42.24887556221889, "grad_norm": 0.4356028139591217, "learning_rate": 8.601659815950726e-05, "loss": 0.0092, "step": 28180 }, { "epoch": 42.26386806596702, "grad_norm": 0.19019486010074615, "learning_rate": 8.600512723617067e-05, "loss": 0.0115, "step": 28190 }, { "epoch": 42.27886056971514, "grad_norm": 0.43131619691848755, "learning_rate": 8.59936523753675e-05, "loss": 0.0113, "step": 28200 }, { "epoch": 42.29385307346327, "grad_norm": 0.18324622511863708, "learning_rate": 8.598217357835264e-05, "loss": 0.0118, "step": 28210 }, { "epoch": 42.30884557721139, "grad_norm": 0.24842557311058044, "learning_rate": 8.597069084638135e-05, "loss": 0.01, "step": 28220 }, { "epoch": 42.32383808095952, "grad_norm": 0.1651831865310669, "learning_rate": 8.595920418070939e-05, "loss": 0.015, "step": 28230 }, { "epoch": 42.338830584707644, "grad_norm": 0.2635008990764618, "learning_rate": 8.594771358259295e-05, "loss": 0.0156, "step": 28240 }, { "epoch": 42.35382308845577, "grad_norm": 0.1936604529619217, "learning_rate": 8.593621905328858e-05, "loss": 0.0129, "step": 28250 }, { "epoch": 42.3688155922039, "grad_norm": 0.2168579399585724, "learning_rate": 8.592472059405333e-05, "loss": 0.013, "step": 28260 }, { "epoch": 42.38380809595203, "grad_norm": 0.3017420470714569, "learning_rate": 8.591321820614464e-05, "loss": 0.0152, "step": 28270 }, { "epoch": 42.39880059970015, "grad_norm": 0.22786925733089447, "learning_rate": 8.590171189082041e-05, "loss": 0.0123, "step": 28280 }, { "epoch": 42.41379310344828, "grad_norm": 0.2656331956386566, "learning_rate": 8.589020164933894e-05, "loss": 0.0113, "step": 28290 }, { "epoch": 42.4287856071964, "grad_norm": 0.25928995013237, "learning_rate": 8.587868748295898e-05, "loss": 0.0102, "step": 28300 }, { "epoch": 42.44377811094453, "grad_norm": 0.2938583195209503, "learning_rate": 8.586716939293971e-05, "loss": 0.0109, "step": 28310 }, { "epoch": 42.458770614692654, "grad_norm": 0.2585102319717407, "learning_rate": 8.58556473805407e-05, "loss": 0.0126, "step": 28320 }, { "epoch": 42.47376311844078, "grad_norm": 0.2606047987937927, "learning_rate": 8.584412144702202e-05, "loss": 0.0166, "step": 28330 }, { "epoch": 42.488755622188904, "grad_norm": 0.2149938941001892, "learning_rate": 8.58325915936441e-05, "loss": 0.0106, "step": 28340 }, { "epoch": 42.50374812593703, "grad_norm": 0.19720770418643951, "learning_rate": 8.582105782166783e-05, "loss": 0.0134, "step": 28350 }, { "epoch": 42.518740629685155, "grad_norm": 0.19981056451797485, "learning_rate": 8.580952013235455e-05, "loss": 0.0139, "step": 28360 }, { "epoch": 42.53373313343328, "grad_norm": 0.2202586680650711, "learning_rate": 8.579797852696596e-05, "loss": 0.0129, "step": 28370 }, { "epoch": 42.54872563718141, "grad_norm": 0.4453160762786865, "learning_rate": 8.578643300676428e-05, "loss": 0.0169, "step": 28380 }, { "epoch": 42.56371814092954, "grad_norm": 0.26462534070014954, "learning_rate": 8.577488357301209e-05, "loss": 0.0122, "step": 28390 }, { "epoch": 42.57871064467766, "grad_norm": 0.28643280267715454, "learning_rate": 8.576333022697242e-05, "loss": 0.0161, "step": 28400 }, { "epoch": 42.59370314842579, "grad_norm": 0.20797684788703918, "learning_rate": 8.575177296990873e-05, "loss": 0.0141, "step": 28410 }, { "epoch": 42.608695652173914, "grad_norm": 0.3163876235485077, "learning_rate": 8.574021180308489e-05, "loss": 0.0167, "step": 28420 }, { "epoch": 42.62368815592204, "grad_norm": 0.19693998992443085, "learning_rate": 8.572864672776523e-05, "loss": 0.0128, "step": 28430 }, { "epoch": 42.638680659670165, "grad_norm": 0.3055918514728546, "learning_rate": 8.571707774521447e-05, "loss": 0.0116, "step": 28440 }, { "epoch": 42.65367316341829, "grad_norm": 0.16441798210144043, "learning_rate": 8.57055048566978e-05, "loss": 0.0137, "step": 28450 }, { "epoch": 42.668665667166415, "grad_norm": 0.2798101305961609, "learning_rate": 8.569392806348078e-05, "loss": 0.0136, "step": 28460 }, { "epoch": 42.68365817091454, "grad_norm": 0.2005445957183838, "learning_rate": 8.568234736682947e-05, "loss": 0.0126, "step": 28470 }, { "epoch": 42.698650674662666, "grad_norm": 0.1866406947374344, "learning_rate": 8.567076276801029e-05, "loss": 0.0122, "step": 28480 }, { "epoch": 42.71364317841079, "grad_norm": 0.17234738171100616, "learning_rate": 8.565917426829013e-05, "loss": 0.0127, "step": 28490 }, { "epoch": 42.728635682158924, "grad_norm": 0.21463201940059662, "learning_rate": 8.564758186893628e-05, "loss": 0.0149, "step": 28500 }, { "epoch": 42.74362818590705, "grad_norm": 0.21123173832893372, "learning_rate": 8.563598557121649e-05, "loss": 0.0134, "step": 28510 }, { "epoch": 42.758620689655174, "grad_norm": 0.30014142394065857, "learning_rate": 8.562438537639888e-05, "loss": 0.0131, "step": 28520 }, { "epoch": 42.7736131934033, "grad_norm": 0.3093585968017578, "learning_rate": 8.561278128575206e-05, "loss": 0.0142, "step": 28530 }, { "epoch": 42.788605697151425, "grad_norm": 0.2558705806732178, "learning_rate": 8.5601173300545e-05, "loss": 0.0111, "step": 28540 }, { "epoch": 42.80359820089955, "grad_norm": 0.28785276412963867, "learning_rate": 8.558956142204717e-05, "loss": 0.0148, "step": 28550 }, { "epoch": 42.818590704647676, "grad_norm": 0.2682984471321106, "learning_rate": 8.55779456515284e-05, "loss": 0.0103, "step": 28560 }, { "epoch": 42.8335832083958, "grad_norm": 0.24543076753616333, "learning_rate": 8.556632599025898e-05, "loss": 0.0109, "step": 28570 }, { "epoch": 42.848575712143926, "grad_norm": 0.24882525205612183, "learning_rate": 8.555470243950964e-05, "loss": 0.0119, "step": 28580 }, { "epoch": 42.86356821589205, "grad_norm": 0.2634905278682709, "learning_rate": 8.554307500055148e-05, "loss": 0.0103, "step": 28590 }, { "epoch": 42.87856071964018, "grad_norm": 0.2257535457611084, "learning_rate": 8.553144367465609e-05, "loss": 0.0136, "step": 28600 }, { "epoch": 42.89355322338831, "grad_norm": 0.21135570108890533, "learning_rate": 8.551980846309544e-05, "loss": 0.0115, "step": 28610 }, { "epoch": 42.908545727136435, "grad_norm": 0.21207581460475922, "learning_rate": 8.550816936714193e-05, "loss": 0.0162, "step": 28620 }, { "epoch": 42.92353823088456, "grad_norm": 0.2881166636943817, "learning_rate": 8.549652638806841e-05, "loss": 0.0139, "step": 28630 }, { "epoch": 42.938530734632685, "grad_norm": 0.15618813037872314, "learning_rate": 8.548487952714812e-05, "loss": 0.0126, "step": 28640 }, { "epoch": 42.95352323838081, "grad_norm": 0.221165269613266, "learning_rate": 8.547322878565478e-05, "loss": 0.0128, "step": 28650 }, { "epoch": 42.968515742128936, "grad_norm": 0.17379969358444214, "learning_rate": 8.546157416486245e-05, "loss": 0.0102, "step": 28660 }, { "epoch": 42.98350824587706, "grad_norm": 0.29451414942741394, "learning_rate": 8.54499156660457e-05, "loss": 0.012, "step": 28670 }, { "epoch": 42.99850074962519, "grad_norm": 0.17270947992801666, "learning_rate": 8.543825329047947e-05, "loss": 0.0128, "step": 28680 }, { "epoch": 43.01349325337331, "grad_norm": 0.14600184559822083, "learning_rate": 8.542658703943913e-05, "loss": 0.0121, "step": 28690 }, { "epoch": 43.02848575712144, "grad_norm": 0.21695315837860107, "learning_rate": 8.541491691420051e-05, "loss": 0.0109, "step": 28700 }, { "epoch": 43.04347826086956, "grad_norm": 0.2295953929424286, "learning_rate": 8.54032429160398e-05, "loss": 0.0118, "step": 28710 }, { "epoch": 43.05847076461769, "grad_norm": 0.2141137570142746, "learning_rate": 8.539156504623369e-05, "loss": 0.0139, "step": 28720 }, { "epoch": 43.07346326836582, "grad_norm": 0.1717028021812439, "learning_rate": 8.537988330605923e-05, "loss": 0.0119, "step": 28730 }, { "epoch": 43.088455772113946, "grad_norm": 0.22573526203632355, "learning_rate": 8.536819769679393e-05, "loss": 0.0091, "step": 28740 }, { "epoch": 43.10344827586207, "grad_norm": 0.2407395988702774, "learning_rate": 8.53565082197157e-05, "loss": 0.0128, "step": 28750 }, { "epoch": 43.1184407796102, "grad_norm": 0.36092865467071533, "learning_rate": 8.534481487610289e-05, "loss": 0.0138, "step": 28760 }, { "epoch": 43.13343328335832, "grad_norm": 0.3354366719722748, "learning_rate": 8.533311766723428e-05, "loss": 0.0146, "step": 28770 }, { "epoch": 43.14842578710645, "grad_norm": 0.30416786670684814, "learning_rate": 8.532141659438901e-05, "loss": 0.014, "step": 28780 }, { "epoch": 43.16341829085457, "grad_norm": 0.2512427866458893, "learning_rate": 8.530971165884675e-05, "loss": 0.0122, "step": 28790 }, { "epoch": 43.1784107946027, "grad_norm": 0.24360524117946625, "learning_rate": 8.529800286188752e-05, "loss": 0.0108, "step": 28800 }, { "epoch": 43.19340329835082, "grad_norm": 0.2593710720539093, "learning_rate": 8.528629020479175e-05, "loss": 0.0126, "step": 28810 }, { "epoch": 43.20839580209895, "grad_norm": 0.1272999793291092, "learning_rate": 8.527457368884033e-05, "loss": 0.0107, "step": 28820 }, { "epoch": 43.223388305847074, "grad_norm": 0.2594238519668579, "learning_rate": 8.526285331531458e-05, "loss": 0.01, "step": 28830 }, { "epoch": 43.2383808095952, "grad_norm": 0.16503599286079407, "learning_rate": 8.525112908549621e-05, "loss": 0.0124, "step": 28840 }, { "epoch": 43.25337331334333, "grad_norm": 0.38474711775779724, "learning_rate": 8.523940100066735e-05, "loss": 0.0124, "step": 28850 }, { "epoch": 43.26836581709146, "grad_norm": 0.2520099878311157, "learning_rate": 8.52276690621106e-05, "loss": 0.0114, "step": 28860 }, { "epoch": 43.28335832083958, "grad_norm": 0.17527970671653748, "learning_rate": 8.521593327110889e-05, "loss": 0.0111, "step": 28870 }, { "epoch": 43.29835082458771, "grad_norm": 0.2982214391231537, "learning_rate": 8.520419362894569e-05, "loss": 0.0139, "step": 28880 }, { "epoch": 43.31334332833583, "grad_norm": 0.2589065730571747, "learning_rate": 8.51924501369048e-05, "loss": 0.0111, "step": 28890 }, { "epoch": 43.32833583208396, "grad_norm": 0.1899457573890686, "learning_rate": 8.518070279627047e-05, "loss": 0.0149, "step": 28900 }, { "epoch": 43.343328335832084, "grad_norm": 0.26158103346824646, "learning_rate": 8.516895160832737e-05, "loss": 0.0096, "step": 28910 }, { "epoch": 43.35832083958021, "grad_norm": 0.31207379698753357, "learning_rate": 8.515719657436061e-05, "loss": 0.0131, "step": 28920 }, { "epoch": 43.373313343328334, "grad_norm": 0.199725940823555, "learning_rate": 8.514543769565568e-05, "loss": 0.0174, "step": 28930 }, { "epoch": 43.38830584707646, "grad_norm": 0.2010023295879364, "learning_rate": 8.513367497349853e-05, "loss": 0.0085, "step": 28940 }, { "epoch": 43.403298350824585, "grad_norm": 0.2676487863063812, "learning_rate": 8.51219084091755e-05, "loss": 0.0105, "step": 28950 }, { "epoch": 43.41829085457271, "grad_norm": 0.17802618443965912, "learning_rate": 8.511013800397338e-05, "loss": 0.0091, "step": 28960 }, { "epoch": 43.43328335832084, "grad_norm": 0.1738441288471222, "learning_rate": 8.509836375917937e-05, "loss": 0.0097, "step": 28970 }, { "epoch": 43.44827586206897, "grad_norm": 0.17739354074001312, "learning_rate": 8.508658567608104e-05, "loss": 0.011, "step": 28980 }, { "epoch": 43.46326836581709, "grad_norm": 0.2030685395002365, "learning_rate": 8.507480375596647e-05, "loss": 0.0145, "step": 28990 }, { "epoch": 43.47826086956522, "grad_norm": 0.23211222887039185, "learning_rate": 8.506301800012408e-05, "loss": 0.0162, "step": 29000 }, { "epoch": 43.493253373313344, "grad_norm": 0.2117229551076889, "learning_rate": 8.505122840984278e-05, "loss": 0.0137, "step": 29010 }, { "epoch": 43.50824587706147, "grad_norm": 0.19512954354286194, "learning_rate": 8.503943498641182e-05, "loss": 0.0116, "step": 29020 }, { "epoch": 43.523238380809595, "grad_norm": 0.2217610776424408, "learning_rate": 8.502763773112095e-05, "loss": 0.0109, "step": 29030 }, { "epoch": 43.53823088455772, "grad_norm": 0.23434405028820038, "learning_rate": 8.501583664526026e-05, "loss": 0.01, "step": 29040 }, { "epoch": 43.553223388305845, "grad_norm": 0.24995717406272888, "learning_rate": 8.500403173012032e-05, "loss": 0.0118, "step": 29050 }, { "epoch": 43.56821589205397, "grad_norm": 0.22179031372070312, "learning_rate": 8.499222298699211e-05, "loss": 0.0109, "step": 29060 }, { "epoch": 43.583208395802096, "grad_norm": 0.26509127020835876, "learning_rate": 8.498041041716701e-05, "loss": 0.0153, "step": 29070 }, { "epoch": 43.59820089955023, "grad_norm": 0.17235597968101501, "learning_rate": 8.496859402193681e-05, "loss": 0.0102, "step": 29080 }, { "epoch": 43.613193403298354, "grad_norm": 0.18094313144683838, "learning_rate": 8.495677380259374e-05, "loss": 0.0164, "step": 29090 }, { "epoch": 43.62818590704648, "grad_norm": 1.6809500455856323, "learning_rate": 8.494494976043045e-05, "loss": 0.0139, "step": 29100 }, { "epoch": 43.643178410794604, "grad_norm": 0.24128104746341705, "learning_rate": 8.493312189673998e-05, "loss": 0.0152, "step": 29110 }, { "epoch": 43.65817091454273, "grad_norm": 0.2789647877216339, "learning_rate": 8.492129021281584e-05, "loss": 0.0138, "step": 29120 }, { "epoch": 43.673163418290855, "grad_norm": 0.1655338853597641, "learning_rate": 8.490945470995188e-05, "loss": 0.0122, "step": 29130 }, { "epoch": 43.68815592203898, "grad_norm": 0.19251221418380737, "learning_rate": 8.489761538944247e-05, "loss": 0.0135, "step": 29140 }, { "epoch": 43.703148425787106, "grad_norm": 0.20956099033355713, "learning_rate": 8.48857722525823e-05, "loss": 0.0131, "step": 29150 }, { "epoch": 43.71814092953523, "grad_norm": 0.2536960244178772, "learning_rate": 8.487392530066652e-05, "loss": 0.0128, "step": 29160 }, { "epoch": 43.733133433283356, "grad_norm": 0.2186245173215866, "learning_rate": 8.486207453499069e-05, "loss": 0.011, "step": 29170 }, { "epoch": 43.74812593703148, "grad_norm": 0.2651139795780182, "learning_rate": 8.485021995685082e-05, "loss": 0.0141, "step": 29180 }, { "epoch": 43.76311844077961, "grad_norm": 0.3355078399181366, "learning_rate": 8.483836156754328e-05, "loss": 0.0122, "step": 29190 }, { "epoch": 43.77811094452774, "grad_norm": 0.26707711815834045, "learning_rate": 8.482649936836491e-05, "loss": 0.0158, "step": 29200 }, { "epoch": 43.793103448275865, "grad_norm": 0.2702946066856384, "learning_rate": 8.481463336061293e-05, "loss": 0.0133, "step": 29210 }, { "epoch": 43.80809595202399, "grad_norm": 0.26772230863571167, "learning_rate": 8.480276354558496e-05, "loss": 0.0143, "step": 29220 }, { "epoch": 43.823088455772115, "grad_norm": 0.18412134051322937, "learning_rate": 8.479088992457913e-05, "loss": 0.0153, "step": 29230 }, { "epoch": 43.83808095952024, "grad_norm": 0.2240930050611496, "learning_rate": 8.477901249889387e-05, "loss": 0.0124, "step": 29240 }, { "epoch": 43.853073463268366, "grad_norm": 0.31494367122650146, "learning_rate": 8.47671312698281e-05, "loss": 0.0094, "step": 29250 }, { "epoch": 43.86806596701649, "grad_norm": 0.2439279854297638, "learning_rate": 8.475524623868112e-05, "loss": 0.0133, "step": 29260 }, { "epoch": 43.88305847076462, "grad_norm": 0.3868088126182556, "learning_rate": 8.474335740675266e-05, "loss": 0.0159, "step": 29270 }, { "epoch": 43.89805097451274, "grad_norm": 0.35599228739738464, "learning_rate": 8.473146477534289e-05, "loss": 0.0122, "step": 29280 }, { "epoch": 43.91304347826087, "grad_norm": 0.21724525094032288, "learning_rate": 8.471956834575232e-05, "loss": 0.0148, "step": 29290 }, { "epoch": 43.92803598200899, "grad_norm": 0.2132461816072464, "learning_rate": 8.470766811928197e-05, "loss": 0.0102, "step": 29300 }, { "epoch": 43.94302848575712, "grad_norm": 0.2200510948896408, "learning_rate": 8.469576409723323e-05, "loss": 0.012, "step": 29310 }, { "epoch": 43.95802098950525, "grad_norm": 0.22157283127307892, "learning_rate": 8.468385628090788e-05, "loss": 0.0097, "step": 29320 }, { "epoch": 43.973013493253376, "grad_norm": 0.2748124301433563, "learning_rate": 8.467194467160815e-05, "loss": 0.0141, "step": 29330 }, { "epoch": 43.9880059970015, "grad_norm": 0.2083691954612732, "learning_rate": 8.466002927063667e-05, "loss": 0.0104, "step": 29340 }, { "epoch": 44.00299850074963, "grad_norm": 0.33233141899108887, "learning_rate": 8.464811007929651e-05, "loss": 0.0136, "step": 29350 }, { "epoch": 44.01799100449775, "grad_norm": 0.24797867238521576, "learning_rate": 8.463618709889114e-05, "loss": 0.0123, "step": 29360 }, { "epoch": 44.03298350824588, "grad_norm": 0.15755288302898407, "learning_rate": 8.462426033072442e-05, "loss": 0.0113, "step": 29370 }, { "epoch": 44.047976011994, "grad_norm": 0.24683959782123566, "learning_rate": 8.461232977610061e-05, "loss": 0.0107, "step": 29380 }, { "epoch": 44.06296851574213, "grad_norm": 0.2131742686033249, "learning_rate": 8.46003954363245e-05, "loss": 0.015, "step": 29390 }, { "epoch": 44.07796101949025, "grad_norm": 0.28708064556121826, "learning_rate": 8.458845731270115e-05, "loss": 0.0127, "step": 29400 }, { "epoch": 44.09295352323838, "grad_norm": 0.24520011246204376, "learning_rate": 8.45765154065361e-05, "loss": 0.0126, "step": 29410 }, { "epoch": 44.107946026986504, "grad_norm": 0.995794951915741, "learning_rate": 8.456456971913532e-05, "loss": 0.0129, "step": 29420 }, { "epoch": 44.12293853073463, "grad_norm": 0.20015551149845123, "learning_rate": 8.455262025180517e-05, "loss": 0.0124, "step": 29430 }, { "epoch": 44.13793103448276, "grad_norm": 0.18491750955581665, "learning_rate": 8.454066700585242e-05, "loss": 0.0138, "step": 29440 }, { "epoch": 44.15292353823089, "grad_norm": 0.2520187199115753, "learning_rate": 8.452870998258423e-05, "loss": 0.0106, "step": 29450 }, { "epoch": 44.16791604197901, "grad_norm": 0.23290607333183289, "learning_rate": 8.451674918330825e-05, "loss": 0.0143, "step": 29460 }, { "epoch": 44.18290854572714, "grad_norm": 0.2050306499004364, "learning_rate": 8.450478460933246e-05, "loss": 0.0126, "step": 29470 }, { "epoch": 44.19790104947526, "grad_norm": 0.1919034868478775, "learning_rate": 8.449281626196532e-05, "loss": 0.0117, "step": 29480 }, { "epoch": 44.21289355322339, "grad_norm": 0.2327147275209427, "learning_rate": 8.448084414251564e-05, "loss": 0.0117, "step": 29490 }, { "epoch": 44.22788605697151, "grad_norm": 0.16873185336589813, "learning_rate": 8.446886825229271e-05, "loss": 0.0105, "step": 29500 }, { "epoch": 44.24287856071964, "grad_norm": 0.20033936202526093, "learning_rate": 8.445688859260615e-05, "loss": 0.0131, "step": 29510 }, { "epoch": 44.257871064467764, "grad_norm": 0.2194972038269043, "learning_rate": 8.444490516476606e-05, "loss": 0.0143, "step": 29520 }, { "epoch": 44.27286356821589, "grad_norm": 0.2981928586959839, "learning_rate": 8.443291797008293e-05, "loss": 0.0135, "step": 29530 }, { "epoch": 44.287856071964015, "grad_norm": 0.17802734673023224, "learning_rate": 8.442092700986765e-05, "loss": 0.012, "step": 29540 }, { "epoch": 44.30284857571215, "grad_norm": 0.296615332365036, "learning_rate": 8.440893228543156e-05, "loss": 0.0138, "step": 29550 }, { "epoch": 44.31784107946027, "grad_norm": 0.1954117715358734, "learning_rate": 8.439693379808638e-05, "loss": 0.0123, "step": 29560 }, { "epoch": 44.3328335832084, "grad_norm": 0.29102954268455505, "learning_rate": 8.43849315491442e-05, "loss": 0.013, "step": 29570 }, { "epoch": 44.34782608695652, "grad_norm": 0.48954957723617554, "learning_rate": 8.437292553991763e-05, "loss": 0.0129, "step": 29580 }, { "epoch": 44.36281859070465, "grad_norm": 0.30353471636772156, "learning_rate": 8.436091577171959e-05, "loss": 0.014, "step": 29590 }, { "epoch": 44.377811094452774, "grad_norm": 0.2040223479270935, "learning_rate": 8.434890224586347e-05, "loss": 0.0117, "step": 29600 }, { "epoch": 44.3928035982009, "grad_norm": 0.27604252099990845, "learning_rate": 8.433688496366303e-05, "loss": 0.0118, "step": 29610 }, { "epoch": 44.407796101949025, "grad_norm": 0.28368884325027466, "learning_rate": 8.432486392643248e-05, "loss": 0.0106, "step": 29620 }, { "epoch": 44.42278860569715, "grad_norm": 0.31408873200416565, "learning_rate": 8.431283913548643e-05, "loss": 0.0095, "step": 29630 }, { "epoch": 44.437781109445275, "grad_norm": 0.2310047149658203, "learning_rate": 8.430081059213985e-05, "loss": 0.0092, "step": 29640 }, { "epoch": 44.4527736131934, "grad_norm": 0.2246720790863037, "learning_rate": 8.428877829770823e-05, "loss": 0.012, "step": 29650 }, { "epoch": 44.467766116941526, "grad_norm": 0.27164819836616516, "learning_rate": 8.427674225350735e-05, "loss": 0.011, "step": 29660 }, { "epoch": 44.48275862068966, "grad_norm": 0.1891101896762848, "learning_rate": 8.426470246085347e-05, "loss": 0.0108, "step": 29670 }, { "epoch": 44.497751124437784, "grad_norm": 0.3247525990009308, "learning_rate": 8.425265892106324e-05, "loss": 0.0135, "step": 29680 }, { "epoch": 44.51274362818591, "grad_norm": 0.5466464757919312, "learning_rate": 8.424061163545374e-05, "loss": 0.0142, "step": 29690 }, { "epoch": 44.527736131934034, "grad_norm": 0.20045550167560577, "learning_rate": 8.422856060534243e-05, "loss": 0.0104, "step": 29700 }, { "epoch": 44.54272863568216, "grad_norm": 0.30197614431381226, "learning_rate": 8.421650583204718e-05, "loss": 0.0112, "step": 29710 }, { "epoch": 44.557721139430285, "grad_norm": 0.414074569940567, "learning_rate": 8.420444731688633e-05, "loss": 0.0128, "step": 29720 }, { "epoch": 44.57271364317841, "grad_norm": 0.42603421211242676, "learning_rate": 8.419238506117852e-05, "loss": 0.0125, "step": 29730 }, { "epoch": 44.587706146926536, "grad_norm": 0.23019595444202423, "learning_rate": 8.418031906624289e-05, "loss": 0.0115, "step": 29740 }, { "epoch": 44.60269865067466, "grad_norm": 0.3835048973560333, "learning_rate": 8.416824933339898e-05, "loss": 0.0125, "step": 29750 }, { "epoch": 44.617691154422786, "grad_norm": 0.3076625466346741, "learning_rate": 8.415617586396667e-05, "loss": 0.014, "step": 29760 }, { "epoch": 44.63268365817091, "grad_norm": 0.2668096721172333, "learning_rate": 8.414409865926632e-05, "loss": 0.0102, "step": 29770 }, { "epoch": 44.64767616191904, "grad_norm": 0.2914525866508484, "learning_rate": 8.413201772061867e-05, "loss": 0.0106, "step": 29780 }, { "epoch": 44.66266866566717, "grad_norm": 0.16078713536262512, "learning_rate": 8.411993304934488e-05, "loss": 0.0113, "step": 29790 }, { "epoch": 44.677661169415295, "grad_norm": 0.25688987970352173, "learning_rate": 8.410784464676654e-05, "loss": 0.0145, "step": 29800 }, { "epoch": 44.69265367316342, "grad_norm": 0.2718423306941986, "learning_rate": 8.409575251420556e-05, "loss": 0.0128, "step": 29810 }, { "epoch": 44.707646176911545, "grad_norm": 0.16514453291893005, "learning_rate": 8.408365665298435e-05, "loss": 0.0139, "step": 29820 }, { "epoch": 44.72263868065967, "grad_norm": 0.2535862922668457, "learning_rate": 8.40715570644257e-05, "loss": 0.0113, "step": 29830 }, { "epoch": 44.737631184407796, "grad_norm": 0.21793383359909058, "learning_rate": 8.40594537498528e-05, "loss": 0.0146, "step": 29840 }, { "epoch": 44.75262368815592, "grad_norm": 0.1859692633152008, "learning_rate": 8.404734671058924e-05, "loss": 0.0149, "step": 29850 }, { "epoch": 44.76761619190405, "grad_norm": 0.2818312346935272, "learning_rate": 8.403523594795902e-05, "loss": 0.0134, "step": 29860 }, { "epoch": 44.78260869565217, "grad_norm": 0.2730455696582794, "learning_rate": 8.402312146328659e-05, "loss": 0.0166, "step": 29870 }, { "epoch": 44.7976011994003, "grad_norm": 0.2567644715309143, "learning_rate": 8.401100325789675e-05, "loss": 0.013, "step": 29880 }, { "epoch": 44.81259370314842, "grad_norm": 0.2570500373840332, "learning_rate": 8.399888133311472e-05, "loss": 0.0097, "step": 29890 }, { "epoch": 44.827586206896555, "grad_norm": 0.2491624355316162, "learning_rate": 8.398675569026613e-05, "loss": 0.0119, "step": 29900 }, { "epoch": 44.84257871064468, "grad_norm": 0.2935493290424347, "learning_rate": 8.397462633067705e-05, "loss": 0.0112, "step": 29910 }, { "epoch": 44.857571214392806, "grad_norm": 0.24114133417606354, "learning_rate": 8.396249325567392e-05, "loss": 0.0121, "step": 29920 }, { "epoch": 44.87256371814093, "grad_norm": 0.3156646192073822, "learning_rate": 8.395035646658357e-05, "loss": 0.0114, "step": 29930 }, { "epoch": 44.88755622188906, "grad_norm": 0.18649475276470184, "learning_rate": 8.39382159647333e-05, "loss": 0.0142, "step": 29940 }, { "epoch": 44.90254872563718, "grad_norm": 0.2551449239253998, "learning_rate": 8.392607175145075e-05, "loss": 0.0122, "step": 29950 }, { "epoch": 44.91754122938531, "grad_norm": 0.30327850580215454, "learning_rate": 8.3913923828064e-05, "loss": 0.0142, "step": 29960 }, { "epoch": 44.93253373313343, "grad_norm": 0.19201667606830597, "learning_rate": 8.390177219590152e-05, "loss": 0.0106, "step": 29970 }, { "epoch": 44.94752623688156, "grad_norm": 0.2929057478904724, "learning_rate": 8.388961685629222e-05, "loss": 0.0115, "step": 29980 }, { "epoch": 44.96251874062968, "grad_norm": 0.17164359986782074, "learning_rate": 8.387745781056536e-05, "loss": 0.0118, "step": 29990 }, { "epoch": 44.97751124437781, "grad_norm": 0.24322181940078735, "learning_rate": 8.386529506005065e-05, "loss": 0.0133, "step": 30000 }, { "epoch": 44.992503748125934, "grad_norm": 0.28756844997406006, "learning_rate": 8.38531286060782e-05, "loss": 0.0171, "step": 30010 }, { "epoch": 45.007496251874066, "grad_norm": 0.19965696334838867, "learning_rate": 8.384095844997849e-05, "loss": 0.0122, "step": 30020 }, { "epoch": 45.02248875562219, "grad_norm": 0.339333713054657, "learning_rate": 8.382878459308245e-05, "loss": 0.0111, "step": 30030 }, { "epoch": 45.03748125937032, "grad_norm": 0.23412589728832245, "learning_rate": 8.381660703672138e-05, "loss": 0.0127, "step": 30040 }, { "epoch": 45.05247376311844, "grad_norm": 0.2563346028327942, "learning_rate": 8.380442578222702e-05, "loss": 0.0133, "step": 30050 }, { "epoch": 45.06746626686657, "grad_norm": 0.14495567977428436, "learning_rate": 8.379224083093146e-05, "loss": 0.0107, "step": 30060 }, { "epoch": 45.08245877061469, "grad_norm": 0.37110862135887146, "learning_rate": 8.378005218416727e-05, "loss": 0.0113, "step": 30070 }, { "epoch": 45.09745127436282, "grad_norm": 0.2546071410179138, "learning_rate": 8.376785984326735e-05, "loss": 0.011, "step": 30080 }, { "epoch": 45.11244377811094, "grad_norm": 0.2418600171804428, "learning_rate": 8.375566380956506e-05, "loss": 0.0106, "step": 30090 }, { "epoch": 45.12743628185907, "grad_norm": 0.18582580983638763, "learning_rate": 8.374346408439411e-05, "loss": 0.012, "step": 30100 }, { "epoch": 45.142428785607194, "grad_norm": 0.2576355040073395, "learning_rate": 8.373126066908868e-05, "loss": 0.0155, "step": 30110 }, { "epoch": 45.15742128935532, "grad_norm": 0.19859834015369415, "learning_rate": 8.371905356498326e-05, "loss": 0.0144, "step": 30120 }, { "epoch": 45.172413793103445, "grad_norm": 0.29196643829345703, "learning_rate": 8.370684277341288e-05, "loss": 0.0102, "step": 30130 }, { "epoch": 45.18740629685158, "grad_norm": 0.25035524368286133, "learning_rate": 8.369462829571282e-05, "loss": 0.014, "step": 30140 }, { "epoch": 45.2023988005997, "grad_norm": 0.2506848871707916, "learning_rate": 8.36824101332189e-05, "loss": 0.0189, "step": 30150 }, { "epoch": 45.21739130434783, "grad_norm": 0.14620159566402435, "learning_rate": 8.367018828726721e-05, "loss": 0.0136, "step": 30160 }, { "epoch": 45.23238380809595, "grad_norm": 0.19029240310192108, "learning_rate": 8.365796275919438e-05, "loss": 0.0152, "step": 30170 }, { "epoch": 45.24737631184408, "grad_norm": 0.1892470121383667, "learning_rate": 8.364573355033734e-05, "loss": 0.0134, "step": 30180 }, { "epoch": 45.262368815592204, "grad_norm": 0.15412373840808868, "learning_rate": 8.363350066203346e-05, "loss": 0.017, "step": 30190 }, { "epoch": 45.27736131934033, "grad_norm": 0.36505335569381714, "learning_rate": 8.362126409562053e-05, "loss": 0.015, "step": 30200 }, { "epoch": 45.292353823088455, "grad_norm": 0.24688749015331268, "learning_rate": 8.360902385243667e-05, "loss": 0.0138, "step": 30210 }, { "epoch": 45.30734632683658, "grad_norm": 0.27136528491973877, "learning_rate": 8.359677993382052e-05, "loss": 0.0127, "step": 30220 }, { "epoch": 45.322338830584705, "grad_norm": 0.2566235363483429, "learning_rate": 8.358453234111103e-05, "loss": 0.0158, "step": 30230 }, { "epoch": 45.33733133433283, "grad_norm": 0.2036592662334442, "learning_rate": 8.357228107564756e-05, "loss": 0.0126, "step": 30240 }, { "epoch": 45.35232383808096, "grad_norm": 0.24401076138019562, "learning_rate": 8.356002613876993e-05, "loss": 0.0116, "step": 30250 }, { "epoch": 45.36731634182909, "grad_norm": 0.3296780586242676, "learning_rate": 8.35477675318183e-05, "loss": 0.0122, "step": 30260 }, { "epoch": 45.382308845577214, "grad_norm": 0.12033168226480484, "learning_rate": 8.353550525613323e-05, "loss": 0.0137, "step": 30270 }, { "epoch": 45.39730134932534, "grad_norm": 0.17940598726272583, "learning_rate": 8.352323931305572e-05, "loss": 0.0126, "step": 30280 }, { "epoch": 45.412293853073464, "grad_norm": 0.1469985693693161, "learning_rate": 8.351096970392717e-05, "loss": 0.0112, "step": 30290 }, { "epoch": 45.42728635682159, "grad_norm": 0.2561826705932617, "learning_rate": 8.349869643008937e-05, "loss": 0.0131, "step": 30300 }, { "epoch": 45.442278860569715, "grad_norm": 0.2393208146095276, "learning_rate": 8.348641949288449e-05, "loss": 0.0148, "step": 30310 }, { "epoch": 45.45727136431784, "grad_norm": 0.29010453820228577, "learning_rate": 8.34741388936551e-05, "loss": 0.0123, "step": 30320 }, { "epoch": 45.472263868065966, "grad_norm": 0.15802434086799622, "learning_rate": 8.346185463374423e-05, "loss": 0.0114, "step": 30330 }, { "epoch": 45.48725637181409, "grad_norm": 0.32242780923843384, "learning_rate": 8.344956671449524e-05, "loss": 0.0128, "step": 30340 }, { "epoch": 45.502248875562216, "grad_norm": 0.22390803694725037, "learning_rate": 8.343727513725192e-05, "loss": 0.0109, "step": 30350 }, { "epoch": 45.51724137931034, "grad_norm": 0.1277250349521637, "learning_rate": 8.342497990335847e-05, "loss": 0.0108, "step": 30360 }, { "epoch": 45.532233883058474, "grad_norm": 0.19295734167099, "learning_rate": 8.341268101415946e-05, "loss": 0.0127, "step": 30370 }, { "epoch": 45.5472263868066, "grad_norm": 0.2400302290916443, "learning_rate": 8.34003784709999e-05, "loss": 0.0143, "step": 30380 }, { "epoch": 45.562218890554725, "grad_norm": 0.22289322316646576, "learning_rate": 8.338807227522516e-05, "loss": 0.0126, "step": 30390 }, { "epoch": 45.57721139430285, "grad_norm": 0.45170891284942627, "learning_rate": 8.337576242818103e-05, "loss": 0.0145, "step": 30400 }, { "epoch": 45.592203898050975, "grad_norm": 0.23559176921844482, "learning_rate": 8.336344893121372e-05, "loss": 0.0111, "step": 30410 }, { "epoch": 45.6071964017991, "grad_norm": 0.3331295847892761, "learning_rate": 8.335113178566977e-05, "loss": 0.0122, "step": 30420 }, { "epoch": 45.622188905547226, "grad_norm": 0.25316134095191956, "learning_rate": 8.33388109928962e-05, "loss": 0.0131, "step": 30430 }, { "epoch": 45.63718140929535, "grad_norm": 0.2667602002620697, "learning_rate": 8.33264865542404e-05, "loss": 0.0148, "step": 30440 }, { "epoch": 45.65217391304348, "grad_norm": 0.2348894476890564, "learning_rate": 8.331415847105013e-05, "loss": 0.0113, "step": 30450 }, { "epoch": 45.6671664167916, "grad_norm": 0.3131861388683319, "learning_rate": 8.330182674467357e-05, "loss": 0.0124, "step": 30460 }, { "epoch": 45.68215892053973, "grad_norm": 0.18704001605510712, "learning_rate": 8.32894913764593e-05, "loss": 0.0163, "step": 30470 }, { "epoch": 45.69715142428785, "grad_norm": 0.17347468435764313, "learning_rate": 8.327715236775633e-05, "loss": 0.0097, "step": 30480 }, { "epoch": 45.712143928035985, "grad_norm": 0.10805173963308334, "learning_rate": 8.326480971991398e-05, "loss": 0.0135, "step": 30490 }, { "epoch": 45.72713643178411, "grad_norm": 0.27747803926467896, "learning_rate": 8.325246343428206e-05, "loss": 0.0113, "step": 30500 }, { "epoch": 45.742128935532236, "grad_norm": 0.169337198138237, "learning_rate": 8.324011351221072e-05, "loss": 0.0113, "step": 30510 }, { "epoch": 45.75712143928036, "grad_norm": 0.3173060417175293, "learning_rate": 8.322775995505057e-05, "loss": 0.011, "step": 30520 }, { "epoch": 45.77211394302849, "grad_norm": 0.29741793870925903, "learning_rate": 8.321540276415254e-05, "loss": 0.0154, "step": 30530 }, { "epoch": 45.78710644677661, "grad_norm": 0.5224310755729675, "learning_rate": 8.320304194086798e-05, "loss": 0.0144, "step": 30540 }, { "epoch": 45.80209895052474, "grad_norm": 0.28576818108558655, "learning_rate": 8.31906774865487e-05, "loss": 0.0115, "step": 30550 }, { "epoch": 45.81709145427286, "grad_norm": 0.2764967083930969, "learning_rate": 8.317830940254682e-05, "loss": 0.0133, "step": 30560 }, { "epoch": 45.83208395802099, "grad_norm": 0.38873082399368286, "learning_rate": 8.316593769021491e-05, "loss": 0.0105, "step": 30570 }, { "epoch": 45.84707646176911, "grad_norm": 0.1648796647787094, "learning_rate": 8.315356235090592e-05, "loss": 0.0148, "step": 30580 }, { "epoch": 45.86206896551724, "grad_norm": 0.5371176600456238, "learning_rate": 8.314118338597319e-05, "loss": 0.0125, "step": 30590 }, { "epoch": 45.87706146926537, "grad_norm": 0.19314204156398773, "learning_rate": 8.312880079677048e-05, "loss": 0.0124, "step": 30600 }, { "epoch": 45.892053973013496, "grad_norm": 0.3022628426551819, "learning_rate": 8.311641458465191e-05, "loss": 0.0158, "step": 30610 }, { "epoch": 45.90704647676162, "grad_norm": 0.2320326566696167, "learning_rate": 8.310402475097205e-05, "loss": 0.0118, "step": 30620 }, { "epoch": 45.92203898050975, "grad_norm": 0.151781365275383, "learning_rate": 8.309163129708581e-05, "loss": 0.0107, "step": 30630 }, { "epoch": 45.93703148425787, "grad_norm": 0.13811245560646057, "learning_rate": 8.307923422434852e-05, "loss": 0.0082, "step": 30640 }, { "epoch": 45.952023988006, "grad_norm": 0.3457493484020233, "learning_rate": 8.30668335341159e-05, "loss": 0.0118, "step": 30650 }, { "epoch": 45.96701649175412, "grad_norm": 0.3596700131893158, "learning_rate": 8.305442922774408e-05, "loss": 0.0116, "step": 30660 }, { "epoch": 45.98200899550225, "grad_norm": 0.237000972032547, "learning_rate": 8.304202130658959e-05, "loss": 0.0132, "step": 30670 }, { "epoch": 45.99700149925037, "grad_norm": 0.20535628497600555, "learning_rate": 8.302960977200931e-05, "loss": 0.0099, "step": 30680 }, { "epoch": 46.0119940029985, "grad_norm": 0.47097891569137573, "learning_rate": 8.301719462536058e-05, "loss": 0.0133, "step": 30690 }, { "epoch": 46.026986506746624, "grad_norm": 0.25564152002334595, "learning_rate": 8.300477586800108e-05, "loss": 0.0138, "step": 30700 }, { "epoch": 46.04197901049475, "grad_norm": 0.2616446912288666, "learning_rate": 8.299235350128892e-05, "loss": 0.0109, "step": 30710 }, { "epoch": 46.05697151424288, "grad_norm": 0.2771615982055664, "learning_rate": 8.297992752658258e-05, "loss": 0.0141, "step": 30720 }, { "epoch": 46.07196401799101, "grad_norm": 0.5964570045471191, "learning_rate": 8.296749794524094e-05, "loss": 0.0151, "step": 30730 }, { "epoch": 46.08695652173913, "grad_norm": 0.2468319535255432, "learning_rate": 8.295506475862332e-05, "loss": 0.0126, "step": 30740 }, { "epoch": 46.10194902548726, "grad_norm": 0.2507488429546356, "learning_rate": 8.294262796808933e-05, "loss": 0.0132, "step": 30750 }, { "epoch": 46.11694152923538, "grad_norm": 0.4336037337779999, "learning_rate": 8.293018757499909e-05, "loss": 0.0124, "step": 30760 }, { "epoch": 46.13193403298351, "grad_norm": 0.30519232153892517, "learning_rate": 8.291774358071305e-05, "loss": 0.0114, "step": 30770 }, { "epoch": 46.146926536731634, "grad_norm": 0.20503123104572296, "learning_rate": 8.290529598659205e-05, "loss": 0.0172, "step": 30780 }, { "epoch": 46.16191904047976, "grad_norm": 0.38051658868789673, "learning_rate": 8.289284479399738e-05, "loss": 0.0123, "step": 30790 }, { "epoch": 46.176911544227885, "grad_norm": 0.22284430265426636, "learning_rate": 8.288039000429064e-05, "loss": 0.0156, "step": 30800 }, { "epoch": 46.19190404797601, "grad_norm": 0.2734052538871765, "learning_rate": 8.286793161883388e-05, "loss": 0.0108, "step": 30810 }, { "epoch": 46.206896551724135, "grad_norm": 0.19151794910430908, "learning_rate": 8.285546963898954e-05, "loss": 0.0087, "step": 30820 }, { "epoch": 46.22188905547226, "grad_norm": 0.22311361134052277, "learning_rate": 8.284300406612044e-05, "loss": 0.0133, "step": 30830 }, { "epoch": 46.23688155922039, "grad_norm": 0.22810474038124084, "learning_rate": 8.283053490158978e-05, "loss": 0.0149, "step": 30840 }, { "epoch": 46.25187406296852, "grad_norm": 0.215501606464386, "learning_rate": 8.28180621467612e-05, "loss": 0.013, "step": 30850 }, { "epoch": 46.266866566716644, "grad_norm": 0.19378024339675903, "learning_rate": 8.280558580299867e-05, "loss": 0.0155, "step": 30860 }, { "epoch": 46.28185907046477, "grad_norm": 0.2812073826789856, "learning_rate": 8.279310587166661e-05, "loss": 0.0131, "step": 30870 }, { "epoch": 46.296851574212894, "grad_norm": 0.21113452315330505, "learning_rate": 8.278062235412978e-05, "loss": 0.0176, "step": 30880 }, { "epoch": 46.31184407796102, "grad_norm": 0.22021044790744781, "learning_rate": 8.276813525175339e-05, "loss": 0.012, "step": 30890 }, { "epoch": 46.326836581709145, "grad_norm": 0.24739529192447662, "learning_rate": 8.2755644565903e-05, "loss": 0.0108, "step": 30900 }, { "epoch": 46.34182908545727, "grad_norm": 0.25359129905700684, "learning_rate": 8.274315029794454e-05, "loss": 0.0107, "step": 30910 }, { "epoch": 46.356821589205396, "grad_norm": 0.19349627196788788, "learning_rate": 8.273065244924443e-05, "loss": 0.0144, "step": 30920 }, { "epoch": 46.37181409295352, "grad_norm": 0.1785888969898224, "learning_rate": 8.271815102116936e-05, "loss": 0.0103, "step": 30930 }, { "epoch": 46.386806596701646, "grad_norm": 0.23358891904354095, "learning_rate": 8.270564601508648e-05, "loss": 0.0105, "step": 30940 }, { "epoch": 46.40179910044977, "grad_norm": 0.22547611594200134, "learning_rate": 8.269313743236333e-05, "loss": 0.0148, "step": 30950 }, { "epoch": 46.416791604197904, "grad_norm": 0.23277023434638977, "learning_rate": 8.268062527436783e-05, "loss": 0.0126, "step": 30960 }, { "epoch": 46.43178410794603, "grad_norm": 0.2105124145746231, "learning_rate": 8.266810954246828e-05, "loss": 0.0112, "step": 30970 }, { "epoch": 46.446776611694155, "grad_norm": 0.1789001077413559, "learning_rate": 8.265559023803338e-05, "loss": 0.0132, "step": 30980 }, { "epoch": 46.46176911544228, "grad_norm": 0.15684330463409424, "learning_rate": 8.264306736243223e-05, "loss": 0.0114, "step": 30990 }, { "epoch": 46.476761619190405, "grad_norm": 0.2535557746887207, "learning_rate": 8.263054091703432e-05, "loss": 0.0138, "step": 31000 }, { "epoch": 46.49175412293853, "grad_norm": 0.29422736167907715, "learning_rate": 8.26180109032095e-05, "loss": 0.0141, "step": 31010 }, { "epoch": 46.506746626686656, "grad_norm": 0.24260707199573517, "learning_rate": 8.260547732232807e-05, "loss": 0.0115, "step": 31020 }, { "epoch": 46.52173913043478, "grad_norm": 0.18743233382701874, "learning_rate": 8.259294017576063e-05, "loss": 0.0117, "step": 31030 }, { "epoch": 46.53673163418291, "grad_norm": 0.11185196042060852, "learning_rate": 8.258039946487828e-05, "loss": 0.0122, "step": 31040 }, { "epoch": 46.55172413793103, "grad_norm": 0.2518625855445862, "learning_rate": 8.256785519105241e-05, "loss": 0.0128, "step": 31050 }, { "epoch": 46.56671664167916, "grad_norm": 0.2290107011795044, "learning_rate": 8.255530735565488e-05, "loss": 0.0107, "step": 31060 }, { "epoch": 46.58170914542729, "grad_norm": 0.246283620595932, "learning_rate": 8.254275596005787e-05, "loss": 0.0116, "step": 31070 }, { "epoch": 46.596701649175415, "grad_norm": 0.28462734818458557, "learning_rate": 8.2530201005634e-05, "loss": 0.0139, "step": 31080 }, { "epoch": 46.61169415292354, "grad_norm": 0.19758744537830353, "learning_rate": 8.251764249375626e-05, "loss": 0.014, "step": 31090 }, { "epoch": 46.626686656671666, "grad_norm": 0.18555134534835815, "learning_rate": 8.250508042579803e-05, "loss": 0.0133, "step": 31100 }, { "epoch": 46.64167916041979, "grad_norm": 0.27766722440719604, "learning_rate": 8.249251480313307e-05, "loss": 0.0143, "step": 31110 }, { "epoch": 46.656671664167916, "grad_norm": 0.2711593210697174, "learning_rate": 8.247994562713555e-05, "loss": 0.0157, "step": 31120 }, { "epoch": 46.67166416791604, "grad_norm": 0.223036989569664, "learning_rate": 8.246737289917998e-05, "loss": 0.0125, "step": 31130 }, { "epoch": 46.68665667166417, "grad_norm": 0.347471684217453, "learning_rate": 8.245479662064135e-05, "loss": 0.014, "step": 31140 }, { "epoch": 46.70164917541229, "grad_norm": 0.24344155192375183, "learning_rate": 8.244221679289496e-05, "loss": 0.0147, "step": 31150 }, { "epoch": 46.71664167916042, "grad_norm": 0.24826085567474365, "learning_rate": 8.242963341731652e-05, "loss": 0.0161, "step": 31160 }, { "epoch": 46.73163418290854, "grad_norm": 0.17233508825302124, "learning_rate": 8.24170464952821e-05, "loss": 0.0105, "step": 31170 }, { "epoch": 46.74662668665667, "grad_norm": 0.2216089814901352, "learning_rate": 8.240445602816824e-05, "loss": 0.0138, "step": 31180 }, { "epoch": 46.7616191904048, "grad_norm": 0.1966402679681778, "learning_rate": 8.239186201735179e-05, "loss": 0.0098, "step": 31190 }, { "epoch": 46.776611694152926, "grad_norm": 0.20651741325855255, "learning_rate": 8.237926446420998e-05, "loss": 0.0129, "step": 31200 }, { "epoch": 46.79160419790105, "grad_norm": 0.2304162085056305, "learning_rate": 8.236666337012052e-05, "loss": 0.0121, "step": 31210 }, { "epoch": 46.80659670164918, "grad_norm": 0.2872285544872284, "learning_rate": 8.23540587364614e-05, "loss": 0.0105, "step": 31220 }, { "epoch": 46.8215892053973, "grad_norm": 0.28277260065078735, "learning_rate": 8.234145056461107e-05, "loss": 0.0129, "step": 31230 }, { "epoch": 46.83658170914543, "grad_norm": 0.4036863148212433, "learning_rate": 8.232883885594832e-05, "loss": 0.0123, "step": 31240 }, { "epoch": 46.85157421289355, "grad_norm": 0.2633000612258911, "learning_rate": 8.231622361185236e-05, "loss": 0.0145, "step": 31250 }, { "epoch": 46.86656671664168, "grad_norm": 0.23259200155735016, "learning_rate": 8.230360483370278e-05, "loss": 0.0115, "step": 31260 }, { "epoch": 46.8815592203898, "grad_norm": 0.2856266498565674, "learning_rate": 8.229098252287953e-05, "loss": 0.0127, "step": 31270 }, { "epoch": 46.89655172413793, "grad_norm": 0.17551378905773163, "learning_rate": 8.2278356680763e-05, "loss": 0.0105, "step": 31280 }, { "epoch": 46.911544227886054, "grad_norm": 0.3007446825504303, "learning_rate": 8.22657273087339e-05, "loss": 0.0131, "step": 31290 }, { "epoch": 46.92653673163418, "grad_norm": 0.22602874040603638, "learning_rate": 8.225309440817336e-05, "loss": 0.0128, "step": 31300 }, { "epoch": 46.94152923538231, "grad_norm": 0.29975631833076477, "learning_rate": 8.224045798046293e-05, "loss": 0.0113, "step": 31310 }, { "epoch": 46.95652173913044, "grad_norm": 0.22904673218727112, "learning_rate": 8.22278180269845e-05, "loss": 0.0089, "step": 31320 }, { "epoch": 46.97151424287856, "grad_norm": 0.3107677400112152, "learning_rate": 8.221517454912031e-05, "loss": 0.012, "step": 31330 }, { "epoch": 46.98650674662669, "grad_norm": 0.2622070014476776, "learning_rate": 8.220252754825308e-05, "loss": 0.0119, "step": 31340 }, { "epoch": 47.00149925037481, "grad_norm": 0.27138984203338623, "learning_rate": 8.218987702576586e-05, "loss": 0.0087, "step": 31350 }, { "epoch": 47.01649175412294, "grad_norm": 0.2208234667778015, "learning_rate": 8.217722298304207e-05, "loss": 0.0134, "step": 31360 }, { "epoch": 47.031484257871064, "grad_norm": 0.2001781016588211, "learning_rate": 8.216456542146557e-05, "loss": 0.013, "step": 31370 }, { "epoch": 47.04647676161919, "grad_norm": 0.1797333061695099, "learning_rate": 8.215190434242055e-05, "loss": 0.0122, "step": 31380 }, { "epoch": 47.061469265367315, "grad_norm": 0.20235030353069305, "learning_rate": 8.213923974729161e-05, "loss": 0.0116, "step": 31390 }, { "epoch": 47.07646176911544, "grad_norm": 0.1757947951555252, "learning_rate": 8.212657163746373e-05, "loss": 0.0113, "step": 31400 }, { "epoch": 47.091454272863565, "grad_norm": 0.24436460435390472, "learning_rate": 8.211390001432227e-05, "loss": 0.0169, "step": 31410 }, { "epoch": 47.1064467766117, "grad_norm": 0.23157605528831482, "learning_rate": 8.210122487925297e-05, "loss": 0.0084, "step": 31420 }, { "epoch": 47.12143928035982, "grad_norm": 0.16502642631530762, "learning_rate": 8.208854623364202e-05, "loss": 0.0107, "step": 31430 }, { "epoch": 47.13643178410795, "grad_norm": 0.3122841417789459, "learning_rate": 8.207586407887589e-05, "loss": 0.0109, "step": 31440 }, { "epoch": 47.151424287856074, "grad_norm": 0.28742873668670654, "learning_rate": 8.206317841634148e-05, "loss": 0.0153, "step": 31450 }, { "epoch": 47.1664167916042, "grad_norm": 0.17512045800685883, "learning_rate": 8.205048924742609e-05, "loss": 0.01, "step": 31460 }, { "epoch": 47.181409295352324, "grad_norm": 0.1846773624420166, "learning_rate": 8.203779657351738e-05, "loss": 0.0114, "step": 31470 }, { "epoch": 47.19640179910045, "grad_norm": 0.20446325838565826, "learning_rate": 8.20251003960034e-05, "loss": 0.0131, "step": 31480 }, { "epoch": 47.211394302848575, "grad_norm": 0.17251639068126678, "learning_rate": 8.201240071627258e-05, "loss": 0.0112, "step": 31490 }, { "epoch": 47.2263868065967, "grad_norm": 0.16284330189228058, "learning_rate": 8.199969753571377e-05, "loss": 0.0123, "step": 31500 }, { "epoch": 47.241379310344826, "grad_norm": 0.22886064648628235, "learning_rate": 8.198699085571615e-05, "loss": 0.013, "step": 31510 }, { "epoch": 47.25637181409295, "grad_norm": 0.19547995924949646, "learning_rate": 8.197428067766928e-05, "loss": 0.0091, "step": 31520 }, { "epoch": 47.271364317841076, "grad_norm": 0.2266787737607956, "learning_rate": 8.196156700296316e-05, "loss": 0.0104, "step": 31530 }, { "epoch": 47.28635682158921, "grad_norm": 0.23246721923351288, "learning_rate": 8.194884983298814e-05, "loss": 0.0135, "step": 31540 }, { "epoch": 47.301349325337334, "grad_norm": 0.1885473132133484, "learning_rate": 8.193612916913491e-05, "loss": 0.0093, "step": 31550 }, { "epoch": 47.31634182908546, "grad_norm": 0.2678382992744446, "learning_rate": 8.192340501279463e-05, "loss": 0.0131, "step": 31560 }, { "epoch": 47.331334332833585, "grad_norm": 0.2993246018886566, "learning_rate": 8.191067736535876e-05, "loss": 0.0128, "step": 31570 }, { "epoch": 47.34632683658171, "grad_norm": 0.25315845012664795, "learning_rate": 8.18979462282192e-05, "loss": 0.0142, "step": 31580 }, { "epoch": 47.361319340329835, "grad_norm": 0.17758126556873322, "learning_rate": 8.188521160276819e-05, "loss": 0.0104, "step": 31590 }, { "epoch": 47.37631184407796, "grad_norm": 0.4381685256958008, "learning_rate": 8.187247349039837e-05, "loss": 0.0151, "step": 31600 }, { "epoch": 47.391304347826086, "grad_norm": 0.25079992413520813, "learning_rate": 8.185973189250278e-05, "loss": 0.0154, "step": 31610 }, { "epoch": 47.40629685157421, "grad_norm": 0.17963844537734985, "learning_rate": 8.184698681047482e-05, "loss": 0.0146, "step": 31620 }, { "epoch": 47.42128935532234, "grad_norm": 0.3479737937450409, "learning_rate": 8.183423824570827e-05, "loss": 0.0121, "step": 31630 }, { "epoch": 47.43628185907046, "grad_norm": 0.2728017270565033, "learning_rate": 8.182148619959725e-05, "loss": 0.0145, "step": 31640 }, { "epoch": 47.45127436281859, "grad_norm": 0.3757742941379547, "learning_rate": 8.180873067353636e-05, "loss": 0.0146, "step": 31650 }, { "epoch": 47.46626686656672, "grad_norm": 0.21401090919971466, "learning_rate": 8.179597166892052e-05, "loss": 0.0114, "step": 31660 }, { "epoch": 47.481259370314845, "grad_norm": 0.2997826337814331, "learning_rate": 8.178320918714501e-05, "loss": 0.0149, "step": 31670 }, { "epoch": 47.49625187406297, "grad_norm": 0.1978636384010315, "learning_rate": 8.177044322960554e-05, "loss": 0.0121, "step": 31680 }, { "epoch": 47.511244377811096, "grad_norm": 0.2649438679218292, "learning_rate": 8.175767379769816e-05, "loss": 0.0132, "step": 31690 }, { "epoch": 47.52623688155922, "grad_norm": 0.20075386762619019, "learning_rate": 8.174490089281932e-05, "loss": 0.013, "step": 31700 }, { "epoch": 47.541229385307346, "grad_norm": 0.207570418715477, "learning_rate": 8.173212451636584e-05, "loss": 0.0121, "step": 31710 }, { "epoch": 47.55622188905547, "grad_norm": 0.2926475405693054, "learning_rate": 8.171934466973493e-05, "loss": 0.0163, "step": 31720 }, { "epoch": 47.5712143928036, "grad_norm": 0.26766136288642883, "learning_rate": 8.170656135432418e-05, "loss": 0.0159, "step": 31730 }, { "epoch": 47.58620689655172, "grad_norm": 0.2854841351509094, "learning_rate": 8.169377457153155e-05, "loss": 0.0103, "step": 31740 }, { "epoch": 47.60119940029985, "grad_norm": 0.18434126675128937, "learning_rate": 8.168098432275539e-05, "loss": 0.0131, "step": 31750 }, { "epoch": 47.61619190404797, "grad_norm": 0.22601841390132904, "learning_rate": 8.166819060939442e-05, "loss": 0.0104, "step": 31760 }, { "epoch": 47.6311844077961, "grad_norm": 0.2025926560163498, "learning_rate": 8.165539343284772e-05, "loss": 0.0105, "step": 31770 }, { "epoch": 47.64617691154423, "grad_norm": 0.196281298995018, "learning_rate": 8.16425927945148e-05, "loss": 0.0117, "step": 31780 }, { "epoch": 47.661169415292356, "grad_norm": 0.24754339456558228, "learning_rate": 8.162978869579551e-05, "loss": 0.0117, "step": 31790 }, { "epoch": 47.67616191904048, "grad_norm": 0.15654262900352478, "learning_rate": 8.161698113809007e-05, "loss": 0.0141, "step": 31800 }, { "epoch": 47.69115442278861, "grad_norm": 0.2552112340927124, "learning_rate": 8.160417012279911e-05, "loss": 0.0093, "step": 31810 }, { "epoch": 47.70614692653673, "grad_norm": 0.18478186428546906, "learning_rate": 8.159135565132363e-05, "loss": 0.0092, "step": 31820 }, { "epoch": 47.72113943028486, "grad_norm": 0.40306684374809265, "learning_rate": 8.157853772506498e-05, "loss": 0.0137, "step": 31830 }, { "epoch": 47.73613193403298, "grad_norm": 0.254842609167099, "learning_rate": 8.156571634542494e-05, "loss": 0.012, "step": 31840 }, { "epoch": 47.75112443778111, "grad_norm": 0.2158108800649643, "learning_rate": 8.15528915138056e-05, "loss": 0.0141, "step": 31850 }, { "epoch": 47.76611694152923, "grad_norm": 0.4071996808052063, "learning_rate": 8.154006323160949e-05, "loss": 0.0116, "step": 31860 }, { "epoch": 47.78110944527736, "grad_norm": 0.2039041668176651, "learning_rate": 8.152723150023949e-05, "loss": 0.0165, "step": 31870 }, { "epoch": 47.796101949025484, "grad_norm": 0.21048599481582642, "learning_rate": 8.151439632109886e-05, "loss": 0.0118, "step": 31880 }, { "epoch": 47.81109445277362, "grad_norm": 0.17194874584674835, "learning_rate": 8.150155769559122e-05, "loss": 0.0127, "step": 31890 }, { "epoch": 47.82608695652174, "grad_norm": 0.23765431344509125, "learning_rate": 8.148871562512058e-05, "loss": 0.0123, "step": 31900 }, { "epoch": 47.84107946026987, "grad_norm": 0.14385102689266205, "learning_rate": 8.147587011109136e-05, "loss": 0.0111, "step": 31910 }, { "epoch": 47.85607196401799, "grad_norm": 0.2790621817111969, "learning_rate": 8.14630211549083e-05, "loss": 0.0108, "step": 31920 }, { "epoch": 47.87106446776612, "grad_norm": 0.17791716754436493, "learning_rate": 8.145016875797655e-05, "loss": 0.0101, "step": 31930 }, { "epoch": 47.88605697151424, "grad_norm": 0.27205657958984375, "learning_rate": 8.143731292170164e-05, "loss": 0.0118, "step": 31940 }, { "epoch": 47.90104947526237, "grad_norm": 0.26846256852149963, "learning_rate": 8.142445364748944e-05, "loss": 0.0111, "step": 31950 }, { "epoch": 47.916041979010494, "grad_norm": 0.21059434115886688, "learning_rate": 8.141159093674624e-05, "loss": 0.0129, "step": 31960 }, { "epoch": 47.93103448275862, "grad_norm": 0.27642887830734253, "learning_rate": 8.139872479087869e-05, "loss": 0.011, "step": 31970 }, { "epoch": 47.946026986506745, "grad_norm": 0.23355525732040405, "learning_rate": 8.13858552112938e-05, "loss": 0.0133, "step": 31980 }, { "epoch": 47.96101949025487, "grad_norm": 0.165940061211586, "learning_rate": 8.137298219939895e-05, "loss": 0.0121, "step": 31990 }, { "epoch": 47.976011994002995, "grad_norm": 0.2364901602268219, "learning_rate": 8.136010575660196e-05, "loss": 0.0098, "step": 32000 }, { "epoch": 47.99100449775113, "grad_norm": 0.15817007422447205, "learning_rate": 8.134722588431095e-05, "loss": 0.0131, "step": 32010 }, { "epoch": 48.00599700149925, "grad_norm": 0.13518023490905762, "learning_rate": 8.133434258393444e-05, "loss": 0.0103, "step": 32020 }, { "epoch": 48.02098950524738, "grad_norm": 0.17537221312522888, "learning_rate": 8.132145585688134e-05, "loss": 0.0114, "step": 32030 }, { "epoch": 48.035982008995504, "grad_norm": 0.15691044926643372, "learning_rate": 8.130856570456093e-05, "loss": 0.0112, "step": 32040 }, { "epoch": 48.05097451274363, "grad_norm": 0.3508695363998413, "learning_rate": 8.129567212838283e-05, "loss": 0.0102, "step": 32050 }, { "epoch": 48.065967016491754, "grad_norm": 0.15227779746055603, "learning_rate": 8.128277512975708e-05, "loss": 0.0123, "step": 32060 }, { "epoch": 48.08095952023988, "grad_norm": 0.15825580060482025, "learning_rate": 8.126987471009408e-05, "loss": 0.0088, "step": 32070 }, { "epoch": 48.095952023988005, "grad_norm": 0.23945918679237366, "learning_rate": 8.125697087080459e-05, "loss": 0.0102, "step": 32080 }, { "epoch": 48.11094452773613, "grad_norm": 0.1831277310848236, "learning_rate": 8.124406361329976e-05, "loss": 0.0113, "step": 32090 }, { "epoch": 48.125937031484256, "grad_norm": 0.24627457559108734, "learning_rate": 8.123115293899111e-05, "loss": 0.0137, "step": 32100 }, { "epoch": 48.14092953523238, "grad_norm": 0.3146553337574005, "learning_rate": 8.121823884929055e-05, "loss": 0.014, "step": 32110 }, { "epoch": 48.155922038980506, "grad_norm": 0.2119915932416916, "learning_rate": 8.12053213456103e-05, "loss": 0.0125, "step": 32120 }, { "epoch": 48.17091454272864, "grad_norm": 0.43052566051483154, "learning_rate": 8.119240042936303e-05, "loss": 0.0141, "step": 32130 }, { "epoch": 48.185907046476764, "grad_norm": 0.2806972563266754, "learning_rate": 8.117947610196175e-05, "loss": 0.0143, "step": 32140 }, { "epoch": 48.20089955022489, "grad_norm": 0.16534654796123505, "learning_rate": 8.116654836481982e-05, "loss": 0.0096, "step": 32150 }, { "epoch": 48.215892053973015, "grad_norm": 0.17193636298179626, "learning_rate": 8.115361721935106e-05, "loss": 0.0138, "step": 32160 }, { "epoch": 48.23088455772114, "grad_norm": 0.43096446990966797, "learning_rate": 8.114068266696953e-05, "loss": 0.0144, "step": 32170 }, { "epoch": 48.245877061469265, "grad_norm": 0.512781023979187, "learning_rate": 8.112774470908978e-05, "loss": 0.0186, "step": 32180 }, { "epoch": 48.26086956521739, "grad_norm": 0.24552416801452637, "learning_rate": 8.111480334712665e-05, "loss": 0.0133, "step": 32190 }, { "epoch": 48.275862068965516, "grad_norm": 0.22975678741931915, "learning_rate": 8.110185858249542e-05, "loss": 0.0102, "step": 32200 }, { "epoch": 48.29085457271364, "grad_norm": 0.27629023790359497, "learning_rate": 8.108891041661168e-05, "loss": 0.0141, "step": 32210 }, { "epoch": 48.30584707646177, "grad_norm": 0.2449333369731903, "learning_rate": 8.107595885089146e-05, "loss": 0.0112, "step": 32220 }, { "epoch": 48.32083958020989, "grad_norm": 0.19008077681064606, "learning_rate": 8.106300388675108e-05, "loss": 0.0109, "step": 32230 }, { "epoch": 48.335832083958024, "grad_norm": 0.45623689889907837, "learning_rate": 8.105004552560731e-05, "loss": 0.0116, "step": 32240 }, { "epoch": 48.35082458770615, "grad_norm": 0.1496497243642807, "learning_rate": 8.103708376887724e-05, "loss": 0.0128, "step": 32250 }, { "epoch": 48.365817091454275, "grad_norm": 0.23281070590019226, "learning_rate": 8.102411861797836e-05, "loss": 0.0084, "step": 32260 }, { "epoch": 48.3808095952024, "grad_norm": 0.18110981583595276, "learning_rate": 8.101115007432851e-05, "loss": 0.0107, "step": 32270 }, { "epoch": 48.395802098950526, "grad_norm": 0.23858624696731567, "learning_rate": 8.09981781393459e-05, "loss": 0.0108, "step": 32280 }, { "epoch": 48.41079460269865, "grad_norm": 0.17385905981063843, "learning_rate": 8.098520281444915e-05, "loss": 0.0115, "step": 32290 }, { "epoch": 48.425787106446776, "grad_norm": 0.15889205038547516, "learning_rate": 8.09722241010572e-05, "loss": 0.0094, "step": 32300 }, { "epoch": 48.4407796101949, "grad_norm": 0.23356369137763977, "learning_rate": 8.095924200058939e-05, "loss": 0.0117, "step": 32310 }, { "epoch": 48.45577211394303, "grad_norm": 0.28240805864334106, "learning_rate": 8.094625651446541e-05, "loss": 0.0118, "step": 32320 }, { "epoch": 48.47076461769115, "grad_norm": 0.3430540859699249, "learning_rate": 8.093326764410536e-05, "loss": 0.0104, "step": 32330 }, { "epoch": 48.48575712143928, "grad_norm": 0.3294772803783417, "learning_rate": 8.092027539092966e-05, "loss": 0.0118, "step": 32340 }, { "epoch": 48.5007496251874, "grad_norm": 0.15642839670181274, "learning_rate": 8.090727975635913e-05, "loss": 0.0124, "step": 32350 }, { "epoch": 48.515742128935536, "grad_norm": 0.22590085864067078, "learning_rate": 8.089428074181497e-05, "loss": 0.0102, "step": 32360 }, { "epoch": 48.53073463268366, "grad_norm": 0.18032456934452057, "learning_rate": 8.088127834871871e-05, "loss": 0.0139, "step": 32370 }, { "epoch": 48.545727136431786, "grad_norm": 0.2488425076007843, "learning_rate": 8.086827257849226e-05, "loss": 0.013, "step": 32380 }, { "epoch": 48.56071964017991, "grad_norm": 0.17983579635620117, "learning_rate": 8.085526343255795e-05, "loss": 0.015, "step": 32390 }, { "epoch": 48.57571214392804, "grad_norm": 0.19091647863388062, "learning_rate": 8.084225091233842e-05, "loss": 0.0133, "step": 32400 }, { "epoch": 48.59070464767616, "grad_norm": 0.1747322827577591, "learning_rate": 8.082923501925668e-05, "loss": 0.0121, "step": 32410 }, { "epoch": 48.60569715142429, "grad_norm": 0.2392604649066925, "learning_rate": 8.081621575473617e-05, "loss": 0.013, "step": 32420 }, { "epoch": 48.62068965517241, "grad_norm": 0.2394065409898758, "learning_rate": 8.080319312020064e-05, "loss": 0.0131, "step": 32430 }, { "epoch": 48.63568215892054, "grad_norm": 0.2699820399284363, "learning_rate": 8.079016711707421e-05, "loss": 0.011, "step": 32440 }, { "epoch": 48.65067466266866, "grad_norm": 0.16570815443992615, "learning_rate": 8.077713774678139e-05, "loss": 0.0125, "step": 32450 }, { "epoch": 48.66566716641679, "grad_norm": 0.1716301590204239, "learning_rate": 8.076410501074707e-05, "loss": 0.0126, "step": 32460 }, { "epoch": 48.680659670164914, "grad_norm": 0.23592166602611542, "learning_rate": 8.075106891039647e-05, "loss": 0.0088, "step": 32470 }, { "epoch": 48.69565217391305, "grad_norm": 0.1677088886499405, "learning_rate": 8.073802944715523e-05, "loss": 0.0084, "step": 32480 }, { "epoch": 48.71064467766117, "grad_norm": 0.3169427812099457, "learning_rate": 8.072498662244929e-05, "loss": 0.01, "step": 32490 }, { "epoch": 48.7256371814093, "grad_norm": 0.14333249628543854, "learning_rate": 8.0711940437705e-05, "loss": 0.0166, "step": 32500 }, { "epoch": 48.74062968515742, "grad_norm": 0.25171542167663574, "learning_rate": 8.06988908943491e-05, "loss": 0.0112, "step": 32510 }, { "epoch": 48.75562218890555, "grad_norm": 0.3178611397743225, "learning_rate": 8.068583799380863e-05, "loss": 0.0124, "step": 32520 }, { "epoch": 48.77061469265367, "grad_norm": 0.2620420455932617, "learning_rate": 8.067278173751104e-05, "loss": 0.0129, "step": 32530 }, { "epoch": 48.7856071964018, "grad_norm": 0.2899312674999237, "learning_rate": 8.065972212688417e-05, "loss": 0.0126, "step": 32540 }, { "epoch": 48.800599700149924, "grad_norm": 0.31549134850502014, "learning_rate": 8.064665916335618e-05, "loss": 0.0139, "step": 32550 }, { "epoch": 48.81559220389805, "grad_norm": 0.3040305972099304, "learning_rate": 8.063359284835564e-05, "loss": 0.0142, "step": 32560 }, { "epoch": 48.830584707646175, "grad_norm": 0.3089202046394348, "learning_rate": 8.062052318331142e-05, "loss": 0.01, "step": 32570 }, { "epoch": 48.8455772113943, "grad_norm": 0.33204978704452515, "learning_rate": 8.060745016965283e-05, "loss": 0.0196, "step": 32580 }, { "epoch": 48.86056971514243, "grad_norm": 0.29838132858276367, "learning_rate": 8.059437380880952e-05, "loss": 0.0147, "step": 32590 }, { "epoch": 48.87556221889056, "grad_norm": 0.30079400539398193, "learning_rate": 8.058129410221146e-05, "loss": 0.0158, "step": 32600 }, { "epoch": 48.89055472263868, "grad_norm": 0.2615508437156677, "learning_rate": 8.056821105128908e-05, "loss": 0.0112, "step": 32610 }, { "epoch": 48.90554722638681, "grad_norm": 0.1961820125579834, "learning_rate": 8.05551246574731e-05, "loss": 0.0102, "step": 32620 }, { "epoch": 48.920539730134934, "grad_norm": 0.2520214021205902, "learning_rate": 8.05420349221946e-05, "loss": 0.0123, "step": 32630 }, { "epoch": 48.93553223388306, "grad_norm": 0.21015316247940063, "learning_rate": 8.05289418468851e-05, "loss": 0.0103, "step": 32640 }, { "epoch": 48.950524737631184, "grad_norm": 0.23302313685417175, "learning_rate": 8.051584543297642e-05, "loss": 0.0147, "step": 32650 }, { "epoch": 48.96551724137931, "grad_norm": 0.25772005319595337, "learning_rate": 8.050274568190074e-05, "loss": 0.015, "step": 32660 }, { "epoch": 48.980509745127435, "grad_norm": 0.3666170537471771, "learning_rate": 8.048964259509067e-05, "loss": 0.0115, "step": 32670 }, { "epoch": 48.99550224887556, "grad_norm": 0.24644765257835388, "learning_rate": 8.047653617397914e-05, "loss": 0.0124, "step": 32680 }, { "epoch": 49.010494752623686, "grad_norm": 0.20666475594043732, "learning_rate": 8.046342641999941e-05, "loss": 0.0095, "step": 32690 }, { "epoch": 49.02548725637181, "grad_norm": 0.22381189465522766, "learning_rate": 8.045031333458517e-05, "loss": 0.0124, "step": 32700 }, { "epoch": 49.04047976011994, "grad_norm": 0.1628364771604538, "learning_rate": 8.043719691917047e-05, "loss": 0.0122, "step": 32710 }, { "epoch": 49.05547226386807, "grad_norm": 0.2193315178155899, "learning_rate": 8.042407717518966e-05, "loss": 0.0124, "step": 32720 }, { "epoch": 49.070464767616194, "grad_norm": 0.20783616602420807, "learning_rate": 8.041095410407751e-05, "loss": 0.0125, "step": 32730 }, { "epoch": 49.08545727136432, "grad_norm": 0.2705659866333008, "learning_rate": 8.039782770726913e-05, "loss": 0.0102, "step": 32740 }, { "epoch": 49.100449775112445, "grad_norm": 0.21647395193576813, "learning_rate": 8.038469798620004e-05, "loss": 0.0109, "step": 32750 }, { "epoch": 49.11544227886057, "grad_norm": 0.22425779700279236, "learning_rate": 8.037156494230604e-05, "loss": 0.0102, "step": 32760 }, { "epoch": 49.130434782608695, "grad_norm": 0.17207182943820953, "learning_rate": 8.035842857702338e-05, "loss": 0.0108, "step": 32770 }, { "epoch": 49.14542728635682, "grad_norm": 0.21973921358585358, "learning_rate": 8.03452888917886e-05, "loss": 0.0142, "step": 32780 }, { "epoch": 49.160419790104946, "grad_norm": 0.21434065699577332, "learning_rate": 8.033214588803866e-05, "loss": 0.015, "step": 32790 }, { "epoch": 49.17541229385307, "grad_norm": 0.19044244289398193, "learning_rate": 8.031899956721083e-05, "loss": 0.01, "step": 32800 }, { "epoch": 49.1904047976012, "grad_norm": 0.25105613470077515, "learning_rate": 8.030584993074282e-05, "loss": 0.0142, "step": 32810 }, { "epoch": 49.20539730134932, "grad_norm": 0.1675923615694046, "learning_rate": 8.02926969800726e-05, "loss": 0.0105, "step": 32820 }, { "epoch": 49.220389805097454, "grad_norm": 0.2478402554988861, "learning_rate": 8.027954071663859e-05, "loss": 0.0111, "step": 32830 }, { "epoch": 49.23538230884558, "grad_norm": 0.23007981479167938, "learning_rate": 8.026638114187954e-05, "loss": 0.0113, "step": 32840 }, { "epoch": 49.250374812593705, "grad_norm": 0.1472739726305008, "learning_rate": 8.025321825723456e-05, "loss": 0.0132, "step": 32850 }, { "epoch": 49.26536731634183, "grad_norm": 0.1890087127685547, "learning_rate": 8.02400520641431e-05, "loss": 0.0124, "step": 32860 }, { "epoch": 49.280359820089956, "grad_norm": 0.1994089037179947, "learning_rate": 8.022688256404501e-05, "loss": 0.0114, "step": 32870 }, { "epoch": 49.29535232383808, "grad_norm": 0.3291913568973541, "learning_rate": 8.02137097583805e-05, "loss": 0.0115, "step": 32880 }, { "epoch": 49.310344827586206, "grad_norm": 0.1969902217388153, "learning_rate": 8.02005336485901e-05, "loss": 0.0105, "step": 32890 }, { "epoch": 49.32533733133433, "grad_norm": 0.16720788180828094, "learning_rate": 8.018735423611476e-05, "loss": 0.0122, "step": 32900 }, { "epoch": 49.34032983508246, "grad_norm": 0.23656918108463287, "learning_rate": 8.017417152239574e-05, "loss": 0.0108, "step": 32910 }, { "epoch": 49.35532233883058, "grad_norm": 0.20574603974819183, "learning_rate": 8.01609855088747e-05, "loss": 0.01, "step": 32920 }, { "epoch": 49.37031484257871, "grad_norm": 0.1429399847984314, "learning_rate": 8.014779619699362e-05, "loss": 0.0072, "step": 32930 }, { "epoch": 49.38530734632684, "grad_norm": 0.1871434599161148, "learning_rate": 8.013460358819489e-05, "loss": 0.0086, "step": 32940 }, { "epoch": 49.400299850074965, "grad_norm": 0.18258357048034668, "learning_rate": 8.01214076839212e-05, "loss": 0.0129, "step": 32950 }, { "epoch": 49.41529235382309, "grad_norm": 0.23554566502571106, "learning_rate": 8.010820848561565e-05, "loss": 0.0151, "step": 32960 }, { "epoch": 49.430284857571216, "grad_norm": 0.3630795180797577, "learning_rate": 8.009500599472171e-05, "loss": 0.0142, "step": 32970 }, { "epoch": 49.44527736131934, "grad_norm": 0.13623906672000885, "learning_rate": 8.008180021268314e-05, "loss": 0.0089, "step": 32980 }, { "epoch": 49.46026986506747, "grad_norm": 0.2132451981306076, "learning_rate": 8.006859114094414e-05, "loss": 0.0113, "step": 32990 }, { "epoch": 49.47526236881559, "grad_norm": 0.17810550332069397, "learning_rate": 8.005537878094921e-05, "loss": 0.0081, "step": 33000 }, { "epoch": 49.49025487256372, "grad_norm": 0.2106214314699173, "learning_rate": 8.004216313414323e-05, "loss": 0.0117, "step": 33010 }, { "epoch": 49.50524737631184, "grad_norm": 0.2695084810256958, "learning_rate": 8.002894420197149e-05, "loss": 0.0104, "step": 33020 }, { "epoch": 49.52023988005997, "grad_norm": 0.15103058516979218, "learning_rate": 8.001572198587954e-05, "loss": 0.009, "step": 33030 }, { "epoch": 49.53523238380809, "grad_norm": 0.21773679554462433, "learning_rate": 8.000249648731338e-05, "loss": 0.0097, "step": 33040 }, { "epoch": 49.55022488755622, "grad_norm": 0.31011131405830383, "learning_rate": 7.998926770771928e-05, "loss": 0.0094, "step": 33050 }, { "epoch": 49.56521739130435, "grad_norm": 0.18395879864692688, "learning_rate": 7.997603564854397e-05, "loss": 0.0116, "step": 33060 }, { "epoch": 49.58020989505248, "grad_norm": 0.17859604954719543, "learning_rate": 7.996280031123448e-05, "loss": 0.0112, "step": 33070 }, { "epoch": 49.5952023988006, "grad_norm": 0.4535830318927765, "learning_rate": 7.994956169723818e-05, "loss": 0.0111, "step": 33080 }, { "epoch": 49.61019490254873, "grad_norm": 0.16116811335086823, "learning_rate": 7.993631980800285e-05, "loss": 0.0092, "step": 33090 }, { "epoch": 49.62518740629685, "grad_norm": 0.17024269700050354, "learning_rate": 7.992307464497659e-05, "loss": 0.0106, "step": 33100 }, { "epoch": 49.64017991004498, "grad_norm": 0.20797482132911682, "learning_rate": 7.990982620960787e-05, "loss": 0.0105, "step": 33110 }, { "epoch": 49.6551724137931, "grad_norm": 0.2616359293460846, "learning_rate": 7.989657450334554e-05, "loss": 0.0091, "step": 33120 }, { "epoch": 49.67016491754123, "grad_norm": 0.30297040939331055, "learning_rate": 7.988331952763877e-05, "loss": 0.0113, "step": 33130 }, { "epoch": 49.685157421289354, "grad_norm": 0.23027317225933075, "learning_rate": 7.987006128393709e-05, "loss": 0.0105, "step": 33140 }, { "epoch": 49.70014992503748, "grad_norm": 0.44129693508148193, "learning_rate": 7.985679977369043e-05, "loss": 0.0147, "step": 33150 }, { "epoch": 49.715142428785605, "grad_norm": 0.2948586046695709, "learning_rate": 7.984353499834902e-05, "loss": 0.0112, "step": 33160 }, { "epoch": 49.73013493253373, "grad_norm": 0.2750416100025177, "learning_rate": 7.983026695936351e-05, "loss": 0.0104, "step": 33170 }, { "epoch": 49.74512743628186, "grad_norm": 0.30734920501708984, "learning_rate": 7.981699565818486e-05, "loss": 0.0114, "step": 33180 }, { "epoch": 49.76011994002999, "grad_norm": 0.17345841228961945, "learning_rate": 7.980372109626437e-05, "loss": 0.0115, "step": 33190 }, { "epoch": 49.77511244377811, "grad_norm": 0.18068218231201172, "learning_rate": 7.979044327505375e-05, "loss": 0.0171, "step": 33200 }, { "epoch": 49.79010494752624, "grad_norm": 0.23840264976024628, "learning_rate": 7.977716219600506e-05, "loss": 0.0124, "step": 33210 }, { "epoch": 49.805097451274364, "grad_norm": 0.2548663020133972, "learning_rate": 7.97638778605707e-05, "loss": 0.0102, "step": 33220 }, { "epoch": 49.82008995502249, "grad_norm": 0.2985728681087494, "learning_rate": 7.975059027020338e-05, "loss": 0.01, "step": 33230 }, { "epoch": 49.835082458770614, "grad_norm": 0.17675665020942688, "learning_rate": 7.973729942635623e-05, "loss": 0.0117, "step": 33240 }, { "epoch": 49.85007496251874, "grad_norm": 0.21894222497940063, "learning_rate": 7.972400533048273e-05, "loss": 0.011, "step": 33250 }, { "epoch": 49.865067466266865, "grad_norm": 0.18037904798984528, "learning_rate": 7.97107079840367e-05, "loss": 0.0107, "step": 33260 }, { "epoch": 49.88005997001499, "grad_norm": 0.14094863831996918, "learning_rate": 7.969740738847231e-05, "loss": 0.0106, "step": 33270 }, { "epoch": 49.895052473763116, "grad_norm": 0.19294142723083496, "learning_rate": 7.968410354524411e-05, "loss": 0.0158, "step": 33280 }, { "epoch": 49.91004497751124, "grad_norm": 0.25984835624694824, "learning_rate": 7.967079645580697e-05, "loss": 0.011, "step": 33290 }, { "epoch": 49.92503748125937, "grad_norm": 0.22795595228672028, "learning_rate": 7.965748612161612e-05, "loss": 0.0104, "step": 33300 }, { "epoch": 49.9400299850075, "grad_norm": 0.2879939079284668, "learning_rate": 7.96441725441272e-05, "loss": 0.0111, "step": 33310 }, { "epoch": 49.955022488755624, "grad_norm": 0.2668819725513458, "learning_rate": 7.963085572479614e-05, "loss": 0.0112, "step": 33320 }, { "epoch": 49.97001499250375, "grad_norm": 0.23071683943271637, "learning_rate": 7.961753566507924e-05, "loss": 0.0129, "step": 33330 }, { "epoch": 49.985007496251875, "grad_norm": 0.23820717632770538, "learning_rate": 7.960421236643316e-05, "loss": 0.0118, "step": 33340 }, { "epoch": 50.0, "grad_norm": 0.1790047585964203, "learning_rate": 7.959088583031496e-05, "loss": 0.0084, "step": 33350 }, { "epoch": 50.014992503748125, "grad_norm": 0.1812543272972107, "learning_rate": 7.957755605818194e-05, "loss": 0.0103, "step": 33360 }, { "epoch": 50.02998500749625, "grad_norm": 0.3436427712440491, "learning_rate": 7.956422305149185e-05, "loss": 0.0124, "step": 33370 }, { "epoch": 50.044977511244376, "grad_norm": 0.2568313479423523, "learning_rate": 7.95508868117028e-05, "loss": 0.0178, "step": 33380 }, { "epoch": 50.0599700149925, "grad_norm": 0.34386906027793884, "learning_rate": 7.953754734027318e-05, "loss": 0.0137, "step": 33390 }, { "epoch": 50.07496251874063, "grad_norm": 0.13139747083187103, "learning_rate": 7.952420463866182e-05, "loss": 0.0121, "step": 33400 }, { "epoch": 50.08995502248876, "grad_norm": 0.3292088508605957, "learning_rate": 7.951085870832782e-05, "loss": 0.0134, "step": 33410 }, { "epoch": 50.104947526236884, "grad_norm": 0.18430198729038239, "learning_rate": 7.949750955073067e-05, "loss": 0.0102, "step": 33420 }, { "epoch": 50.11994002998501, "grad_norm": 0.24285706877708435, "learning_rate": 7.948415716733022e-05, "loss": 0.0109, "step": 33430 }, { "epoch": 50.134932533733135, "grad_norm": 0.2428882122039795, "learning_rate": 7.947080155958669e-05, "loss": 0.011, "step": 33440 }, { "epoch": 50.14992503748126, "grad_norm": 0.22921715676784515, "learning_rate": 7.94574427289606e-05, "loss": 0.0082, "step": 33450 }, { "epoch": 50.164917541229386, "grad_norm": 0.31200042366981506, "learning_rate": 7.944408067691284e-05, "loss": 0.0113, "step": 33460 }, { "epoch": 50.17991004497751, "grad_norm": 0.21266013383865356, "learning_rate": 7.943071540490473e-05, "loss": 0.0117, "step": 33470 }, { "epoch": 50.194902548725636, "grad_norm": 0.4549363851547241, "learning_rate": 7.94173469143978e-05, "loss": 0.01, "step": 33480 }, { "epoch": 50.20989505247376, "grad_norm": 0.20471011102199554, "learning_rate": 7.940397520685406e-05, "loss": 0.0112, "step": 33490 }, { "epoch": 50.22488755622189, "grad_norm": 0.1743817925453186, "learning_rate": 7.939060028373577e-05, "loss": 0.0119, "step": 33500 }, { "epoch": 50.23988005997001, "grad_norm": 0.2936621308326721, "learning_rate": 7.937722214650565e-05, "loss": 0.0116, "step": 33510 }, { "epoch": 50.25487256371814, "grad_norm": 0.21484427154064178, "learning_rate": 7.936384079662666e-05, "loss": 0.0114, "step": 33520 }, { "epoch": 50.26986506746627, "grad_norm": 0.20679353177547455, "learning_rate": 7.93504562355622e-05, "loss": 0.0104, "step": 33530 }, { "epoch": 50.284857571214395, "grad_norm": 0.2843646705150604, "learning_rate": 7.933706846477599e-05, "loss": 0.0081, "step": 33540 }, { "epoch": 50.29985007496252, "grad_norm": 0.1782398521900177, "learning_rate": 7.932367748573206e-05, "loss": 0.0115, "step": 33550 }, { "epoch": 50.314842578710646, "grad_norm": 0.32405561208724976, "learning_rate": 7.931028329989485e-05, "loss": 0.0107, "step": 33560 }, { "epoch": 50.32983508245877, "grad_norm": 0.23668357729911804, "learning_rate": 7.929688590872913e-05, "loss": 0.0096, "step": 33570 }, { "epoch": 50.3448275862069, "grad_norm": 0.1812407523393631, "learning_rate": 7.928348531370003e-05, "loss": 0.0109, "step": 33580 }, { "epoch": 50.35982008995502, "grad_norm": 0.3000277876853943, "learning_rate": 7.927008151627297e-05, "loss": 0.0123, "step": 33590 }, { "epoch": 50.37481259370315, "grad_norm": 0.21460695564746857, "learning_rate": 7.925667451791383e-05, "loss": 0.016, "step": 33600 }, { "epoch": 50.38980509745127, "grad_norm": 0.23909738659858704, "learning_rate": 7.924326432008874e-05, "loss": 0.0096, "step": 33610 }, { "epoch": 50.4047976011994, "grad_norm": 0.1859009563922882, "learning_rate": 7.922985092426422e-05, "loss": 0.0107, "step": 33620 }, { "epoch": 50.41979010494752, "grad_norm": 0.3641067445278168, "learning_rate": 7.921643433190717e-05, "loss": 0.0165, "step": 33630 }, { "epoch": 50.43478260869565, "grad_norm": 0.2204233556985855, "learning_rate": 7.920301454448478e-05, "loss": 0.0119, "step": 33640 }, { "epoch": 50.44977511244378, "grad_norm": 0.22058244049549103, "learning_rate": 7.918959156346461e-05, "loss": 0.0117, "step": 33650 }, { "epoch": 50.46476761619191, "grad_norm": 0.20117105543613434, "learning_rate": 7.91761653903146e-05, "loss": 0.0125, "step": 33660 }, { "epoch": 50.47976011994003, "grad_norm": 0.2821025252342224, "learning_rate": 7.916273602650302e-05, "loss": 0.0124, "step": 33670 }, { "epoch": 50.49475262368816, "grad_norm": 0.2953006327152252, "learning_rate": 7.914930347349847e-05, "loss": 0.0106, "step": 33680 }, { "epoch": 50.50974512743628, "grad_norm": 0.21762673556804657, "learning_rate": 7.913586773276992e-05, "loss": 0.0091, "step": 33690 }, { "epoch": 50.52473763118441, "grad_norm": 0.2878868281841278, "learning_rate": 7.912242880578667e-05, "loss": 0.0128, "step": 33700 }, { "epoch": 50.53973013493253, "grad_norm": 0.150319442152977, "learning_rate": 7.910898669401839e-05, "loss": 0.0105, "step": 33710 }, { "epoch": 50.55472263868066, "grad_norm": 0.5884001851081848, "learning_rate": 7.909554139893511e-05, "loss": 0.0093, "step": 33720 }, { "epoch": 50.569715142428784, "grad_norm": 0.18770499527454376, "learning_rate": 7.908209292200715e-05, "loss": 0.0097, "step": 33730 }, { "epoch": 50.58470764617691, "grad_norm": 0.23841242492198944, "learning_rate": 7.906864126470523e-05, "loss": 0.0103, "step": 33740 }, { "epoch": 50.599700149925035, "grad_norm": 0.2192791998386383, "learning_rate": 7.905518642850041e-05, "loss": 0.0154, "step": 33750 }, { "epoch": 50.61469265367316, "grad_norm": 0.25292542576789856, "learning_rate": 7.904172841486409e-05, "loss": 0.0106, "step": 33760 }, { "epoch": 50.62968515742129, "grad_norm": 0.13252010941505432, "learning_rate": 7.902826722526801e-05, "loss": 0.0108, "step": 33770 }, { "epoch": 50.64467766116942, "grad_norm": 0.15542854368686676, "learning_rate": 7.901480286118427e-05, "loss": 0.0114, "step": 33780 }, { "epoch": 50.65967016491754, "grad_norm": 0.20224882662296295, "learning_rate": 7.900133532408531e-05, "loss": 0.0105, "step": 33790 }, { "epoch": 50.67466266866567, "grad_norm": 0.2126312553882599, "learning_rate": 7.898786461544395e-05, "loss": 0.0122, "step": 33800 }, { "epoch": 50.689655172413794, "grad_norm": 0.34514325857162476, "learning_rate": 7.897439073673325e-05, "loss": 0.0131, "step": 33810 }, { "epoch": 50.70464767616192, "grad_norm": 0.2623671293258667, "learning_rate": 7.896091368942677e-05, "loss": 0.0126, "step": 33820 }, { "epoch": 50.719640179910044, "grad_norm": 0.3531026840209961, "learning_rate": 7.894743347499832e-05, "loss": 0.0099, "step": 33830 }, { "epoch": 50.73463268365817, "grad_norm": 0.28094252943992615, "learning_rate": 7.893395009492203e-05, "loss": 0.0116, "step": 33840 }, { "epoch": 50.749625187406295, "grad_norm": 0.24138368666172028, "learning_rate": 7.892046355067248e-05, "loss": 0.0127, "step": 33850 }, { "epoch": 50.76461769115442, "grad_norm": 0.25110742449760437, "learning_rate": 7.890697384372451e-05, "loss": 0.0115, "step": 33860 }, { "epoch": 50.779610194902546, "grad_norm": 0.26230770349502563, "learning_rate": 7.889348097555336e-05, "loss": 0.0097, "step": 33870 }, { "epoch": 50.79460269865068, "grad_norm": 0.19460515677928925, "learning_rate": 7.887998494763455e-05, "loss": 0.0102, "step": 33880 }, { "epoch": 50.8095952023988, "grad_norm": 0.2495623528957367, "learning_rate": 7.886648576144404e-05, "loss": 0.0105, "step": 33890 }, { "epoch": 50.82458770614693, "grad_norm": 0.14089365303516388, "learning_rate": 7.885298341845802e-05, "loss": 0.0102, "step": 33900 }, { "epoch": 50.839580209895054, "grad_norm": 0.19463978707790375, "learning_rate": 7.883947792015311e-05, "loss": 0.0101, "step": 33910 }, { "epoch": 50.85457271364318, "grad_norm": 0.14340966939926147, "learning_rate": 7.882596926800628e-05, "loss": 0.0124, "step": 33920 }, { "epoch": 50.869565217391305, "grad_norm": 0.18568767607212067, "learning_rate": 7.881245746349477e-05, "loss": 0.0098, "step": 33930 }, { "epoch": 50.88455772113943, "grad_norm": 0.1442442238330841, "learning_rate": 7.879894250809623e-05, "loss": 0.0093, "step": 33940 }, { "epoch": 50.899550224887555, "grad_norm": 0.24830245971679688, "learning_rate": 7.878542440328865e-05, "loss": 0.0114, "step": 33950 }, { "epoch": 50.91454272863568, "grad_norm": 0.20278725028038025, "learning_rate": 7.877190315055031e-05, "loss": 0.0099, "step": 33960 }, { "epoch": 50.929535232383806, "grad_norm": 0.2060861587524414, "learning_rate": 7.875837875135991e-05, "loss": 0.0123, "step": 33970 }, { "epoch": 50.94452773613193, "grad_norm": 0.15965256094932556, "learning_rate": 7.874485120719646e-05, "loss": 0.0104, "step": 33980 }, { "epoch": 50.95952023988006, "grad_norm": 0.2752973437309265, "learning_rate": 7.873132051953928e-05, "loss": 0.0128, "step": 33990 }, { "epoch": 50.97451274362819, "grad_norm": 0.2003718763589859, "learning_rate": 7.87177866898681e-05, "loss": 0.0106, "step": 34000 }, { "epoch": 50.989505247376314, "grad_norm": 0.2072000801563263, "learning_rate": 7.870424971966294e-05, "loss": 0.0127, "step": 34010 }, { "epoch": 51.00449775112444, "grad_norm": 0.2158426195383072, "learning_rate": 7.869070961040419e-05, "loss": 0.0086, "step": 34020 }, { "epoch": 51.019490254872565, "grad_norm": 0.18682396411895752, "learning_rate": 7.867716636357257e-05, "loss": 0.0134, "step": 34030 }, { "epoch": 51.03448275862069, "grad_norm": 0.20385421812534332, "learning_rate": 7.866361998064915e-05, "loss": 0.0098, "step": 34040 }, { "epoch": 51.049475262368816, "grad_norm": 0.4000371992588043, "learning_rate": 7.865007046311534e-05, "loss": 0.0128, "step": 34050 }, { "epoch": 51.06446776611694, "grad_norm": 0.165254145860672, "learning_rate": 7.86365178124529e-05, "loss": 0.0083, "step": 34060 }, { "epoch": 51.079460269865066, "grad_norm": 0.28697681427001953, "learning_rate": 7.862296203014394e-05, "loss": 0.0129, "step": 34070 }, { "epoch": 51.09445277361319, "grad_norm": 0.27798593044281006, "learning_rate": 7.860940311767088e-05, "loss": 0.012, "step": 34080 }, { "epoch": 51.10944527736132, "grad_norm": 0.21262133121490479, "learning_rate": 7.85958410765165e-05, "loss": 0.01, "step": 34090 }, { "epoch": 51.12443778110944, "grad_norm": 0.32856085896492004, "learning_rate": 7.858227590816394e-05, "loss": 0.009, "step": 34100 }, { "epoch": 51.13943028485757, "grad_norm": 0.2533486783504486, "learning_rate": 7.856870761409664e-05, "loss": 0.0095, "step": 34110 }, { "epoch": 51.1544227886057, "grad_norm": 0.23188449442386627, "learning_rate": 7.855513619579846e-05, "loss": 0.0138, "step": 34120 }, { "epoch": 51.169415292353825, "grad_norm": 0.18171687424182892, "learning_rate": 7.85415616547535e-05, "loss": 0.0093, "step": 34130 }, { "epoch": 51.18440779610195, "grad_norm": 0.22166131436824799, "learning_rate": 7.852798399244627e-05, "loss": 0.0094, "step": 34140 }, { "epoch": 51.199400299850076, "grad_norm": 0.15730705857276917, "learning_rate": 7.851440321036161e-05, "loss": 0.0131, "step": 34150 }, { "epoch": 51.2143928035982, "grad_norm": 0.19408948719501495, "learning_rate": 7.850081930998468e-05, "loss": 0.0116, "step": 34160 }, { "epoch": 51.22938530734633, "grad_norm": 0.17481687664985657, "learning_rate": 7.8487232292801e-05, "loss": 0.0098, "step": 34170 }, { "epoch": 51.24437781109445, "grad_norm": 0.27173134684562683, "learning_rate": 7.847364216029642e-05, "loss": 0.0099, "step": 34180 }, { "epoch": 51.25937031484258, "grad_norm": 0.2644079029560089, "learning_rate": 7.846004891395716e-05, "loss": 0.011, "step": 34190 }, { "epoch": 51.2743628185907, "grad_norm": 0.17939770221710205, "learning_rate": 7.844645255526972e-05, "loss": 0.0113, "step": 34200 }, { "epoch": 51.28935532233883, "grad_norm": 0.2677222788333893, "learning_rate": 7.843285308572101e-05, "loss": 0.0127, "step": 34210 }, { "epoch": 51.30434782608695, "grad_norm": 0.199852854013443, "learning_rate": 7.841925050679823e-05, "loss": 0.0132, "step": 34220 }, { "epoch": 51.319340329835086, "grad_norm": 0.20549345016479492, "learning_rate": 7.840564481998895e-05, "loss": 0.0097, "step": 34230 }, { "epoch": 51.33433283358321, "grad_norm": 0.21396088600158691, "learning_rate": 7.839203602678105e-05, "loss": 0.0099, "step": 34240 }, { "epoch": 51.34932533733134, "grad_norm": 0.21427388489246368, "learning_rate": 7.837842412866279e-05, "loss": 0.0102, "step": 34250 }, { "epoch": 51.36431784107946, "grad_norm": 0.18276409804821014, "learning_rate": 7.836480912712272e-05, "loss": 0.0105, "step": 34260 }, { "epoch": 51.37931034482759, "grad_norm": 0.37936562299728394, "learning_rate": 7.835119102364976e-05, "loss": 0.0098, "step": 34270 }, { "epoch": 51.39430284857571, "grad_norm": 0.1989976316690445, "learning_rate": 7.83375698197332e-05, "loss": 0.0133, "step": 34280 }, { "epoch": 51.40929535232384, "grad_norm": 0.3707186281681061, "learning_rate": 7.83239455168626e-05, "loss": 0.0119, "step": 34290 }, { "epoch": 51.42428785607196, "grad_norm": 0.517500638961792, "learning_rate": 7.83103181165279e-05, "loss": 0.0116, "step": 34300 }, { "epoch": 51.43928035982009, "grad_norm": 0.261483371257782, "learning_rate": 7.829668762021937e-05, "loss": 0.0121, "step": 34310 }, { "epoch": 51.454272863568214, "grad_norm": 0.30420422554016113, "learning_rate": 7.828305402942764e-05, "loss": 0.0132, "step": 34320 }, { "epoch": 51.46926536731634, "grad_norm": 0.1597432643175125, "learning_rate": 7.826941734564363e-05, "loss": 0.0133, "step": 34330 }, { "epoch": 51.484257871064464, "grad_norm": 0.2050953209400177, "learning_rate": 7.825577757035865e-05, "loss": 0.0138, "step": 34340 }, { "epoch": 51.4992503748126, "grad_norm": 0.27855345606803894, "learning_rate": 7.824213470506431e-05, "loss": 0.0145, "step": 34350 }, { "epoch": 51.51424287856072, "grad_norm": 0.2867400348186493, "learning_rate": 7.822848875125257e-05, "loss": 0.0137, "step": 34360 }, { "epoch": 51.52923538230885, "grad_norm": 0.39721283316612244, "learning_rate": 7.821483971041576e-05, "loss": 0.0118, "step": 34370 }, { "epoch": 51.54422788605697, "grad_norm": 0.15180346369743347, "learning_rate": 7.820118758404649e-05, "loss": 0.0113, "step": 34380 }, { "epoch": 51.5592203898051, "grad_norm": 0.23939193785190582, "learning_rate": 7.818753237363776e-05, "loss": 0.0126, "step": 34390 }, { "epoch": 51.574212893553224, "grad_norm": 0.2004176676273346, "learning_rate": 7.817387408068286e-05, "loss": 0.0104, "step": 34400 }, { "epoch": 51.58920539730135, "grad_norm": 0.2029499113559723, "learning_rate": 7.816021270667544e-05, "loss": 0.0096, "step": 34410 }, { "epoch": 51.604197901049474, "grad_norm": 0.22925764322280884, "learning_rate": 7.81465482531095e-05, "loss": 0.0133, "step": 34420 }, { "epoch": 51.6191904047976, "grad_norm": 0.2660066485404968, "learning_rate": 7.813288072147938e-05, "loss": 0.0098, "step": 34430 }, { "epoch": 51.634182908545725, "grad_norm": 0.23004432022571564, "learning_rate": 7.811921011327972e-05, "loss": 0.012, "step": 34440 }, { "epoch": 51.64917541229385, "grad_norm": 0.16606849431991577, "learning_rate": 7.810553643000549e-05, "loss": 0.0107, "step": 34450 }, { "epoch": 51.664167916041976, "grad_norm": 0.24115055799484253, "learning_rate": 7.809185967315206e-05, "loss": 0.0156, "step": 34460 }, { "epoch": 51.67916041979011, "grad_norm": 0.27987951040267944, "learning_rate": 7.80781798442151e-05, "loss": 0.0128, "step": 34470 }, { "epoch": 51.69415292353823, "grad_norm": 0.32986173033714294, "learning_rate": 7.806449694469061e-05, "loss": 0.0099, "step": 34480 }, { "epoch": 51.70914542728636, "grad_norm": 0.2917136549949646, "learning_rate": 7.805081097607492e-05, "loss": 0.0107, "step": 34490 }, { "epoch": 51.724137931034484, "grad_norm": 0.3295027017593384, "learning_rate": 7.803712193986474e-05, "loss": 0.0146, "step": 34500 }, { "epoch": 51.73913043478261, "grad_norm": 0.2862671911716461, "learning_rate": 7.802342983755702e-05, "loss": 0.0138, "step": 34510 }, { "epoch": 51.754122938530735, "grad_norm": 0.2911105453968048, "learning_rate": 7.800973467064918e-05, "loss": 0.0089, "step": 34520 }, { "epoch": 51.76911544227886, "grad_norm": 0.216589093208313, "learning_rate": 7.799603644063884e-05, "loss": 0.0107, "step": 34530 }, { "epoch": 51.784107946026985, "grad_norm": 0.19290316104888916, "learning_rate": 7.798233514902405e-05, "loss": 0.0086, "step": 34540 }, { "epoch": 51.79910044977511, "grad_norm": 0.576133131980896, "learning_rate": 7.796863079730318e-05, "loss": 0.0087, "step": 34550 }, { "epoch": 51.814092953523236, "grad_norm": 0.22660793364048004, "learning_rate": 7.795492338697488e-05, "loss": 0.0116, "step": 34560 }, { "epoch": 51.82908545727136, "grad_norm": 0.23223833739757538, "learning_rate": 7.794121291953819e-05, "loss": 0.0109, "step": 34570 }, { "epoch": 51.844077961019494, "grad_norm": 0.3520214855670929, "learning_rate": 7.792749939649246e-05, "loss": 0.0115, "step": 34580 }, { "epoch": 51.85907046476762, "grad_norm": 0.2935956120491028, "learning_rate": 7.79137828193374e-05, "loss": 0.0112, "step": 34590 }, { "epoch": 51.874062968515744, "grad_norm": 0.23933255672454834, "learning_rate": 7.790006318957301e-05, "loss": 0.0164, "step": 34600 }, { "epoch": 51.88905547226387, "grad_norm": 0.2705352306365967, "learning_rate": 7.788634050869965e-05, "loss": 0.0108, "step": 34610 }, { "epoch": 51.904047976011995, "grad_norm": 0.3730883002281189, "learning_rate": 7.787261477821803e-05, "loss": 0.0113, "step": 34620 }, { "epoch": 51.91904047976012, "grad_norm": 0.24888817965984344, "learning_rate": 7.785888599962916e-05, "loss": 0.0161, "step": 34630 }, { "epoch": 51.934032983508246, "grad_norm": 0.21692949533462524, "learning_rate": 7.784515417443439e-05, "loss": 0.0117, "step": 34640 }, { "epoch": 51.94902548725637, "grad_norm": 0.21821126341819763, "learning_rate": 7.783141930413545e-05, "loss": 0.0096, "step": 34650 }, { "epoch": 51.964017991004496, "grad_norm": 0.19471105933189392, "learning_rate": 7.78176813902343e-05, "loss": 0.0118, "step": 34660 }, { "epoch": 51.97901049475262, "grad_norm": 0.23160617053508759, "learning_rate": 7.780394043423336e-05, "loss": 0.0106, "step": 34670 }, { "epoch": 51.99400299850075, "grad_norm": 0.24010741710662842, "learning_rate": 7.77901964376353e-05, "loss": 0.0084, "step": 34680 }, { "epoch": 52.00899550224887, "grad_norm": 0.1987786889076233, "learning_rate": 7.777644940194316e-05, "loss": 0.0112, "step": 34690 }, { "epoch": 52.023988005997005, "grad_norm": 0.17262698709964752, "learning_rate": 7.776269932866023e-05, "loss": 0.009, "step": 34700 }, { "epoch": 52.03898050974513, "grad_norm": 0.2752428948879242, "learning_rate": 7.774894621929026e-05, "loss": 0.01, "step": 34710 }, { "epoch": 52.053973013493255, "grad_norm": 0.14468920230865479, "learning_rate": 7.773519007533725e-05, "loss": 0.0078, "step": 34720 }, { "epoch": 52.06896551724138, "grad_norm": 0.20357726514339447, "learning_rate": 7.772143089830556e-05, "loss": 0.0116, "step": 34730 }, { "epoch": 52.083958020989506, "grad_norm": 0.2665312886238098, "learning_rate": 7.770766868969985e-05, "loss": 0.0123, "step": 34740 }, { "epoch": 52.09895052473763, "grad_norm": 0.1974448263645172, "learning_rate": 7.769390345102518e-05, "loss": 0.0135, "step": 34750 }, { "epoch": 52.11394302848576, "grad_norm": 0.18566522002220154, "learning_rate": 7.768013518378683e-05, "loss": 0.0083, "step": 34760 }, { "epoch": 52.12893553223388, "grad_norm": 0.22115281224250793, "learning_rate": 7.766636388949053e-05, "loss": 0.0106, "step": 34770 }, { "epoch": 52.14392803598201, "grad_norm": 0.16729918122291565, "learning_rate": 7.765258956964229e-05, "loss": 0.0121, "step": 34780 }, { "epoch": 52.15892053973013, "grad_norm": 0.19428037106990814, "learning_rate": 7.76388122257484e-05, "loss": 0.0096, "step": 34790 }, { "epoch": 52.17391304347826, "grad_norm": 0.30690857768058777, "learning_rate": 7.762503185931558e-05, "loss": 0.012, "step": 34800 }, { "epoch": 52.18890554722638, "grad_norm": 0.1495491862297058, "learning_rate": 7.76112484718508e-05, "loss": 0.0124, "step": 34810 }, { "epoch": 52.203898050974516, "grad_norm": 0.18445277214050293, "learning_rate": 7.75974620648614e-05, "loss": 0.0104, "step": 34820 }, { "epoch": 52.21889055472264, "grad_norm": 0.17101745307445526, "learning_rate": 7.758367263985503e-05, "loss": 0.0112, "step": 34830 }, { "epoch": 52.23388305847077, "grad_norm": 0.222154900431633, "learning_rate": 7.75698801983397e-05, "loss": 0.0101, "step": 34840 }, { "epoch": 52.24887556221889, "grad_norm": 0.32287484407424927, "learning_rate": 7.755608474182372e-05, "loss": 0.0123, "step": 34850 }, { "epoch": 52.26386806596702, "grad_norm": 0.22650986909866333, "learning_rate": 7.754228627181574e-05, "loss": 0.0127, "step": 34860 }, { "epoch": 52.27886056971514, "grad_norm": 0.17833493649959564, "learning_rate": 7.752848478982476e-05, "loss": 0.0098, "step": 34870 }, { "epoch": 52.29385307346327, "grad_norm": 0.32651597261428833, "learning_rate": 7.751468029736006e-05, "loss": 0.0094, "step": 34880 }, { "epoch": 52.30884557721139, "grad_norm": 0.20687124133110046, "learning_rate": 7.750087279593129e-05, "loss": 0.0105, "step": 34890 }, { "epoch": 52.32383808095952, "grad_norm": 0.3231196999549866, "learning_rate": 7.748706228704843e-05, "loss": 0.011, "step": 34900 }, { "epoch": 52.338830584707644, "grad_norm": 0.17594438791275024, "learning_rate": 7.747324877222176e-05, "loss": 0.0094, "step": 34910 }, { "epoch": 52.35382308845577, "grad_norm": 0.2923637628555298, "learning_rate": 7.745943225296188e-05, "loss": 0.0133, "step": 34920 }, { "epoch": 52.3688155922039, "grad_norm": 0.24627739191055298, "learning_rate": 7.744561273077981e-05, "loss": 0.0115, "step": 34930 }, { "epoch": 52.38380809595203, "grad_norm": 0.17774884402751923, "learning_rate": 7.743179020718678e-05, "loss": 0.0117, "step": 34940 }, { "epoch": 52.39880059970015, "grad_norm": 0.20858877897262573, "learning_rate": 7.741796468369443e-05, "loss": 0.0118, "step": 34950 }, { "epoch": 52.41379310344828, "grad_norm": 0.2190912812948227, "learning_rate": 7.740413616181466e-05, "loss": 0.0105, "step": 34960 }, { "epoch": 52.4287856071964, "grad_norm": 0.19005067646503448, "learning_rate": 7.739030464305978e-05, "loss": 0.011, "step": 34970 }, { "epoch": 52.44377811094453, "grad_norm": 0.3002726435661316, "learning_rate": 7.737647012894235e-05, "loss": 0.011, "step": 34980 }, { "epoch": 52.458770614692654, "grad_norm": 0.16359063982963562, "learning_rate": 7.736263262097532e-05, "loss": 0.013, "step": 34990 }, { "epoch": 52.47376311844078, "grad_norm": 0.18037189543247223, "learning_rate": 7.734879212067192e-05, "loss": 0.0099, "step": 35000 }, { "epoch": 52.488755622188904, "grad_norm": 0.13961036503314972, "learning_rate": 7.733494862954573e-05, "loss": 0.0093, "step": 35010 }, { "epoch": 52.50374812593703, "grad_norm": 0.20245079696178436, "learning_rate": 7.732110214911066e-05, "loss": 0.0094, "step": 35020 }, { "epoch": 52.518740629685155, "grad_norm": 0.3495345711708069, "learning_rate": 7.730725268088094e-05, "loss": 0.0113, "step": 35030 }, { "epoch": 52.53373313343328, "grad_norm": 0.2259935885667801, "learning_rate": 7.729340022637112e-05, "loss": 0.0121, "step": 35040 }, { "epoch": 52.54872563718141, "grad_norm": 0.2562864422798157, "learning_rate": 7.727954478709607e-05, "loss": 0.0137, "step": 35050 }, { "epoch": 52.56371814092954, "grad_norm": 0.8894442915916443, "learning_rate": 7.726568636457103e-05, "loss": 0.0124, "step": 35060 }, { "epoch": 52.57871064467766, "grad_norm": 0.40702417492866516, "learning_rate": 7.725182496031153e-05, "loss": 0.0133, "step": 35070 }, { "epoch": 52.59370314842579, "grad_norm": 0.5072850584983826, "learning_rate": 7.723796057583341e-05, "loss": 0.0116, "step": 35080 }, { "epoch": 52.608695652173914, "grad_norm": 0.26303404569625854, "learning_rate": 7.722409321265287e-05, "loss": 0.0105, "step": 35090 }, { "epoch": 52.62368815592204, "grad_norm": 0.16865915060043335, "learning_rate": 7.721022287228645e-05, "loss": 0.0092, "step": 35100 }, { "epoch": 52.638680659670165, "grad_norm": 0.2315879613161087, "learning_rate": 7.719634955625094e-05, "loss": 0.0104, "step": 35110 }, { "epoch": 52.65367316341829, "grad_norm": 0.20389728248119354, "learning_rate": 7.718247326606355e-05, "loss": 0.0106, "step": 35120 }, { "epoch": 52.668665667166415, "grad_norm": 0.25454169511795044, "learning_rate": 7.716859400324175e-05, "loss": 0.0129, "step": 35130 }, { "epoch": 52.68365817091454, "grad_norm": 0.20792917907238007, "learning_rate": 7.715471176930335e-05, "loss": 0.0122, "step": 35140 }, { "epoch": 52.698650674662666, "grad_norm": 0.34949547052383423, "learning_rate": 7.714082656576651e-05, "loss": 0.0145, "step": 35150 }, { "epoch": 52.71364317841079, "grad_norm": 0.2578732669353485, "learning_rate": 7.712693839414968e-05, "loss": 0.012, "step": 35160 }, { "epoch": 52.728635682158924, "grad_norm": 0.18542620539665222, "learning_rate": 7.711304725597164e-05, "loss": 0.0113, "step": 35170 }, { "epoch": 52.74362818590705, "grad_norm": 0.24477531015872955, "learning_rate": 7.709915315275151e-05, "loss": 0.0145, "step": 35180 }, { "epoch": 52.758620689655174, "grad_norm": 0.2617207467556, "learning_rate": 7.708525608600876e-05, "loss": 0.0115, "step": 35190 }, { "epoch": 52.7736131934033, "grad_norm": 0.18798819184303284, "learning_rate": 7.707135605726311e-05, "loss": 0.0117, "step": 35200 }, { "epoch": 52.788605697151425, "grad_norm": 0.4403119385242462, "learning_rate": 7.705745306803466e-05, "loss": 0.013, "step": 35210 }, { "epoch": 52.80359820089955, "grad_norm": 0.20871207118034363, "learning_rate": 7.704354711984383e-05, "loss": 0.013, "step": 35220 }, { "epoch": 52.818590704647676, "grad_norm": 0.34250015020370483, "learning_rate": 7.702963821421133e-05, "loss": 0.0123, "step": 35230 }, { "epoch": 52.8335832083958, "grad_norm": 0.17113275825977325, "learning_rate": 7.701572635265826e-05, "loss": 0.0089, "step": 35240 }, { "epoch": 52.848575712143926, "grad_norm": 0.37042713165283203, "learning_rate": 7.700181153670596e-05, "loss": 0.012, "step": 35250 }, { "epoch": 52.86356821589205, "grad_norm": 0.15220320224761963, "learning_rate": 7.698789376787616e-05, "loss": 0.0129, "step": 35260 }, { "epoch": 52.87856071964018, "grad_norm": 0.1750524342060089, "learning_rate": 7.697397304769085e-05, "loss": 0.0133, "step": 35270 }, { "epoch": 52.89355322338831, "grad_norm": 0.3712165355682373, "learning_rate": 7.696004937767241e-05, "loss": 0.0124, "step": 35280 }, { "epoch": 52.908545727136435, "grad_norm": 0.18099437654018402, "learning_rate": 7.694612275934352e-05, "loss": 0.0137, "step": 35290 }, { "epoch": 52.92353823088456, "grad_norm": 0.14680258929729462, "learning_rate": 7.693219319422714e-05, "loss": 0.0128, "step": 35300 }, { "epoch": 52.938530734632685, "grad_norm": 0.20847758650779724, "learning_rate": 7.69182606838466e-05, "loss": 0.0119, "step": 35310 }, { "epoch": 52.95352323838081, "grad_norm": 0.17892490327358246, "learning_rate": 7.690432522972558e-05, "loss": 0.0126, "step": 35320 }, { "epoch": 52.968515742128936, "grad_norm": 0.21346548199653625, "learning_rate": 7.689038683338796e-05, "loss": 0.0109, "step": 35330 }, { "epoch": 52.98350824587706, "grad_norm": 0.24147745966911316, "learning_rate": 7.687644549635808e-05, "loss": 0.0101, "step": 35340 }, { "epoch": 52.99850074962519, "grad_norm": 0.3168124556541443, "learning_rate": 7.686250122016053e-05, "loss": 0.0119, "step": 35350 }, { "epoch": 53.01349325337331, "grad_norm": 0.2459658831357956, "learning_rate": 7.684855400632022e-05, "loss": 0.01, "step": 35360 }, { "epoch": 53.02848575712144, "grad_norm": 0.2411869913339615, "learning_rate": 7.683460385636243e-05, "loss": 0.0121, "step": 35370 }, { "epoch": 53.04347826086956, "grad_norm": 0.2407112568616867, "learning_rate": 7.68206507718127e-05, "loss": 0.0133, "step": 35380 }, { "epoch": 53.05847076461769, "grad_norm": 0.15190252661705017, "learning_rate": 7.680669475419693e-05, "loss": 0.0109, "step": 35390 }, { "epoch": 53.07346326836582, "grad_norm": 0.20786085724830627, "learning_rate": 7.679273580504132e-05, "loss": 0.0128, "step": 35400 }, { "epoch": 53.088455772113946, "grad_norm": 0.2590274512767792, "learning_rate": 7.67787739258724e-05, "loss": 0.0099, "step": 35410 }, { "epoch": 53.10344827586207, "grad_norm": 0.16497081518173218, "learning_rate": 7.676480911821705e-05, "loss": 0.0091, "step": 35420 }, { "epoch": 53.1184407796102, "grad_norm": 0.13665980100631714, "learning_rate": 7.675084138360238e-05, "loss": 0.0093, "step": 35430 }, { "epoch": 53.13343328335832, "grad_norm": 0.17097750306129456, "learning_rate": 7.673687072355592e-05, "loss": 0.0093, "step": 35440 }, { "epoch": 53.14842578710645, "grad_norm": 0.24037212133407593, "learning_rate": 7.67228971396055e-05, "loss": 0.0105, "step": 35450 }, { "epoch": 53.16341829085457, "grad_norm": 0.22607092559337616, "learning_rate": 7.670892063327922e-05, "loss": 0.0097, "step": 35460 }, { "epoch": 53.1784107946027, "grad_norm": 0.29002809524536133, "learning_rate": 7.669494120610552e-05, "loss": 0.01, "step": 35470 }, { "epoch": 53.19340329835082, "grad_norm": 0.22300490736961365, "learning_rate": 7.66809588596132e-05, "loss": 0.0092, "step": 35480 }, { "epoch": 53.20839580209895, "grad_norm": 0.20227032899856567, "learning_rate": 7.666697359533132e-05, "loss": 0.0086, "step": 35490 }, { "epoch": 53.223388305847074, "grad_norm": 0.1484893411397934, "learning_rate": 7.665298541478932e-05, "loss": 0.0102, "step": 35500 }, { "epoch": 53.2383808095952, "grad_norm": 0.16746079921722412, "learning_rate": 7.66389943195169e-05, "loss": 0.0102, "step": 35510 }, { "epoch": 53.25337331334333, "grad_norm": 0.1328774094581604, "learning_rate": 7.66250003110441e-05, "loss": 0.0089, "step": 35520 }, { "epoch": 53.26836581709146, "grad_norm": 0.22059959173202515, "learning_rate": 7.66110033909013e-05, "loss": 0.0077, "step": 35530 }, { "epoch": 53.28335832083958, "grad_norm": 0.31017163395881653, "learning_rate": 7.659700356061918e-05, "loss": 0.0083, "step": 35540 }, { "epoch": 53.29835082458771, "grad_norm": 0.31854885816574097, "learning_rate": 7.658300082172875e-05, "loss": 0.0102, "step": 35550 }, { "epoch": 53.31334332833583, "grad_norm": 0.31005749106407166, "learning_rate": 7.65689951757613e-05, "loss": 0.0099, "step": 35560 }, { "epoch": 53.32833583208396, "grad_norm": 0.21827289462089539, "learning_rate": 7.655498662424849e-05, "loss": 0.0109, "step": 35570 }, { "epoch": 53.343328335832084, "grad_norm": 0.19480851292610168, "learning_rate": 7.654097516872227e-05, "loss": 0.0119, "step": 35580 }, { "epoch": 53.35832083958021, "grad_norm": 0.3571593761444092, "learning_rate": 7.65269608107149e-05, "loss": 0.0104, "step": 35590 }, { "epoch": 53.373313343328334, "grad_norm": 0.22306004166603088, "learning_rate": 7.6512943551759e-05, "loss": 0.0089, "step": 35600 }, { "epoch": 53.38830584707646, "grad_norm": 0.2483246624469757, "learning_rate": 7.649892339338743e-05, "loss": 0.0115, "step": 35610 }, { "epoch": 53.403298350824585, "grad_norm": 0.18069766461849213, "learning_rate": 7.648490033713344e-05, "loss": 0.0151, "step": 35620 }, { "epoch": 53.41829085457271, "grad_norm": 0.27090007066726685, "learning_rate": 7.647087438453058e-05, "loss": 0.0096, "step": 35630 }, { "epoch": 53.43328335832084, "grad_norm": 0.25226274132728577, "learning_rate": 7.64568455371127e-05, "loss": 0.0097, "step": 35640 }, { "epoch": 53.44827586206897, "grad_norm": 0.13771569728851318, "learning_rate": 7.644281379641396e-05, "loss": 0.0145, "step": 35650 }, { "epoch": 53.46326836581709, "grad_norm": 0.14304114878177643, "learning_rate": 7.642877916396887e-05, "loss": 0.0108, "step": 35660 }, { "epoch": 53.47826086956522, "grad_norm": 0.3572135269641876, "learning_rate": 7.641474164131221e-05, "loss": 0.0103, "step": 35670 }, { "epoch": 53.493253373313344, "grad_norm": 0.2674345374107361, "learning_rate": 7.640070122997913e-05, "loss": 0.0148, "step": 35680 }, { "epoch": 53.50824587706147, "grad_norm": 0.248753622174263, "learning_rate": 7.638665793150505e-05, "loss": 0.0114, "step": 35690 }, { "epoch": 53.523238380809595, "grad_norm": 0.25115299224853516, "learning_rate": 7.637261174742574e-05, "loss": 0.012, "step": 35700 }, { "epoch": 53.53823088455772, "grad_norm": 0.17243389785289764, "learning_rate": 7.635856267927725e-05, "loss": 0.0124, "step": 35710 }, { "epoch": 53.553223388305845, "grad_norm": 0.11203795671463013, "learning_rate": 7.634451072859597e-05, "loss": 0.0116, "step": 35720 }, { "epoch": 53.56821589205397, "grad_norm": 0.15397459268569946, "learning_rate": 7.633045589691863e-05, "loss": 0.0127, "step": 35730 }, { "epoch": 53.583208395802096, "grad_norm": 0.15404567122459412, "learning_rate": 7.63163981857822e-05, "loss": 0.0122, "step": 35740 }, { "epoch": 53.59820089955023, "grad_norm": 0.1731720268726349, "learning_rate": 7.630233759672403e-05, "loss": 0.0094, "step": 35750 }, { "epoch": 53.613193403298354, "grad_norm": 0.21851179003715515, "learning_rate": 7.628827413128177e-05, "loss": 0.0099, "step": 35760 }, { "epoch": 53.62818590704648, "grad_norm": 0.18910075724124908, "learning_rate": 7.627420779099338e-05, "loss": 0.0122, "step": 35770 }, { "epoch": 53.643178410794604, "grad_norm": 0.3189106285572052, "learning_rate": 7.626013857739711e-05, "loss": 0.01, "step": 35780 }, { "epoch": 53.65817091454273, "grad_norm": 0.18209201097488403, "learning_rate": 7.624606649203158e-05, "loss": 0.0094, "step": 35790 }, { "epoch": 53.673163418290855, "grad_norm": 0.2433575689792633, "learning_rate": 7.623199153643569e-05, "loss": 0.0083, "step": 35800 }, { "epoch": 53.68815592203898, "grad_norm": 0.15835587680339813, "learning_rate": 7.621791371214863e-05, "loss": 0.0098, "step": 35810 }, { "epoch": 53.703148425787106, "grad_norm": 0.3613719642162323, "learning_rate": 7.620383302070995e-05, "loss": 0.0093, "step": 35820 }, { "epoch": 53.71814092953523, "grad_norm": 0.28933557868003845, "learning_rate": 7.61897494636595e-05, "loss": 0.0119, "step": 35830 }, { "epoch": 53.733133433283356, "grad_norm": 0.15201683342456818, "learning_rate": 7.617566304253739e-05, "loss": 0.0112, "step": 35840 }, { "epoch": 53.74812593703148, "grad_norm": 0.20855265855789185, "learning_rate": 7.616157375888416e-05, "loss": 0.0111, "step": 35850 }, { "epoch": 53.76311844077961, "grad_norm": 0.2117539644241333, "learning_rate": 7.614748161424053e-05, "loss": 0.0098, "step": 35860 }, { "epoch": 53.77811094452774, "grad_norm": 0.41877204179763794, "learning_rate": 7.613338661014763e-05, "loss": 0.0093, "step": 35870 }, { "epoch": 53.793103448275865, "grad_norm": 0.2099210023880005, "learning_rate": 7.611928874814686e-05, "loss": 0.0093, "step": 35880 }, { "epoch": 53.80809595202399, "grad_norm": 0.29246997833251953, "learning_rate": 7.610518802977996e-05, "loss": 0.0116, "step": 35890 }, { "epoch": 53.823088455772115, "grad_norm": 0.19292673468589783, "learning_rate": 7.609108445658893e-05, "loss": 0.0093, "step": 35900 }, { "epoch": 53.83808095952024, "grad_norm": 0.2421858161687851, "learning_rate": 7.607697803011612e-05, "loss": 0.0119, "step": 35910 }, { "epoch": 53.853073463268366, "grad_norm": 0.22970375418663025, "learning_rate": 7.606286875190421e-05, "loss": 0.011, "step": 35920 }, { "epoch": 53.86806596701649, "grad_norm": 0.17335282266139984, "learning_rate": 7.604875662349617e-05, "loss": 0.0117, "step": 35930 }, { "epoch": 53.88305847076462, "grad_norm": 0.27585428953170776, "learning_rate": 7.603464164643525e-05, "loss": 0.012, "step": 35940 }, { "epoch": 53.89805097451274, "grad_norm": 0.20334674417972565, "learning_rate": 7.602052382226507e-05, "loss": 0.0094, "step": 35950 }, { "epoch": 53.91304347826087, "grad_norm": 0.1972716897726059, "learning_rate": 7.600640315252954e-05, "loss": 0.0087, "step": 35960 }, { "epoch": 53.92803598200899, "grad_norm": 0.17461682856082916, "learning_rate": 7.599227963877284e-05, "loss": 0.0082, "step": 35970 }, { "epoch": 53.94302848575712, "grad_norm": 0.21584638953208923, "learning_rate": 7.597815328253954e-05, "loss": 0.009, "step": 35980 }, { "epoch": 53.95802098950525, "grad_norm": 0.1845102608203888, "learning_rate": 7.596402408537443e-05, "loss": 0.0106, "step": 35990 }, { "epoch": 53.973013493253376, "grad_norm": 0.1829235702753067, "learning_rate": 7.59498920488227e-05, "loss": 0.0093, "step": 36000 }, { "epoch": 53.9880059970015, "grad_norm": 0.23362478613853455, "learning_rate": 7.593575717442979e-05, "loss": 0.0134, "step": 36010 }, { "epoch": 54.00299850074963, "grad_norm": 0.16870561242103577, "learning_rate": 7.592161946374147e-05, "loss": 0.0081, "step": 36020 }, { "epoch": 54.01799100449775, "grad_norm": 0.39973416924476624, "learning_rate": 7.590747891830381e-05, "loss": 0.0134, "step": 36030 }, { "epoch": 54.03298350824588, "grad_norm": 0.26294243335723877, "learning_rate": 7.58933355396632e-05, "loss": 0.0108, "step": 36040 }, { "epoch": 54.047976011994, "grad_norm": 0.1910596489906311, "learning_rate": 7.587918932936636e-05, "loss": 0.0098, "step": 36050 }, { "epoch": 54.06296851574213, "grad_norm": 0.23944209516048431, "learning_rate": 7.586504028896028e-05, "loss": 0.0093, "step": 36060 }, { "epoch": 54.07796101949025, "grad_norm": 0.30077388882637024, "learning_rate": 7.585088841999228e-05, "loss": 0.0147, "step": 36070 }, { "epoch": 54.09295352323838, "grad_norm": 0.3016151785850525, "learning_rate": 7.583673372400999e-05, "loss": 0.011, "step": 36080 }, { "epoch": 54.107946026986504, "grad_norm": 0.30049654841423035, "learning_rate": 7.582257620256134e-05, "loss": 0.0129, "step": 36090 }, { "epoch": 54.12293853073463, "grad_norm": 0.3282249867916107, "learning_rate": 7.580841585719458e-05, "loss": 0.0141, "step": 36100 }, { "epoch": 54.13793103448276, "grad_norm": 1.04066801071167, "learning_rate": 7.579425268945825e-05, "loss": 0.0109, "step": 36110 }, { "epoch": 54.15292353823089, "grad_norm": 0.2675156593322754, "learning_rate": 7.578008670090127e-05, "loss": 0.0086, "step": 36120 }, { "epoch": 54.16791604197901, "grad_norm": 0.24752679467201233, "learning_rate": 7.576591789307272e-05, "loss": 0.0098, "step": 36130 }, { "epoch": 54.18290854572714, "grad_norm": 0.1386595517396927, "learning_rate": 7.575174626752216e-05, "loss": 0.0104, "step": 36140 }, { "epoch": 54.19790104947526, "grad_norm": 0.2900526821613312, "learning_rate": 7.573757182579934e-05, "loss": 0.0108, "step": 36150 }, { "epoch": 54.21289355322339, "grad_norm": 0.19671949744224548, "learning_rate": 7.572339456945435e-05, "loss": 0.0104, "step": 36160 }, { "epoch": 54.22788605697151, "grad_norm": 0.2754293978214264, "learning_rate": 7.570921450003762e-05, "loss": 0.0132, "step": 36170 }, { "epoch": 54.24287856071964, "grad_norm": 0.2071913182735443, "learning_rate": 7.569503161909983e-05, "loss": 0.0094, "step": 36180 }, { "epoch": 54.257871064467764, "grad_norm": 0.22395406663417816, "learning_rate": 7.568084592819202e-05, "loss": 0.0126, "step": 36190 }, { "epoch": 54.27286356821589, "grad_norm": 0.23242633044719696, "learning_rate": 7.566665742886551e-05, "loss": 0.0126, "step": 36200 }, { "epoch": 54.287856071964015, "grad_norm": 0.2624111771583557, "learning_rate": 7.565246612267194e-05, "loss": 0.0098, "step": 36210 }, { "epoch": 54.30284857571215, "grad_norm": 0.27580225467681885, "learning_rate": 7.563827201116325e-05, "loss": 0.0111, "step": 36220 }, { "epoch": 54.31784107946027, "grad_norm": 0.33248117566108704, "learning_rate": 7.562407509589164e-05, "loss": 0.0119, "step": 36230 }, { "epoch": 54.3328335832084, "grad_norm": 0.24680069088935852, "learning_rate": 7.560987537840973e-05, "loss": 0.0118, "step": 36240 }, { "epoch": 54.34782608695652, "grad_norm": 0.188057079911232, "learning_rate": 7.559567286027036e-05, "loss": 0.0095, "step": 36250 }, { "epoch": 54.36281859070465, "grad_norm": 0.5052679777145386, "learning_rate": 7.558146754302668e-05, "loss": 0.009, "step": 36260 }, { "epoch": 54.377811094452774, "grad_norm": 0.2801818549633026, "learning_rate": 7.556725942823217e-05, "loss": 0.0128, "step": 36270 }, { "epoch": 54.3928035982009, "grad_norm": 0.9129924178123474, "learning_rate": 7.555304851744061e-05, "loss": 0.013, "step": 36280 }, { "epoch": 54.407796101949025, "grad_norm": 0.25120243430137634, "learning_rate": 7.553883481220608e-05, "loss": 0.0129, "step": 36290 }, { "epoch": 54.42278860569715, "grad_norm": 0.2021176815032959, "learning_rate": 7.552461831408298e-05, "loss": 0.0104, "step": 36300 }, { "epoch": 54.437781109445275, "grad_norm": 0.26631784439086914, "learning_rate": 7.551039902462599e-05, "loss": 0.0107, "step": 36310 }, { "epoch": 54.4527736131934, "grad_norm": 0.34353503584861755, "learning_rate": 7.549617694539011e-05, "loss": 0.0108, "step": 36320 }, { "epoch": 54.467766116941526, "grad_norm": 0.3682212829589844, "learning_rate": 7.548195207793066e-05, "loss": 0.0138, "step": 36330 }, { "epoch": 54.48275862068966, "grad_norm": 0.19999587535858154, "learning_rate": 7.546772442380323e-05, "loss": 0.0156, "step": 36340 }, { "epoch": 54.497751124437784, "grad_norm": 0.20706871151924133, "learning_rate": 7.545349398456376e-05, "loss": 0.0124, "step": 36350 }, { "epoch": 54.51274362818591, "grad_norm": 0.2147810459136963, "learning_rate": 7.543926076176845e-05, "loss": 0.0125, "step": 36360 }, { "epoch": 54.527736131934034, "grad_norm": 0.24472305178642273, "learning_rate": 7.542502475697385e-05, "loss": 0.0111, "step": 36370 }, { "epoch": 54.54272863568216, "grad_norm": 0.1725701242685318, "learning_rate": 7.541078597173675e-05, "loss": 0.01, "step": 36380 }, { "epoch": 54.557721139430285, "grad_norm": 0.24938668310642242, "learning_rate": 7.539654440761431e-05, "loss": 0.0099, "step": 36390 }, { "epoch": 54.57271364317841, "grad_norm": 0.16979746520519257, "learning_rate": 7.538230006616395e-05, "loss": 0.0086, "step": 36400 }, { "epoch": 54.587706146926536, "grad_norm": 0.12028616666793823, "learning_rate": 7.536805294894342e-05, "loss": 0.0097, "step": 36410 }, { "epoch": 54.60269865067466, "grad_norm": 0.1402750164270401, "learning_rate": 7.535380305751076e-05, "loss": 0.0086, "step": 36420 }, { "epoch": 54.617691154422786, "grad_norm": 0.30000174045562744, "learning_rate": 7.533955039342431e-05, "loss": 0.0128, "step": 36430 }, { "epoch": 54.63268365817091, "grad_norm": 0.32159143686294556, "learning_rate": 7.532529495824274e-05, "loss": 0.012, "step": 36440 }, { "epoch": 54.64767616191904, "grad_norm": 0.16670358180999756, "learning_rate": 7.531103675352497e-05, "loss": 0.0119, "step": 36450 }, { "epoch": 54.66266866566717, "grad_norm": 0.17717333137989044, "learning_rate": 7.529677578083029e-05, "loss": 0.0102, "step": 36460 }, { "epoch": 54.677661169415295, "grad_norm": 0.16722895205020905, "learning_rate": 7.528251204171823e-05, "loss": 0.01, "step": 36470 }, { "epoch": 54.69265367316342, "grad_norm": 0.24238695204257965, "learning_rate": 7.526824553774867e-05, "loss": 0.0095, "step": 36480 }, { "epoch": 54.707646176911545, "grad_norm": 0.22618761658668518, "learning_rate": 7.525397627048178e-05, "loss": 0.0125, "step": 36490 }, { "epoch": 54.72263868065967, "grad_norm": 0.23641656339168549, "learning_rate": 7.523970424147802e-05, "loss": 0.0128, "step": 36500 }, { "epoch": 54.737631184407796, "grad_norm": 0.1969708651304245, "learning_rate": 7.522542945229813e-05, "loss": 0.0108, "step": 36510 }, { "epoch": 54.75262368815592, "grad_norm": 0.3669096827507019, "learning_rate": 7.521115190450321e-05, "loss": 0.0115, "step": 36520 }, { "epoch": 54.76761619190405, "grad_norm": 0.30155083537101746, "learning_rate": 7.519687159965462e-05, "loss": 0.0093, "step": 36530 }, { "epoch": 54.78260869565217, "grad_norm": 0.19224505126476288, "learning_rate": 7.518258853931403e-05, "loss": 0.0118, "step": 36540 }, { "epoch": 54.7976011994003, "grad_norm": 0.7642673254013062, "learning_rate": 7.516830272504342e-05, "loss": 0.0134, "step": 36550 }, { "epoch": 54.81259370314842, "grad_norm": 0.21205221116542816, "learning_rate": 7.515401415840509e-05, "loss": 0.0148, "step": 36560 }, { "epoch": 54.827586206896555, "grad_norm": 0.2193419635295868, "learning_rate": 7.513972284096155e-05, "loss": 0.0115, "step": 36570 }, { "epoch": 54.84257871064468, "grad_norm": 0.28530463576316833, "learning_rate": 7.512542877427574e-05, "loss": 0.0101, "step": 36580 }, { "epoch": 54.857571214392806, "grad_norm": 0.20092684030532837, "learning_rate": 7.51111319599108e-05, "loss": 0.0111, "step": 36590 }, { "epoch": 54.87256371814093, "grad_norm": 0.22447365522384644, "learning_rate": 7.509683239943024e-05, "loss": 0.0139, "step": 36600 }, { "epoch": 54.88755622188906, "grad_norm": 0.18167200684547424, "learning_rate": 7.50825300943978e-05, "loss": 0.009, "step": 36610 }, { "epoch": 54.90254872563718, "grad_norm": 0.24867331981658936, "learning_rate": 7.50682250463776e-05, "loss": 0.0088, "step": 36620 }, { "epoch": 54.91754122938531, "grad_norm": 0.22011341154575348, "learning_rate": 7.5053917256934e-05, "loss": 0.0124, "step": 36630 }, { "epoch": 54.93253373313343, "grad_norm": 0.18232928216457367, "learning_rate": 7.503960672763166e-05, "loss": 0.0095, "step": 36640 }, { "epoch": 54.94752623688156, "grad_norm": 0.2721358835697174, "learning_rate": 7.502529346003559e-05, "loss": 0.0109, "step": 36650 }, { "epoch": 54.96251874062968, "grad_norm": 0.2172016054391861, "learning_rate": 7.501097745571106e-05, "loss": 0.0123, "step": 36660 }, { "epoch": 54.97751124437781, "grad_norm": 0.19737614691257477, "learning_rate": 7.499665871622361e-05, "loss": 0.011, "step": 36670 }, { "epoch": 54.992503748125934, "grad_norm": 0.31656625866889954, "learning_rate": 7.498233724313917e-05, "loss": 0.0126, "step": 36680 }, { "epoch": 55.007496251874066, "grad_norm": 0.22791005671024323, "learning_rate": 7.496801303802387e-05, "loss": 0.0096, "step": 36690 }, { "epoch": 55.02248875562219, "grad_norm": 0.2131841480731964, "learning_rate": 7.495368610244423e-05, "loss": 0.0105, "step": 36700 }, { "epoch": 55.03748125937032, "grad_norm": 0.18410970270633698, "learning_rate": 7.493935643796697e-05, "loss": 0.0093, "step": 36710 }, { "epoch": 55.05247376311844, "grad_norm": 0.15193480253219604, "learning_rate": 7.49250240461592e-05, "loss": 0.0102, "step": 36720 }, { "epoch": 55.06746626686657, "grad_norm": 0.24898530542850494, "learning_rate": 7.491068892858826e-05, "loss": 0.0111, "step": 36730 }, { "epoch": 55.08245877061469, "grad_norm": 0.21411263942718506, "learning_rate": 7.489635108682184e-05, "loss": 0.0106, "step": 36740 }, { "epoch": 55.09745127436282, "grad_norm": 0.316766619682312, "learning_rate": 7.48820105224279e-05, "loss": 0.012, "step": 36750 }, { "epoch": 55.11244377811094, "grad_norm": 0.15900132060050964, "learning_rate": 7.48676672369747e-05, "loss": 0.0145, "step": 36760 }, { "epoch": 55.12743628185907, "grad_norm": 0.26546502113342285, "learning_rate": 7.485332123203079e-05, "loss": 0.0103, "step": 36770 }, { "epoch": 55.142428785607194, "grad_norm": 0.1588495522737503, "learning_rate": 7.483897250916504e-05, "loss": 0.0097, "step": 36780 }, { "epoch": 55.15742128935532, "grad_norm": 0.16222892701625824, "learning_rate": 7.48246210699466e-05, "loss": 0.0114, "step": 36790 }, { "epoch": 55.172413793103445, "grad_norm": 0.24904516339302063, "learning_rate": 7.481026691594492e-05, "loss": 0.01, "step": 36800 }, { "epoch": 55.18740629685158, "grad_norm": 0.2309422492980957, "learning_rate": 7.479591004872975e-05, "loss": 0.0121, "step": 36810 }, { "epoch": 55.2023988005997, "grad_norm": 0.22776205837726593, "learning_rate": 7.478155046987114e-05, "loss": 0.0159, "step": 36820 }, { "epoch": 55.21739130434783, "grad_norm": 0.16068348288536072, "learning_rate": 7.476718818093944e-05, "loss": 0.0112, "step": 36830 }, { "epoch": 55.23238380809595, "grad_norm": 0.2793883979320526, "learning_rate": 7.475282318350527e-05, "loss": 0.0113, "step": 36840 }, { "epoch": 55.24737631184408, "grad_norm": 0.24866792559623718, "learning_rate": 7.473845547913957e-05, "loss": 0.0109, "step": 36850 }, { "epoch": 55.262368815592204, "grad_norm": 0.15643185377120972, "learning_rate": 7.472408506941357e-05, "loss": 0.0149, "step": 36860 }, { "epoch": 55.27736131934033, "grad_norm": 0.1448015570640564, "learning_rate": 7.470971195589881e-05, "loss": 0.0102, "step": 36870 }, { "epoch": 55.292353823088455, "grad_norm": 0.25798436999320984, "learning_rate": 7.46953361401671e-05, "loss": 0.0092, "step": 36880 }, { "epoch": 55.30734632683658, "grad_norm": 0.28506916761398315, "learning_rate": 7.468095762379055e-05, "loss": 0.012, "step": 36890 }, { "epoch": 55.322338830584705, "grad_norm": 0.3023412823677063, "learning_rate": 7.466657640834158e-05, "loss": 0.0159, "step": 36900 }, { "epoch": 55.33733133433283, "grad_norm": 0.2614111304283142, "learning_rate": 7.46521924953929e-05, "loss": 0.0102, "step": 36910 }, { "epoch": 55.35232383808096, "grad_norm": 0.2030326873064041, "learning_rate": 7.463780588651752e-05, "loss": 0.0101, "step": 36920 }, { "epoch": 55.36731634182909, "grad_norm": 0.16448435187339783, "learning_rate": 7.462341658328873e-05, "loss": 0.0137, "step": 36930 }, { "epoch": 55.382308845577214, "grad_norm": 0.2342391461133957, "learning_rate": 7.460902458728012e-05, "loss": 0.0097, "step": 36940 }, { "epoch": 55.39730134932534, "grad_norm": 0.23379503190517426, "learning_rate": 7.459462990006558e-05, "loss": 0.0103, "step": 36950 }, { "epoch": 55.412293853073464, "grad_norm": 0.26909488439559937, "learning_rate": 7.458023252321929e-05, "loss": 0.0091, "step": 36960 }, { "epoch": 55.42728635682159, "grad_norm": 0.2460850179195404, "learning_rate": 7.456583245831573e-05, "loss": 0.0086, "step": 36970 }, { "epoch": 55.442278860569715, "grad_norm": 0.130550354719162, "learning_rate": 7.455142970692968e-05, "loss": 0.0132, "step": 36980 }, { "epoch": 55.45727136431784, "grad_norm": 0.2327672392129898, "learning_rate": 7.453702427063615e-05, "loss": 0.0099, "step": 36990 }, { "epoch": 55.472263868065966, "grad_norm": 0.3025158941745758, "learning_rate": 7.452261615101057e-05, "loss": 0.0125, "step": 37000 }, { "epoch": 55.48725637181409, "grad_norm": 0.2164091020822525, "learning_rate": 7.450820534962856e-05, "loss": 0.0096, "step": 37010 }, { "epoch": 55.502248875562216, "grad_norm": 0.17623589932918549, "learning_rate": 7.449379186806604e-05, "loss": 0.0099, "step": 37020 }, { "epoch": 55.51724137931034, "grad_norm": 0.24473610520362854, "learning_rate": 7.44793757078993e-05, "loss": 0.0129, "step": 37030 }, { "epoch": 55.532233883058474, "grad_norm": 0.24372902512550354, "learning_rate": 7.44649568707048e-05, "loss": 0.0122, "step": 37040 }, { "epoch": 55.5472263868066, "grad_norm": 0.18415652215480804, "learning_rate": 7.445053535805942e-05, "loss": 0.0119, "step": 37050 }, { "epoch": 55.562218890554725, "grad_norm": 0.25744757056236267, "learning_rate": 7.443611117154024e-05, "loss": 0.0103, "step": 37060 }, { "epoch": 55.57721139430285, "grad_norm": 0.21847274899482727, "learning_rate": 7.442168431272469e-05, "loss": 0.0097, "step": 37070 }, { "epoch": 55.592203898050975, "grad_norm": 0.3670768141746521, "learning_rate": 7.440725478319044e-05, "loss": 0.0115, "step": 37080 }, { "epoch": 55.6071964017991, "grad_norm": 0.2116558998823166, "learning_rate": 7.439282258451553e-05, "loss": 0.0102, "step": 37090 }, { "epoch": 55.622188905547226, "grad_norm": 0.1689104288816452, "learning_rate": 7.43783877182782e-05, "loss": 0.0107, "step": 37100 }, { "epoch": 55.63718140929535, "grad_norm": 0.2213943749666214, "learning_rate": 7.436395018605707e-05, "loss": 0.0104, "step": 37110 }, { "epoch": 55.65217391304348, "grad_norm": 0.22743888199329376, "learning_rate": 7.434950998943094e-05, "loss": 0.0117, "step": 37120 }, { "epoch": 55.6671664167916, "grad_norm": 0.15468712151050568, "learning_rate": 7.433506712997902e-05, "loss": 0.0111, "step": 37130 }, { "epoch": 55.68215892053973, "grad_norm": 0.3107714354991913, "learning_rate": 7.432062160928077e-05, "loss": 0.0112, "step": 37140 }, { "epoch": 55.69715142428785, "grad_norm": 0.21850337088108063, "learning_rate": 7.430617342891588e-05, "loss": 0.0142, "step": 37150 }, { "epoch": 55.712143928035985, "grad_norm": 0.24907070398330688, "learning_rate": 7.429172259046442e-05, "loss": 0.0088, "step": 37160 }, { "epoch": 55.72713643178411, "grad_norm": 0.23719485104084015, "learning_rate": 7.42772690955067e-05, "loss": 0.008, "step": 37170 }, { "epoch": 55.742128935532236, "grad_norm": 0.14067894220352173, "learning_rate": 7.426281294562332e-05, "loss": 0.0111, "step": 37180 }, { "epoch": 55.75712143928036, "grad_norm": 0.21889014542102814, "learning_rate": 7.424835414239519e-05, "loss": 0.0117, "step": 37190 }, { "epoch": 55.77211394302849, "grad_norm": 0.259413480758667, "learning_rate": 7.423389268740352e-05, "loss": 0.0087, "step": 37200 }, { "epoch": 55.78710644677661, "grad_norm": 0.19197040796279907, "learning_rate": 7.42194285822298e-05, "loss": 0.013, "step": 37210 }, { "epoch": 55.80209895052474, "grad_norm": 0.19246980547904968, "learning_rate": 7.420496182845576e-05, "loss": 0.0091, "step": 37220 }, { "epoch": 55.81709145427286, "grad_norm": 0.28207334876060486, "learning_rate": 7.419049242766348e-05, "loss": 0.0108, "step": 37230 }, { "epoch": 55.83208395802099, "grad_norm": 0.2326042503118515, "learning_rate": 7.417602038143533e-05, "loss": 0.0087, "step": 37240 }, { "epoch": 55.84707646176911, "grad_norm": 0.18490059673786163, "learning_rate": 7.416154569135393e-05, "loss": 0.0095, "step": 37250 }, { "epoch": 55.86206896551724, "grad_norm": 0.24493394792079926, "learning_rate": 7.414706835900222e-05, "loss": 0.0105, "step": 37260 }, { "epoch": 55.87706146926537, "grad_norm": 0.20796236395835876, "learning_rate": 7.413258838596345e-05, "loss": 0.0098, "step": 37270 }, { "epoch": 55.892053973013496, "grad_norm": 0.29272693395614624, "learning_rate": 7.411810577382106e-05, "loss": 0.0109, "step": 37280 }, { "epoch": 55.90704647676162, "grad_norm": 0.2030687779188156, "learning_rate": 7.410362052415889e-05, "loss": 0.0075, "step": 37290 }, { "epoch": 55.92203898050975, "grad_norm": 0.1630103439092636, "learning_rate": 7.408913263856102e-05, "loss": 0.007, "step": 37300 }, { "epoch": 55.93703148425787, "grad_norm": 0.32218748331069946, "learning_rate": 7.407464211861183e-05, "loss": 0.0091, "step": 37310 }, { "epoch": 55.952023988006, "grad_norm": 0.1585739552974701, "learning_rate": 7.406014896589597e-05, "loss": 0.009, "step": 37320 }, { "epoch": 55.96701649175412, "grad_norm": 0.16432778537273407, "learning_rate": 7.404565318199838e-05, "loss": 0.0081, "step": 37330 }, { "epoch": 55.98200899550225, "grad_norm": 0.20014069974422455, "learning_rate": 7.403115476850432e-05, "loss": 0.0086, "step": 37340 }, { "epoch": 55.99700149925037, "grad_norm": 0.13088160753250122, "learning_rate": 7.401665372699932e-05, "loss": 0.0089, "step": 37350 }, { "epoch": 56.0119940029985, "grad_norm": 0.21189238131046295, "learning_rate": 7.400215005906916e-05, "loss": 0.0119, "step": 37360 }, { "epoch": 56.026986506746624, "grad_norm": 0.17984318733215332, "learning_rate": 7.398764376629996e-05, "loss": 0.0089, "step": 37370 }, { "epoch": 56.04197901049475, "grad_norm": 0.1750382035970688, "learning_rate": 7.39731348502781e-05, "loss": 0.0088, "step": 37380 }, { "epoch": 56.05697151424288, "grad_norm": 0.15748658776283264, "learning_rate": 7.395862331259028e-05, "loss": 0.0088, "step": 37390 }, { "epoch": 56.07196401799101, "grad_norm": 0.29458677768707275, "learning_rate": 7.394410915482341e-05, "loss": 0.0106, "step": 37400 }, { "epoch": 56.08695652173913, "grad_norm": 0.18531262874603271, "learning_rate": 7.392959237856476e-05, "loss": 0.0082, "step": 37410 }, { "epoch": 56.10194902548726, "grad_norm": 0.14484667778015137, "learning_rate": 7.391507298540187e-05, "loss": 0.009, "step": 37420 }, { "epoch": 56.11694152923538, "grad_norm": 0.16067016124725342, "learning_rate": 7.390055097692257e-05, "loss": 0.0093, "step": 37430 }, { "epoch": 56.13193403298351, "grad_norm": 0.224713996052742, "learning_rate": 7.388602635471494e-05, "loss": 0.0094, "step": 37440 }, { "epoch": 56.146926536731634, "grad_norm": 0.17474490404129028, "learning_rate": 7.387149912036738e-05, "loss": 0.0114, "step": 37450 }, { "epoch": 56.16191904047976, "grad_norm": 0.17422226071357727, "learning_rate": 7.385696927546854e-05, "loss": 0.0085, "step": 37460 }, { "epoch": 56.176911544227885, "grad_norm": 0.18721911311149597, "learning_rate": 7.384243682160743e-05, "loss": 0.0084, "step": 37470 }, { "epoch": 56.19190404797601, "grad_norm": 0.27623483538627625, "learning_rate": 7.382790176037329e-05, "loss": 0.0141, "step": 37480 }, { "epoch": 56.206896551724135, "grad_norm": 0.27522504329681396, "learning_rate": 7.381336409335562e-05, "loss": 0.0097, "step": 37490 }, { "epoch": 56.22188905547226, "grad_norm": 0.24448229372501373, "learning_rate": 7.379882382214426e-05, "loss": 0.0098, "step": 37500 }, { "epoch": 56.23688155922039, "grad_norm": 0.301165372133255, "learning_rate": 7.378428094832931e-05, "loss": 0.0121, "step": 37510 }, { "epoch": 56.25187406296852, "grad_norm": 0.21590936183929443, "learning_rate": 7.376973547350114e-05, "loss": 0.0093, "step": 37520 }, { "epoch": 56.266866566716644, "grad_norm": 0.19759206473827362, "learning_rate": 7.375518739925046e-05, "loss": 0.009, "step": 37530 }, { "epoch": 56.28185907046477, "grad_norm": 0.21891650557518005, "learning_rate": 7.374063672716818e-05, "loss": 0.0108, "step": 37540 }, { "epoch": 56.296851574212894, "grad_norm": 0.2715907394886017, "learning_rate": 7.372608345884558e-05, "loss": 0.0101, "step": 37550 }, { "epoch": 56.31184407796102, "grad_norm": 0.22141222655773163, "learning_rate": 7.371152759587415e-05, "loss": 0.0096, "step": 37560 }, { "epoch": 56.326836581709145, "grad_norm": 0.23733817040920258, "learning_rate": 7.369696913984572e-05, "loss": 0.0084, "step": 37570 }, { "epoch": 56.34182908545727, "grad_norm": 0.33899572491645813, "learning_rate": 7.368240809235237e-05, "loss": 0.0125, "step": 37580 }, { "epoch": 56.356821589205396, "grad_norm": 0.22799916565418243, "learning_rate": 7.366784445498649e-05, "loss": 0.0085, "step": 37590 }, { "epoch": 56.37181409295352, "grad_norm": 0.2011147141456604, "learning_rate": 7.36532782293407e-05, "loss": 0.0103, "step": 37600 }, { "epoch": 56.386806596701646, "grad_norm": 0.12764467298984528, "learning_rate": 7.363870941700797e-05, "loss": 0.0148, "step": 37610 }, { "epoch": 56.40179910044977, "grad_norm": 0.15749457478523254, "learning_rate": 7.362413801958152e-05, "loss": 0.0098, "step": 37620 }, { "epoch": 56.416791604197904, "grad_norm": 0.31933119893074036, "learning_rate": 7.360956403865486e-05, "loss": 0.0106, "step": 37630 }, { "epoch": 56.43178410794603, "grad_norm": 0.2471165955066681, "learning_rate": 7.359498747582177e-05, "loss": 0.0099, "step": 37640 }, { "epoch": 56.446776611694155, "grad_norm": 0.26678401231765747, "learning_rate": 7.358040833267634e-05, "loss": 0.0124, "step": 37650 }, { "epoch": 56.46176911544228, "grad_norm": 0.3925624191761017, "learning_rate": 7.356582661081288e-05, "loss": 0.0115, "step": 37660 }, { "epoch": 56.476761619190405, "grad_norm": 0.16422417759895325, "learning_rate": 7.355124231182607e-05, "loss": 0.011, "step": 37670 }, { "epoch": 56.49175412293853, "grad_norm": 0.20414823293685913, "learning_rate": 7.353665543731083e-05, "loss": 0.0128, "step": 37680 }, { "epoch": 56.506746626686656, "grad_norm": 0.32882991433143616, "learning_rate": 7.352206598886229e-05, "loss": 0.0106, "step": 37690 }, { "epoch": 56.52173913043478, "grad_norm": 0.24699150025844574, "learning_rate": 7.350747396807601e-05, "loss": 0.0099, "step": 37700 }, { "epoch": 56.53673163418291, "grad_norm": 0.1537906974554062, "learning_rate": 7.349287937654772e-05, "loss": 0.012, "step": 37710 }, { "epoch": 56.55172413793103, "grad_norm": 0.2261163890361786, "learning_rate": 7.347828221587345e-05, "loss": 0.0138, "step": 37720 }, { "epoch": 56.56671664167916, "grad_norm": 0.27514031529426575, "learning_rate": 7.346368248764955e-05, "loss": 0.0091, "step": 37730 }, { "epoch": 56.58170914542729, "grad_norm": 0.19677311182022095, "learning_rate": 7.34490801934726e-05, "loss": 0.0105, "step": 37740 }, { "epoch": 56.596701649175415, "grad_norm": 0.2183387726545334, "learning_rate": 7.343447533493947e-05, "loss": 0.0089, "step": 37750 }, { "epoch": 56.61169415292354, "grad_norm": 0.19554810225963593, "learning_rate": 7.341986791364739e-05, "loss": 0.0098, "step": 37760 }, { "epoch": 56.626686656671666, "grad_norm": 0.25664108991622925, "learning_rate": 7.340525793119374e-05, "loss": 0.0107, "step": 37770 }, { "epoch": 56.64167916041979, "grad_norm": 0.21398040652275085, "learning_rate": 7.33906453891763e-05, "loss": 0.0098, "step": 37780 }, { "epoch": 56.656671664167916, "grad_norm": 0.21112507581710815, "learning_rate": 7.337603028919303e-05, "loss": 0.009, "step": 37790 }, { "epoch": 56.67166416791604, "grad_norm": 0.34603074193000793, "learning_rate": 7.336141263284225e-05, "loss": 0.0109, "step": 37800 }, { "epoch": 56.68665667166417, "grad_norm": 0.17203664779663086, "learning_rate": 7.334679242172249e-05, "loss": 0.008, "step": 37810 }, { "epoch": 56.70164917541229, "grad_norm": 0.21670806407928467, "learning_rate": 7.33321696574326e-05, "loss": 0.0096, "step": 37820 }, { "epoch": 56.71664167916042, "grad_norm": 0.1629638969898224, "learning_rate": 7.331754434157174e-05, "loss": 0.0118, "step": 37830 }, { "epoch": 56.73163418290854, "grad_norm": 0.24078334867954254, "learning_rate": 7.330291647573928e-05, "loss": 0.0114, "step": 37840 }, { "epoch": 56.74662668665667, "grad_norm": 0.24772147834300995, "learning_rate": 7.32882860615349e-05, "loss": 0.0098, "step": 37850 }, { "epoch": 56.7616191904048, "grad_norm": 0.29776060581207275, "learning_rate": 7.327365310055858e-05, "loss": 0.0139, "step": 37860 }, { "epoch": 56.776611694152926, "grad_norm": 0.448726624250412, "learning_rate": 7.325901759441055e-05, "loss": 0.0082, "step": 37870 }, { "epoch": 56.79160419790105, "grad_norm": 0.1160706952214241, "learning_rate": 7.324437954469132e-05, "loss": 0.0103, "step": 37880 }, { "epoch": 56.80659670164918, "grad_norm": 0.18315503001213074, "learning_rate": 7.32297389530017e-05, "loss": 0.0083, "step": 37890 }, { "epoch": 56.8215892053973, "grad_norm": 0.1282522976398468, "learning_rate": 7.321509582094274e-05, "loss": 0.0073, "step": 37900 }, { "epoch": 56.83658170914543, "grad_norm": 0.2253972440958023, "learning_rate": 7.320045015011582e-05, "loss": 0.0108, "step": 37910 }, { "epoch": 56.85157421289355, "grad_norm": 0.29326343536376953, "learning_rate": 7.318580194212253e-05, "loss": 0.0088, "step": 37920 }, { "epoch": 56.86656671664168, "grad_norm": 0.31127461791038513, "learning_rate": 7.317115119856483e-05, "loss": 0.0125, "step": 37930 }, { "epoch": 56.8815592203898, "grad_norm": 0.1784108281135559, "learning_rate": 7.315649792104485e-05, "loss": 0.0102, "step": 37940 }, { "epoch": 56.89655172413793, "grad_norm": 0.15715113282203674, "learning_rate": 7.314184211116507e-05, "loss": 0.0112, "step": 37950 }, { "epoch": 56.911544227886054, "grad_norm": 0.16576911509037018, "learning_rate": 7.312718377052823e-05, "loss": 0.0112, "step": 37960 }, { "epoch": 56.92653673163418, "grad_norm": 0.23394553363323212, "learning_rate": 7.311252290073737e-05, "loss": 0.0097, "step": 37970 }, { "epoch": 56.94152923538231, "grad_norm": 0.2259296029806137, "learning_rate": 7.309785950339572e-05, "loss": 0.0125, "step": 37980 }, { "epoch": 56.95652173913044, "grad_norm": 0.21870441734790802, "learning_rate": 7.308319358010692e-05, "loss": 0.0108, "step": 37990 }, { "epoch": 56.97151424287856, "grad_norm": 0.21478918194770813, "learning_rate": 7.306852513247474e-05, "loss": 0.0087, "step": 38000 }, { "epoch": 56.98650674662669, "grad_norm": 0.24495959281921387, "learning_rate": 7.305385416210336e-05, "loss": 0.0088, "step": 38010 }, { "epoch": 57.00149925037481, "grad_norm": 0.217265322804451, "learning_rate": 7.303918067059711e-05, "loss": 0.0105, "step": 38020 }, { "epoch": 57.01649175412294, "grad_norm": 0.2404395192861557, "learning_rate": 7.302450465956075e-05, "loss": 0.008, "step": 38030 }, { "epoch": 57.031484257871064, "grad_norm": 0.20125322043895721, "learning_rate": 7.300982613059914e-05, "loss": 0.0095, "step": 38040 }, { "epoch": 57.04647676161919, "grad_norm": 0.239437535405159, "learning_rate": 7.299514508531757e-05, "loss": 0.0097, "step": 38050 }, { "epoch": 57.061469265367315, "grad_norm": 0.1966913342475891, "learning_rate": 7.298046152532148e-05, "loss": 0.0111, "step": 38060 }, { "epoch": 57.07646176911544, "grad_norm": 0.3230597674846649, "learning_rate": 7.296577545221668e-05, "loss": 0.0126, "step": 38070 }, { "epoch": 57.091454272863565, "grad_norm": 0.24357572197914124, "learning_rate": 7.29510868676092e-05, "loss": 0.0111, "step": 38080 }, { "epoch": 57.1064467766117, "grad_norm": 0.2863118052482605, "learning_rate": 7.293639577310538e-05, "loss": 0.0114, "step": 38090 }, { "epoch": 57.12143928035982, "grad_norm": 0.2332863062620163, "learning_rate": 7.292170217031179e-05, "loss": 0.0113, "step": 38100 }, { "epoch": 57.13643178410795, "grad_norm": 0.33997324109077454, "learning_rate": 7.290700606083532e-05, "loss": 0.0107, "step": 38110 }, { "epoch": 57.151424287856074, "grad_norm": 0.3430155813694, "learning_rate": 7.28923074462831e-05, "loss": 0.0152, "step": 38120 }, { "epoch": 57.1664167916042, "grad_norm": 0.3556188642978668, "learning_rate": 7.287760632826255e-05, "loss": 0.0127, "step": 38130 }, { "epoch": 57.181409295352324, "grad_norm": 0.2161775380373001, "learning_rate": 7.286290270838139e-05, "loss": 0.0139, "step": 38140 }, { "epoch": 57.19640179910045, "grad_norm": 0.20631751418113708, "learning_rate": 7.284819658824756e-05, "loss": 0.0107, "step": 38150 }, { "epoch": 57.211394302848575, "grad_norm": 0.2905193865299225, "learning_rate": 7.283348796946929e-05, "loss": 0.0149, "step": 38160 }, { "epoch": 57.2263868065967, "grad_norm": 0.26246464252471924, "learning_rate": 7.28187768536551e-05, "loss": 0.0094, "step": 38170 }, { "epoch": 57.241379310344826, "grad_norm": 0.24416345357894897, "learning_rate": 7.280406324241379e-05, "loss": 0.0086, "step": 38180 }, { "epoch": 57.25637181409295, "grad_norm": 0.3103572130203247, "learning_rate": 7.278934713735441e-05, "loss": 0.0135, "step": 38190 }, { "epoch": 57.271364317841076, "grad_norm": 0.19255851209163666, "learning_rate": 7.277462854008629e-05, "loss": 0.0133, "step": 38200 }, { "epoch": 57.28635682158921, "grad_norm": 0.4600886106491089, "learning_rate": 7.275990745221904e-05, "loss": 0.0121, "step": 38210 }, { "epoch": 57.301349325337334, "grad_norm": 0.22637486457824707, "learning_rate": 7.274518387536252e-05, "loss": 0.0137, "step": 38220 }, { "epoch": 57.31634182908546, "grad_norm": 0.22531302273273468, "learning_rate": 7.273045781112689e-05, "loss": 0.0091, "step": 38230 }, { "epoch": 57.331334332833585, "grad_norm": 0.23859532177448273, "learning_rate": 7.271572926112257e-05, "loss": 0.0123, "step": 38240 }, { "epoch": 57.34632683658171, "grad_norm": 0.2388676553964615, "learning_rate": 7.270099822696024e-05, "loss": 0.0093, "step": 38250 }, { "epoch": 57.361319340329835, "grad_norm": 0.24630172550678253, "learning_rate": 7.26862647102509e-05, "loss": 0.0098, "step": 38260 }, { "epoch": 57.37631184407796, "grad_norm": 0.2870774567127228, "learning_rate": 7.267152871260573e-05, "loss": 0.0098, "step": 38270 }, { "epoch": 57.391304347826086, "grad_norm": 0.22586028277873993, "learning_rate": 7.265679023563629e-05, "loss": 0.0106, "step": 38280 }, { "epoch": 57.40629685157421, "grad_norm": 0.14657743275165558, "learning_rate": 7.264204928095433e-05, "loss": 0.0134, "step": 38290 }, { "epoch": 57.42128935532234, "grad_norm": 0.16221041977405548, "learning_rate": 7.262730585017188e-05, "loss": 0.0154, "step": 38300 }, { "epoch": 57.43628185907046, "grad_norm": 0.5602625608444214, "learning_rate": 7.261255994490131e-05, "loss": 0.0097, "step": 38310 }, { "epoch": 57.45127436281859, "grad_norm": 0.24857544898986816, "learning_rate": 7.259781156675517e-05, "loss": 0.0124, "step": 38320 }, { "epoch": 57.46626686656672, "grad_norm": 0.13824740052223206, "learning_rate": 7.258306071734633e-05, "loss": 0.0081, "step": 38330 }, { "epoch": 57.481259370314845, "grad_norm": 0.17518319189548492, "learning_rate": 7.256830739828792e-05, "loss": 0.0101, "step": 38340 }, { "epoch": 57.49625187406297, "grad_norm": 0.2227618545293808, "learning_rate": 7.255355161119336e-05, "loss": 0.0078, "step": 38350 }, { "epoch": 57.511244377811096, "grad_norm": 0.1441141813993454, "learning_rate": 7.253879335767628e-05, "loss": 0.0104, "step": 38360 }, { "epoch": 57.52623688155922, "grad_norm": 0.262888103723526, "learning_rate": 7.252403263935067e-05, "loss": 0.0094, "step": 38370 }, { "epoch": 57.541229385307346, "grad_norm": 0.2263338416814804, "learning_rate": 7.25092694578307e-05, "loss": 0.01, "step": 38380 }, { "epoch": 57.55622188905547, "grad_norm": 0.275846928358078, "learning_rate": 7.249450381473087e-05, "loss": 0.0136, "step": 38390 }, { "epoch": 57.5712143928036, "grad_norm": 0.13720978796482086, "learning_rate": 7.247973571166593e-05, "loss": 0.0152, "step": 38400 }, { "epoch": 57.58620689655172, "grad_norm": 0.1521092653274536, "learning_rate": 7.246496515025089e-05, "loss": 0.0096, "step": 38410 }, { "epoch": 57.60119940029985, "grad_norm": 0.1999962031841278, "learning_rate": 7.245019213210101e-05, "loss": 0.0088, "step": 38420 }, { "epoch": 57.61619190404797, "grad_norm": 0.21661178767681122, "learning_rate": 7.24354166588319e-05, "loss": 0.0092, "step": 38430 }, { "epoch": 57.6311844077961, "grad_norm": 0.2174401730298996, "learning_rate": 7.242063873205935e-05, "loss": 0.0107, "step": 38440 }, { "epoch": 57.64617691154423, "grad_norm": 0.2707829177379608, "learning_rate": 7.240585835339946e-05, "loss": 0.0131, "step": 38450 }, { "epoch": 57.661169415292356, "grad_norm": 0.2265457659959793, "learning_rate": 7.239107552446857e-05, "loss": 0.0106, "step": 38460 }, { "epoch": 57.67616191904048, "grad_norm": 0.21713531017303467, "learning_rate": 7.237629024688334e-05, "loss": 0.0094, "step": 38470 }, { "epoch": 57.69115442278861, "grad_norm": 0.28782835602760315, "learning_rate": 7.236150252226064e-05, "loss": 0.011, "step": 38480 }, { "epoch": 57.70614692653673, "grad_norm": 0.30068081617355347, "learning_rate": 7.234671235221765e-05, "loss": 0.0113, "step": 38490 }, { "epoch": 57.72113943028486, "grad_norm": 0.25913187861442566, "learning_rate": 7.233191973837179e-05, "loss": 0.011, "step": 38500 }, { "epoch": 57.73613193403298, "grad_norm": 0.25781041383743286, "learning_rate": 7.231712468234078e-05, "loss": 0.0171, "step": 38510 }, { "epoch": 57.75112443778111, "grad_norm": 0.30467334389686584, "learning_rate": 7.230232718574254e-05, "loss": 0.0125, "step": 38520 }, { "epoch": 57.76611694152923, "grad_norm": 0.2672896087169647, "learning_rate": 7.228752725019535e-05, "loss": 0.0101, "step": 38530 }, { "epoch": 57.78110944527736, "grad_norm": 0.20873981714248657, "learning_rate": 7.227272487731769e-05, "loss": 0.0098, "step": 38540 }, { "epoch": 57.796101949025484, "grad_norm": 0.2534633278846741, "learning_rate": 7.225792006872831e-05, "loss": 0.011, "step": 38550 }, { "epoch": 57.81109445277362, "grad_norm": 0.29470890760421753, "learning_rate": 7.224311282604628e-05, "loss": 0.0118, "step": 38560 }, { "epoch": 57.82608695652174, "grad_norm": 0.22966435551643372, "learning_rate": 7.222830315089085e-05, "loss": 0.0122, "step": 38570 }, { "epoch": 57.84107946026987, "grad_norm": 0.3244194984436035, "learning_rate": 7.22134910448816e-05, "loss": 0.009, "step": 38580 }, { "epoch": 57.85607196401799, "grad_norm": 0.22204728424549103, "learning_rate": 7.219867650963839e-05, "loss": 0.0104, "step": 38590 }, { "epoch": 57.87106446776612, "grad_norm": 0.23679745197296143, "learning_rate": 7.21838595467813e-05, "loss": 0.013, "step": 38600 }, { "epoch": 57.88605697151424, "grad_norm": 0.3535931408405304, "learning_rate": 7.216904015793067e-05, "loss": 0.0106, "step": 38610 }, { "epoch": 57.90104947526237, "grad_norm": 0.29746297001838684, "learning_rate": 7.215421834470713e-05, "loss": 0.0108, "step": 38620 }, { "epoch": 57.916041979010494, "grad_norm": 0.36033564805984497, "learning_rate": 7.213939410873158e-05, "loss": 0.01, "step": 38630 }, { "epoch": 57.93103448275862, "grad_norm": 0.21776576340198517, "learning_rate": 7.212456745162519e-05, "loss": 0.0106, "step": 38640 }, { "epoch": 57.946026986506745, "grad_norm": 0.22226881980895996, "learning_rate": 7.210973837500937e-05, "loss": 0.0118, "step": 38650 }, { "epoch": 57.96101949025487, "grad_norm": 0.17661449313163757, "learning_rate": 7.209490688050578e-05, "loss": 0.009, "step": 38660 }, { "epoch": 57.976011994002995, "grad_norm": 0.21139001846313477, "learning_rate": 7.208007296973641e-05, "loss": 0.0104, "step": 38670 }, { "epoch": 57.99100449775113, "grad_norm": 0.4078173339366913, "learning_rate": 7.206523664432345e-05, "loss": 0.0107, "step": 38680 }, { "epoch": 58.00599700149925, "grad_norm": 0.24678607285022736, "learning_rate": 7.205039790588939e-05, "loss": 0.0114, "step": 38690 }, { "epoch": 58.02098950524738, "grad_norm": 0.2043001651763916, "learning_rate": 7.203555675605697e-05, "loss": 0.0089, "step": 38700 }, { "epoch": 58.035982008995504, "grad_norm": 0.1754598170518875, "learning_rate": 7.202071319644917e-05, "loss": 0.0098, "step": 38710 }, { "epoch": 58.05097451274363, "grad_norm": 0.21701332926750183, "learning_rate": 7.200586722868932e-05, "loss": 0.0104, "step": 38720 }, { "epoch": 58.065967016491754, "grad_norm": 0.17841795086860657, "learning_rate": 7.199101885440088e-05, "loss": 0.0086, "step": 38730 }, { "epoch": 58.08095952023988, "grad_norm": 0.47494298219680786, "learning_rate": 7.197616807520771e-05, "loss": 0.0135, "step": 38740 }, { "epoch": 58.095952023988005, "grad_norm": 0.15919040143489838, "learning_rate": 7.196131489273381e-05, "loss": 0.0124, "step": 38750 }, { "epoch": 58.11094452773613, "grad_norm": 0.190465047955513, "learning_rate": 7.194645930860355e-05, "loss": 0.0092, "step": 38760 }, { "epoch": 58.125937031484256, "grad_norm": 0.1808687001466751, "learning_rate": 7.19316013244415e-05, "loss": 0.0103, "step": 38770 }, { "epoch": 58.14092953523238, "grad_norm": 0.22880429029464722, "learning_rate": 7.191674094187248e-05, "loss": 0.0101, "step": 38780 }, { "epoch": 58.155922038980506, "grad_norm": 0.3606894612312317, "learning_rate": 7.190187816252165e-05, "loss": 0.0131, "step": 38790 }, { "epoch": 58.17091454272864, "grad_norm": 0.23165923357009888, "learning_rate": 7.188701298801435e-05, "loss": 0.0118, "step": 38800 }, { "epoch": 58.185907046476764, "grad_norm": 0.20844294130802155, "learning_rate": 7.18721454199762e-05, "loss": 0.0079, "step": 38810 }, { "epoch": 58.20089955022489, "grad_norm": 0.21709643304347992, "learning_rate": 7.185727546003312e-05, "loss": 0.0119, "step": 38820 }, { "epoch": 58.215892053973015, "grad_norm": 0.300650030374527, "learning_rate": 7.184240310981126e-05, "loss": 0.0113, "step": 38830 }, { "epoch": 58.23088455772114, "grad_norm": 0.189447283744812, "learning_rate": 7.182752837093704e-05, "loss": 0.0092, "step": 38840 }, { "epoch": 58.245877061469265, "grad_norm": 0.1566506028175354, "learning_rate": 7.181265124503711e-05, "loss": 0.0103, "step": 38850 }, { "epoch": 58.26086956521739, "grad_norm": 0.22208508849143982, "learning_rate": 7.179777173373847e-05, "loss": 0.0092, "step": 38860 }, { "epoch": 58.275862068965516, "grad_norm": 0.20677492022514343, "learning_rate": 7.178288983866826e-05, "loss": 0.0097, "step": 38870 }, { "epoch": 58.29085457271364, "grad_norm": 0.3668442666530609, "learning_rate": 7.176800556145397e-05, "loss": 0.016, "step": 38880 }, { "epoch": 58.30584707646177, "grad_norm": 0.28701749444007874, "learning_rate": 7.175311890372334e-05, "loss": 0.0087, "step": 38890 }, { "epoch": 58.32083958020989, "grad_norm": 0.19708183407783508, "learning_rate": 7.17382298671043e-05, "loss": 0.0092, "step": 38900 }, { "epoch": 58.335832083958024, "grad_norm": 0.134673610329628, "learning_rate": 7.172333845322515e-05, "loss": 0.0083, "step": 38910 }, { "epoch": 58.35082458770615, "grad_norm": 0.1614800989627838, "learning_rate": 7.170844466371436e-05, "loss": 0.0083, "step": 38920 }, { "epoch": 58.365817091454275, "grad_norm": 0.33030185103416443, "learning_rate": 7.16935485002007e-05, "loss": 0.0093, "step": 38930 }, { "epoch": 58.3808095952024, "grad_norm": 0.12489331513643265, "learning_rate": 7.167864996431319e-05, "loss": 0.0078, "step": 38940 }, { "epoch": 58.395802098950526, "grad_norm": 0.17243681848049164, "learning_rate": 7.166374905768111e-05, "loss": 0.0081, "step": 38950 }, { "epoch": 58.41079460269865, "grad_norm": 0.13255470991134644, "learning_rate": 7.164884578193401e-05, "loss": 0.0086, "step": 38960 }, { "epoch": 58.425787106446776, "grad_norm": 0.3058273494243622, "learning_rate": 7.163394013870168e-05, "loss": 0.0105, "step": 38970 }, { "epoch": 58.4407796101949, "grad_norm": 0.12611596286296844, "learning_rate": 7.161903212961419e-05, "loss": 0.011, "step": 38980 }, { "epoch": 58.45577211394303, "grad_norm": 0.2511724829673767, "learning_rate": 7.160412175630183e-05, "loss": 0.0082, "step": 38990 }, { "epoch": 58.47076461769115, "grad_norm": 0.3175627589225769, "learning_rate": 7.158920902039521e-05, "loss": 0.0115, "step": 39000 }, { "epoch": 58.48575712143928, "grad_norm": 0.20204301178455353, "learning_rate": 7.157429392352514e-05, "loss": 0.0101, "step": 39010 }, { "epoch": 58.5007496251874, "grad_norm": 0.23976826667785645, "learning_rate": 7.155937646732275e-05, "loss": 0.015, "step": 39020 }, { "epoch": 58.515742128935536, "grad_norm": 0.21765409409999847, "learning_rate": 7.154445665341933e-05, "loss": 0.0096, "step": 39030 }, { "epoch": 58.53073463268366, "grad_norm": 0.22783629596233368, "learning_rate": 7.152953448344654e-05, "loss": 0.009, "step": 39040 }, { "epoch": 58.545727136431786, "grad_norm": 0.18010835349559784, "learning_rate": 7.151460995903624e-05, "loss": 0.0109, "step": 39050 }, { "epoch": 58.56071964017991, "grad_norm": 0.1745825856924057, "learning_rate": 7.149968308182052e-05, "loss": 0.0095, "step": 39060 }, { "epoch": 58.57571214392804, "grad_norm": 0.2893361449241638, "learning_rate": 7.14847538534318e-05, "loss": 0.0105, "step": 39070 }, { "epoch": 58.59070464767616, "grad_norm": 0.21975651383399963, "learning_rate": 7.14698222755027e-05, "loss": 0.0107, "step": 39080 }, { "epoch": 58.60569715142429, "grad_norm": 0.27238115668296814, "learning_rate": 7.14548883496661e-05, "loss": 0.0099, "step": 39090 }, { "epoch": 58.62068965517241, "grad_norm": 0.3582249581813812, "learning_rate": 7.143995207755517e-05, "loss": 0.0101, "step": 39100 }, { "epoch": 58.63568215892054, "grad_norm": 0.17492012679576874, "learning_rate": 7.142501346080333e-05, "loss": 0.0112, "step": 39110 }, { "epoch": 58.65067466266866, "grad_norm": 0.10771647840738297, "learning_rate": 7.141007250104421e-05, "loss": 0.0101, "step": 39120 }, { "epoch": 58.66566716641679, "grad_norm": 0.29413682222366333, "learning_rate": 7.139512919991176e-05, "loss": 0.0096, "step": 39130 }, { "epoch": 58.680659670164914, "grad_norm": 0.3252902328968048, "learning_rate": 7.138018355904014e-05, "loss": 0.0094, "step": 39140 }, { "epoch": 58.69565217391305, "grad_norm": 0.23126348853111267, "learning_rate": 7.13652355800638e-05, "loss": 0.013, "step": 39150 }, { "epoch": 58.71064467766117, "grad_norm": 0.26650136709213257, "learning_rate": 7.135028526461743e-05, "loss": 0.0099, "step": 39160 }, { "epoch": 58.7256371814093, "grad_norm": 0.2789054214954376, "learning_rate": 7.133533261433594e-05, "loss": 0.0103, "step": 39170 }, { "epoch": 58.74062968515742, "grad_norm": 0.20080898702144623, "learning_rate": 7.132037763085457e-05, "loss": 0.0087, "step": 39180 }, { "epoch": 58.75562218890555, "grad_norm": 0.17284069955348969, "learning_rate": 7.130542031580875e-05, "loss": 0.01, "step": 39190 }, { "epoch": 58.77061469265367, "grad_norm": 0.18214751780033112, "learning_rate": 7.12904606708342e-05, "loss": 0.0118, "step": 39200 }, { "epoch": 58.7856071964018, "grad_norm": 0.18713440001010895, "learning_rate": 7.127549869756687e-05, "loss": 0.0098, "step": 39210 }, { "epoch": 58.800599700149924, "grad_norm": 0.17824335396289825, "learning_rate": 7.126053439764299e-05, "loss": 0.0099, "step": 39220 }, { "epoch": 58.81559220389805, "grad_norm": 0.16663694381713867, "learning_rate": 7.124556777269904e-05, "loss": 0.0103, "step": 39230 }, { "epoch": 58.830584707646175, "grad_norm": 0.2038358896970749, "learning_rate": 7.123059882437174e-05, "loss": 0.0101, "step": 39240 }, { "epoch": 58.8455772113943, "grad_norm": 0.2259635329246521, "learning_rate": 7.121562755429807e-05, "loss": 0.0122, "step": 39250 }, { "epoch": 58.86056971514243, "grad_norm": 0.2795336842536926, "learning_rate": 7.120065396411528e-05, "loss": 0.0126, "step": 39260 }, { "epoch": 58.87556221889056, "grad_norm": 0.20137785375118256, "learning_rate": 7.118567805546084e-05, "loss": 0.0102, "step": 39270 }, { "epoch": 58.89055472263868, "grad_norm": 0.20058855414390564, "learning_rate": 7.117069982997248e-05, "loss": 0.0083, "step": 39280 }, { "epoch": 58.90554722638681, "grad_norm": 0.22474078834056854, "learning_rate": 7.115571928928825e-05, "loss": 0.0079, "step": 39290 }, { "epoch": 58.920539730134934, "grad_norm": 0.24123144149780273, "learning_rate": 7.114073643504635e-05, "loss": 0.0119, "step": 39300 }, { "epoch": 58.93553223388306, "grad_norm": 0.2897474765777588, "learning_rate": 7.11257512688853e-05, "loss": 0.0113, "step": 39310 }, { "epoch": 58.950524737631184, "grad_norm": 0.13341014087200165, "learning_rate": 7.111076379244384e-05, "loss": 0.0097, "step": 39320 }, { "epoch": 58.96551724137931, "grad_norm": 0.14021123945713043, "learning_rate": 7.109577400736101e-05, "loss": 0.011, "step": 39330 }, { "epoch": 58.980509745127435, "grad_norm": 0.2831204831600189, "learning_rate": 7.108078191527605e-05, "loss": 0.0106, "step": 39340 }, { "epoch": 58.99550224887556, "grad_norm": 0.09947346150875092, "learning_rate": 7.106578751782847e-05, "loss": 0.0107, "step": 39350 }, { "epoch": 59.010494752623686, "grad_norm": 0.26914721727371216, "learning_rate": 7.105079081665803e-05, "loss": 0.0099, "step": 39360 }, { "epoch": 59.02548725637181, "grad_norm": 0.2420497089624405, "learning_rate": 7.103579181340476e-05, "loss": 0.0089, "step": 39370 }, { "epoch": 59.04047976011994, "grad_norm": 0.1460186392068863, "learning_rate": 7.102079050970893e-05, "loss": 0.0085, "step": 39380 }, { "epoch": 59.05547226386807, "grad_norm": 0.21356020867824554, "learning_rate": 7.100578690721105e-05, "loss": 0.0127, "step": 39390 }, { "epoch": 59.070464767616194, "grad_norm": 0.1665947586297989, "learning_rate": 7.09907810075519e-05, "loss": 0.0075, "step": 39400 }, { "epoch": 59.08545727136432, "grad_norm": 0.23792199790477753, "learning_rate": 7.097577281237249e-05, "loss": 0.008, "step": 39410 }, { "epoch": 59.100449775112445, "grad_norm": 0.20755831897258759, "learning_rate": 7.09607623233141e-05, "loss": 0.0095, "step": 39420 }, { "epoch": 59.11544227886057, "grad_norm": 0.2140742838382721, "learning_rate": 7.094574954201827e-05, "loss": 0.0073, "step": 39430 }, { "epoch": 59.130434782608695, "grad_norm": 0.1436961591243744, "learning_rate": 7.093073447012675e-05, "loss": 0.0075, "step": 39440 }, { "epoch": 59.14542728635682, "grad_norm": 0.17382636666297913, "learning_rate": 7.09157171092816e-05, "loss": 0.0084, "step": 39450 }, { "epoch": 59.160419790104946, "grad_norm": 0.3374429941177368, "learning_rate": 7.090069746112504e-05, "loss": 0.0098, "step": 39460 }, { "epoch": 59.17541229385307, "grad_norm": 0.28581079840660095, "learning_rate": 7.088567552729965e-05, "loss": 0.0085, "step": 39470 }, { "epoch": 59.1904047976012, "grad_norm": 0.2302553653717041, "learning_rate": 7.087065130944818e-05, "loss": 0.0096, "step": 39480 }, { "epoch": 59.20539730134932, "grad_norm": 0.2001619189977646, "learning_rate": 7.085562480921366e-05, "loss": 0.0107, "step": 39490 }, { "epoch": 59.220389805097454, "grad_norm": 0.2364124208688736, "learning_rate": 7.084059602823937e-05, "loss": 0.0099, "step": 39500 }, { "epoch": 59.23538230884558, "grad_norm": 0.3448309898376465, "learning_rate": 7.082556496816882e-05, "loss": 0.0096, "step": 39510 }, { "epoch": 59.250374812593705, "grad_norm": 0.17523989081382751, "learning_rate": 7.081053163064582e-05, "loss": 0.0104, "step": 39520 }, { "epoch": 59.26536731634183, "grad_norm": 0.16455331444740295, "learning_rate": 7.079549601731434e-05, "loss": 0.0154, "step": 39530 }, { "epoch": 59.280359820089956, "grad_norm": 0.34488943219184875, "learning_rate": 7.07804581298187e-05, "loss": 0.0099, "step": 39540 }, { "epoch": 59.29535232383808, "grad_norm": 0.2067282795906067, "learning_rate": 7.07654179698034e-05, "loss": 0.0104, "step": 39550 }, { "epoch": 59.310344827586206, "grad_norm": 0.14613966643810272, "learning_rate": 7.075037553891321e-05, "loss": 0.0096, "step": 39560 }, { "epoch": 59.32533733133433, "grad_norm": 0.2491888552904129, "learning_rate": 7.073533083879315e-05, "loss": 0.0094, "step": 39570 }, { "epoch": 59.34032983508246, "grad_norm": 0.21905668079853058, "learning_rate": 7.072028387108849e-05, "loss": 0.0121, "step": 39580 }, { "epoch": 59.35532233883058, "grad_norm": 0.3318086862564087, "learning_rate": 7.070523463744472e-05, "loss": 0.0124, "step": 39590 }, { "epoch": 59.37031484257871, "grad_norm": 0.3118579089641571, "learning_rate": 7.069018313950763e-05, "loss": 0.0106, "step": 39600 }, { "epoch": 59.38530734632684, "grad_norm": 0.24240747094154358, "learning_rate": 7.067512937892322e-05, "loss": 0.0092, "step": 39610 }, { "epoch": 59.400299850074965, "grad_norm": 0.26548516750335693, "learning_rate": 7.066007335733775e-05, "loss": 0.01, "step": 39620 }, { "epoch": 59.41529235382309, "grad_norm": 0.26276788115501404, "learning_rate": 7.064501507639772e-05, "loss": 0.0122, "step": 39630 }, { "epoch": 59.430284857571216, "grad_norm": 0.19654424488544464, "learning_rate": 7.062995453774987e-05, "loss": 0.0084, "step": 39640 }, { "epoch": 59.44527736131934, "grad_norm": 0.28271326422691345, "learning_rate": 7.061489174304121e-05, "loss": 0.0097, "step": 39650 }, { "epoch": 59.46026986506747, "grad_norm": 0.2416294366121292, "learning_rate": 7.0599826693919e-05, "loss": 0.0105, "step": 39660 }, { "epoch": 59.47526236881559, "grad_norm": 0.23228928446769714, "learning_rate": 7.05847593920307e-05, "loss": 0.0105, "step": 39670 }, { "epoch": 59.49025487256372, "grad_norm": 0.18062014877796173, "learning_rate": 7.056968983902406e-05, "loss": 0.0094, "step": 39680 }, { "epoch": 59.50524737631184, "grad_norm": 0.21174904704093933, "learning_rate": 7.055461803654706e-05, "loss": 0.0107, "step": 39690 }, { "epoch": 59.52023988005997, "grad_norm": 0.28789570927619934, "learning_rate": 7.053954398624794e-05, "loss": 0.0087, "step": 39700 }, { "epoch": 59.53523238380809, "grad_norm": 0.21120695769786835, "learning_rate": 7.052446768977518e-05, "loss": 0.0093, "step": 39710 }, { "epoch": 59.55022488755622, "grad_norm": 0.5612185001373291, "learning_rate": 7.050938914877748e-05, "loss": 0.009, "step": 39720 }, { "epoch": 59.56521739130435, "grad_norm": 0.2751973271369934, "learning_rate": 7.049430836490382e-05, "loss": 0.0098, "step": 39730 }, { "epoch": 59.58020989505248, "grad_norm": 0.2856980562210083, "learning_rate": 7.047922533980342e-05, "loss": 0.0114, "step": 39740 }, { "epoch": 59.5952023988006, "grad_norm": 0.15585537254810333, "learning_rate": 7.046414007512571e-05, "loss": 0.0069, "step": 39750 }, { "epoch": 59.61019490254873, "grad_norm": 0.26106882095336914, "learning_rate": 7.044905257252042e-05, "loss": 0.0084, "step": 39760 }, { "epoch": 59.62518740629685, "grad_norm": 0.32203561067581177, "learning_rate": 7.04339628336375e-05, "loss": 0.0114, "step": 39770 }, { "epoch": 59.64017991004498, "grad_norm": 0.1285986751317978, "learning_rate": 7.041887086012711e-05, "loss": 0.0083, "step": 39780 }, { "epoch": 59.6551724137931, "grad_norm": 0.22290053963661194, "learning_rate": 7.040377665363969e-05, "loss": 0.0092, "step": 39790 }, { "epoch": 59.67016491754123, "grad_norm": 0.24974898993968964, "learning_rate": 7.038868021582594e-05, "loss": 0.0107, "step": 39800 }, { "epoch": 59.685157421289354, "grad_norm": 0.2782224714756012, "learning_rate": 7.037358154833679e-05, "loss": 0.0098, "step": 39810 }, { "epoch": 59.70014992503748, "grad_norm": 0.17400531470775604, "learning_rate": 7.035848065282339e-05, "loss": 0.0126, "step": 39820 }, { "epoch": 59.715142428785605, "grad_norm": 0.20197957754135132, "learning_rate": 7.034337753093714e-05, "loss": 0.0123, "step": 39830 }, { "epoch": 59.73013493253373, "grad_norm": 0.1415633261203766, "learning_rate": 7.032827218432972e-05, "loss": 0.0098, "step": 39840 }, { "epoch": 59.74512743628186, "grad_norm": 0.1269174963235855, "learning_rate": 7.031316461465302e-05, "loss": 0.0115, "step": 39850 }, { "epoch": 59.76011994002999, "grad_norm": 0.21685579419136047, "learning_rate": 7.029805482355915e-05, "loss": 0.0126, "step": 39860 }, { "epoch": 59.77511244377811, "grad_norm": 0.15954412519931793, "learning_rate": 7.028294281270055e-05, "loss": 0.0135, "step": 39870 }, { "epoch": 59.79010494752624, "grad_norm": 0.2107791304588318, "learning_rate": 7.026782858372982e-05, "loss": 0.0125, "step": 39880 }, { "epoch": 59.805097451274364, "grad_norm": 0.350946843624115, "learning_rate": 7.025271213829982e-05, "loss": 0.01, "step": 39890 }, { "epoch": 59.82008995502249, "grad_norm": 0.17786630988121033, "learning_rate": 7.023759347806366e-05, "loss": 0.0093, "step": 39900 }, { "epoch": 59.835082458770614, "grad_norm": 0.2664922773838043, "learning_rate": 7.022247260467472e-05, "loss": 0.0173, "step": 39910 }, { "epoch": 59.85007496251874, "grad_norm": 0.14755524694919586, "learning_rate": 7.020734951978658e-05, "loss": 0.0098, "step": 39920 }, { "epoch": 59.865067466266865, "grad_norm": 0.174472838640213, "learning_rate": 7.019222422505307e-05, "loss": 0.0123, "step": 39930 }, { "epoch": 59.88005997001499, "grad_norm": 0.2674141824245453, "learning_rate": 7.017709672212827e-05, "loss": 0.0079, "step": 39940 }, { "epoch": 59.895052473763116, "grad_norm": 0.186272993683815, "learning_rate": 7.016196701266652e-05, "loss": 0.0102, "step": 39950 }, { "epoch": 59.91004497751124, "grad_norm": 0.22368423640727997, "learning_rate": 7.014683509832238e-05, "loss": 0.0089, "step": 39960 }, { "epoch": 59.92503748125937, "grad_norm": 0.2748717963695526, "learning_rate": 7.013170098075063e-05, "loss": 0.0086, "step": 39970 }, { "epoch": 59.9400299850075, "grad_norm": 0.21722513437271118, "learning_rate": 7.011656466160632e-05, "loss": 0.0101, "step": 39980 }, { "epoch": 59.955022488755624, "grad_norm": 0.1908896565437317, "learning_rate": 7.010142614254475e-05, "loss": 0.0087, "step": 39990 }, { "epoch": 59.97001499250375, "grad_norm": 0.31103378534317017, "learning_rate": 7.008628542522147e-05, "loss": 0.0092, "step": 40000 }, { "epoch": 59.985007496251875, "grad_norm": 0.15563859045505524, "learning_rate": 7.007114251129218e-05, "loss": 0.0104, "step": 40010 }, { "epoch": 60.0, "grad_norm": 0.20176877081394196, "learning_rate": 7.005599740241293e-05, "loss": 0.0099, "step": 40020 }, { "epoch": 60.014992503748125, "grad_norm": 0.1566980481147766, "learning_rate": 7.004085010023996e-05, "loss": 0.0072, "step": 40030 }, { "epoch": 60.02998500749625, "grad_norm": 0.10527946054935455, "learning_rate": 7.002570060642976e-05, "loss": 0.0086, "step": 40040 }, { "epoch": 60.044977511244376, "grad_norm": 0.17548835277557373, "learning_rate": 7.001054892263903e-05, "loss": 0.0077, "step": 40050 }, { "epoch": 60.0599700149925, "grad_norm": 0.1923963725566864, "learning_rate": 6.999539505052477e-05, "loss": 0.0105, "step": 40060 }, { "epoch": 60.07496251874063, "grad_norm": 0.19175414741039276, "learning_rate": 6.998023899174418e-05, "loss": 0.0122, "step": 40070 }, { "epoch": 60.08995502248876, "grad_norm": 0.22024191915988922, "learning_rate": 6.996508074795467e-05, "loss": 0.0104, "step": 40080 }, { "epoch": 60.104947526236884, "grad_norm": 0.4933958649635315, "learning_rate": 6.994992032081396e-05, "loss": 0.0083, "step": 40090 }, { "epoch": 60.11994002998501, "grad_norm": 0.18910221755504608, "learning_rate": 6.993475771197995e-05, "loss": 0.0088, "step": 40100 }, { "epoch": 60.134932533733135, "grad_norm": 0.11705268174409866, "learning_rate": 6.991959292311082e-05, "loss": 0.007, "step": 40110 }, { "epoch": 60.14992503748126, "grad_norm": 0.2105374038219452, "learning_rate": 6.990442595586495e-05, "loss": 0.0077, "step": 40120 }, { "epoch": 60.164917541229386, "grad_norm": 0.1605563461780548, "learning_rate": 6.988925681190098e-05, "loss": 0.0087, "step": 40130 }, { "epoch": 60.17991004497751, "grad_norm": 0.23351311683654785, "learning_rate": 6.987408549287778e-05, "loss": 0.0097, "step": 40140 }, { "epoch": 60.194902548725636, "grad_norm": 0.21544848382472992, "learning_rate": 6.985891200045449e-05, "loss": 0.0109, "step": 40150 }, { "epoch": 60.20989505247376, "grad_norm": 0.21462669968605042, "learning_rate": 6.984373633629045e-05, "loss": 0.0095, "step": 40160 }, { "epoch": 60.22488755622189, "grad_norm": 0.48104581236839294, "learning_rate": 6.98285585020452e-05, "loss": 0.0114, "step": 40170 }, { "epoch": 60.23988005997001, "grad_norm": 0.5588064789772034, "learning_rate": 6.981337849937864e-05, "loss": 0.0088, "step": 40180 }, { "epoch": 60.25487256371814, "grad_norm": 0.14133451879024506, "learning_rate": 6.979819632995078e-05, "loss": 0.0083, "step": 40190 }, { "epoch": 60.26986506746627, "grad_norm": 0.16397148370742798, "learning_rate": 6.978301199542193e-05, "loss": 0.0079, "step": 40200 }, { "epoch": 60.284857571214395, "grad_norm": 0.13743051886558533, "learning_rate": 6.976782549745263e-05, "loss": 0.0097, "step": 40210 }, { "epoch": 60.29985007496252, "grad_norm": 0.16711504757404327, "learning_rate": 6.975263683770367e-05, "loss": 0.0107, "step": 40220 }, { "epoch": 60.314842578710646, "grad_norm": 0.14240871369838715, "learning_rate": 6.973744601783604e-05, "loss": 0.0087, "step": 40230 }, { "epoch": 60.32983508245877, "grad_norm": 0.20128318667411804, "learning_rate": 6.972225303951097e-05, "loss": 0.01, "step": 40240 }, { "epoch": 60.3448275862069, "grad_norm": 0.15380802750587463, "learning_rate": 6.970705790438998e-05, "loss": 0.0083, "step": 40250 }, { "epoch": 60.35982008995502, "grad_norm": 0.2429584562778473, "learning_rate": 6.969186061413477e-05, "loss": 0.011, "step": 40260 }, { "epoch": 60.37481259370315, "grad_norm": 0.17178082466125488, "learning_rate": 6.967666117040727e-05, "loss": 0.0081, "step": 40270 }, { "epoch": 60.38980509745127, "grad_norm": 0.19773855805397034, "learning_rate": 6.966145957486972e-05, "loss": 0.0084, "step": 40280 }, { "epoch": 60.4047976011994, "grad_norm": 0.11564931273460388, "learning_rate": 6.964625582918449e-05, "loss": 0.0085, "step": 40290 }, { "epoch": 60.41979010494752, "grad_norm": 0.2793220281600952, "learning_rate": 6.963104993501425e-05, "loss": 0.0088, "step": 40300 }, { "epoch": 60.43478260869565, "grad_norm": 0.2416740357875824, "learning_rate": 6.961584189402192e-05, "loss": 0.0122, "step": 40310 }, { "epoch": 60.44977511244378, "grad_norm": 0.2426164448261261, "learning_rate": 6.96006317078706e-05, "loss": 0.0113, "step": 40320 }, { "epoch": 60.46476761619191, "grad_norm": 0.20479093492031097, "learning_rate": 6.958541937822367e-05, "loss": 0.0105, "step": 40330 }, { "epoch": 60.47976011994003, "grad_norm": 0.18970456719398499, "learning_rate": 6.957020490674473e-05, "loss": 0.0114, "step": 40340 }, { "epoch": 60.49475262368816, "grad_norm": 0.2218521535396576, "learning_rate": 6.95549882950976e-05, "loss": 0.01, "step": 40350 }, { "epoch": 60.50974512743628, "grad_norm": 0.1798732727766037, "learning_rate": 6.953976954494635e-05, "loss": 0.0087, "step": 40360 }, { "epoch": 60.52473763118441, "grad_norm": 0.32046106457710266, "learning_rate": 6.952454865795528e-05, "loss": 0.0099, "step": 40370 }, { "epoch": 60.53973013493253, "grad_norm": 0.3863264322280884, "learning_rate": 6.950932563578892e-05, "loss": 0.0096, "step": 40380 }, { "epoch": 60.55472263868066, "grad_norm": 0.23071633279323578, "learning_rate": 6.949410048011206e-05, "loss": 0.0086, "step": 40390 }, { "epoch": 60.569715142428784, "grad_norm": 0.2095014452934265, "learning_rate": 6.947887319258966e-05, "loss": 0.0103, "step": 40400 }, { "epoch": 60.58470764617691, "grad_norm": 0.2843829095363617, "learning_rate": 6.946364377488696e-05, "loss": 0.0092, "step": 40410 }, { "epoch": 60.599700149925035, "grad_norm": 0.3394915759563446, "learning_rate": 6.944841222866947e-05, "loss": 0.0122, "step": 40420 }, { "epoch": 60.61469265367316, "grad_norm": 0.18481406569480896, "learning_rate": 6.943317855560284e-05, "loss": 0.013, "step": 40430 }, { "epoch": 60.62968515742129, "grad_norm": 0.15514449775218964, "learning_rate": 6.941794275735302e-05, "loss": 0.0112, "step": 40440 }, { "epoch": 60.64467766116942, "grad_norm": 0.19133859872817993, "learning_rate": 6.94027048355862e-05, "loss": 0.0114, "step": 40450 }, { "epoch": 60.65967016491754, "grad_norm": 0.3616866171360016, "learning_rate": 6.938746479196871e-05, "loss": 0.011, "step": 40460 }, { "epoch": 60.67466266866567, "grad_norm": 0.3320258855819702, "learning_rate": 6.937222262816724e-05, "loss": 0.0107, "step": 40470 }, { "epoch": 60.689655172413794, "grad_norm": 0.2416321188211441, "learning_rate": 6.935697834584865e-05, "loss": 0.0164, "step": 40480 }, { "epoch": 60.70464767616192, "grad_norm": 0.15370136499404907, "learning_rate": 6.934173194667998e-05, "loss": 0.0115, "step": 40490 }, { "epoch": 60.719640179910044, "grad_norm": 0.2974909842014313, "learning_rate": 6.93264834323286e-05, "loss": 0.0097, "step": 40500 }, { "epoch": 60.73463268365817, "grad_norm": 0.3706638216972351, "learning_rate": 6.931123280446206e-05, "loss": 0.0111, "step": 40510 }, { "epoch": 60.749625187406295, "grad_norm": 0.2038111537694931, "learning_rate": 6.929598006474811e-05, "loss": 0.0136, "step": 40520 }, { "epoch": 60.76461769115442, "grad_norm": 0.25674107670783997, "learning_rate": 6.92807252148548e-05, "loss": 0.0103, "step": 40530 }, { "epoch": 60.779610194902546, "grad_norm": 0.16866421699523926, "learning_rate": 6.926546825645037e-05, "loss": 0.0127, "step": 40540 }, { "epoch": 60.79460269865068, "grad_norm": 0.22127793729305267, "learning_rate": 6.92502091912033e-05, "loss": 0.0117, "step": 40550 }, { "epoch": 60.8095952023988, "grad_norm": 0.23397043347358704, "learning_rate": 6.923494802078229e-05, "loss": 0.0078, "step": 40560 }, { "epoch": 60.82458770614693, "grad_norm": 0.20638255774974823, "learning_rate": 6.921968474685628e-05, "loss": 0.0097, "step": 40570 }, { "epoch": 60.839580209895054, "grad_norm": 0.24254432320594788, "learning_rate": 6.920441937109445e-05, "loss": 0.0085, "step": 40580 }, { "epoch": 60.85457271364318, "grad_norm": 0.19061961770057678, "learning_rate": 6.91891518951662e-05, "loss": 0.0095, "step": 40590 }, { "epoch": 60.869565217391305, "grad_norm": 0.21356995403766632, "learning_rate": 6.917388232074114e-05, "loss": 0.0126, "step": 40600 }, { "epoch": 60.88455772113943, "grad_norm": 0.21456070244312286, "learning_rate": 6.915861064948914e-05, "loss": 0.0102, "step": 40610 }, { "epoch": 60.899550224887555, "grad_norm": 0.44019263982772827, "learning_rate": 6.914333688308025e-05, "loss": 0.0119, "step": 40620 }, { "epoch": 60.91454272863568, "grad_norm": 0.19682897627353668, "learning_rate": 6.912806102318487e-05, "loss": 0.01, "step": 40630 }, { "epoch": 60.929535232383806, "grad_norm": 0.1379341334104538, "learning_rate": 6.911278307147347e-05, "loss": 0.0088, "step": 40640 }, { "epoch": 60.94452773613193, "grad_norm": 0.3483320474624634, "learning_rate": 6.909750302961684e-05, "loss": 0.0107, "step": 40650 }, { "epoch": 60.95952023988006, "grad_norm": 0.13843953609466553, "learning_rate": 6.9082220899286e-05, "loss": 0.0092, "step": 40660 }, { "epoch": 60.97451274362819, "grad_norm": 0.18378359079360962, "learning_rate": 6.906693668215219e-05, "loss": 0.0094, "step": 40670 }, { "epoch": 60.989505247376314, "grad_norm": 0.216045543551445, "learning_rate": 6.905165037988683e-05, "loss": 0.0099, "step": 40680 }, { "epoch": 61.00449775112444, "grad_norm": 0.2085292488336563, "learning_rate": 6.903636199416164e-05, "loss": 0.0112, "step": 40690 }, { "epoch": 61.019490254872565, "grad_norm": 0.25436797738075256, "learning_rate": 6.902107152664851e-05, "loss": 0.0098, "step": 40700 }, { "epoch": 61.03448275862069, "grad_norm": 0.22071951627731323, "learning_rate": 6.90057789790196e-05, "loss": 0.0086, "step": 40710 }, { "epoch": 61.049475262368816, "grad_norm": 0.23778802156448364, "learning_rate": 6.899048435294728e-05, "loss": 0.0119, "step": 40720 }, { "epoch": 61.06446776611694, "grad_norm": 0.17514735460281372, "learning_rate": 6.897518765010415e-05, "loss": 0.0078, "step": 40730 }, { "epoch": 61.079460269865066, "grad_norm": 0.33197182416915894, "learning_rate": 6.895988887216302e-05, "loss": 0.0119, "step": 40740 }, { "epoch": 61.09445277361319, "grad_norm": 0.211883544921875, "learning_rate": 6.894458802079694e-05, "loss": 0.0099, "step": 40750 }, { "epoch": 61.10944527736132, "grad_norm": 0.3251708745956421, "learning_rate": 6.892928509767922e-05, "loss": 0.0117, "step": 40760 }, { "epoch": 61.12443778110944, "grad_norm": 0.20620296895503998, "learning_rate": 6.891398010448333e-05, "loss": 0.012, "step": 40770 }, { "epoch": 61.13943028485757, "grad_norm": 0.20200501382350922, "learning_rate": 6.8898673042883e-05, "loss": 0.0076, "step": 40780 }, { "epoch": 61.1544227886057, "grad_norm": 0.1998075544834137, "learning_rate": 6.888336391455222e-05, "loss": 0.0117, "step": 40790 }, { "epoch": 61.169415292353825, "grad_norm": 0.26258525252342224, "learning_rate": 6.886805272116513e-05, "loss": 0.0102, "step": 40800 }, { "epoch": 61.18440779610195, "grad_norm": 0.32895466685295105, "learning_rate": 6.885273946439617e-05, "loss": 0.0106, "step": 40810 }, { "epoch": 61.199400299850076, "grad_norm": 0.20206740498542786, "learning_rate": 6.883742414591998e-05, "loss": 0.0086, "step": 40820 }, { "epoch": 61.2143928035982, "grad_norm": 0.2986341714859009, "learning_rate": 6.88221067674114e-05, "loss": 0.0057, "step": 40830 }, { "epoch": 61.22938530734633, "grad_norm": 0.16961978375911713, "learning_rate": 6.88067873305455e-05, "loss": 0.0103, "step": 40840 }, { "epoch": 61.24437781109445, "grad_norm": 0.20097962021827698, "learning_rate": 6.879146583699765e-05, "loss": 0.0092, "step": 40850 }, { "epoch": 61.25937031484258, "grad_norm": 0.19327093660831451, "learning_rate": 6.877614228844334e-05, "loss": 0.008, "step": 40860 }, { "epoch": 61.2743628185907, "grad_norm": 0.195532888174057, "learning_rate": 6.876081668655832e-05, "loss": 0.0091, "step": 40870 }, { "epoch": 61.28935532233883, "grad_norm": 0.46263694763183594, "learning_rate": 6.874548903301861e-05, "loss": 0.0119, "step": 40880 }, { "epoch": 61.30434782608695, "grad_norm": 0.1919982135295868, "learning_rate": 6.873015932950039e-05, "loss": 0.0103, "step": 40890 }, { "epoch": 61.319340329835086, "grad_norm": 0.16016824543476105, "learning_rate": 6.871482757768012e-05, "loss": 0.0089, "step": 40900 }, { "epoch": 61.33433283358321, "grad_norm": 0.2471570074558258, "learning_rate": 6.869949377923442e-05, "loss": 0.0084, "step": 40910 }, { "epoch": 61.34932533733134, "grad_norm": 0.1920926868915558, "learning_rate": 6.868415793584022e-05, "loss": 0.0086, "step": 40920 }, { "epoch": 61.36431784107946, "grad_norm": 0.25902059674263, "learning_rate": 6.86688200491746e-05, "loss": 0.0076, "step": 40930 }, { "epoch": 61.37931034482759, "grad_norm": 0.25754132866859436, "learning_rate": 6.865348012091488e-05, "loss": 0.0106, "step": 40940 }, { "epoch": 61.39430284857571, "grad_norm": 0.15251226723194122, "learning_rate": 6.86381381527386e-05, "loss": 0.0089, "step": 40950 }, { "epoch": 61.40929535232384, "grad_norm": 0.17040365934371948, "learning_rate": 6.862279414632359e-05, "loss": 0.0124, "step": 40960 }, { "epoch": 61.42428785607196, "grad_norm": 0.2570486068725586, "learning_rate": 6.860744810334778e-05, "loss": 0.01, "step": 40970 }, { "epoch": 61.43928035982009, "grad_norm": 0.2584071159362793, "learning_rate": 6.859210002548943e-05, "loss": 0.0085, "step": 40980 }, { "epoch": 61.454272863568214, "grad_norm": 0.297982782125473, "learning_rate": 6.8576749914427e-05, "loss": 0.012, "step": 40990 }, { "epoch": 61.46926536731634, "grad_norm": 0.21554912626743317, "learning_rate": 6.85613977718391e-05, "loss": 0.0074, "step": 41000 }, { "epoch": 61.484257871064464, "grad_norm": 0.21346595883369446, "learning_rate": 6.854604359940464e-05, "loss": 0.0093, "step": 41010 }, { "epoch": 61.4992503748126, "grad_norm": 0.3179599642753601, "learning_rate": 6.853068739880276e-05, "loss": 0.0089, "step": 41020 }, { "epoch": 61.51424287856072, "grad_norm": 0.19862349331378937, "learning_rate": 6.851532917171276e-05, "loss": 0.009, "step": 41030 }, { "epoch": 61.52923538230885, "grad_norm": 0.2542065382003784, "learning_rate": 6.849996891981418e-05, "loss": 0.0108, "step": 41040 }, { "epoch": 61.54422788605697, "grad_norm": 0.24582251906394958, "learning_rate": 6.848460664478684e-05, "loss": 0.0085, "step": 41050 }, { "epoch": 61.5592203898051, "grad_norm": 0.29077446460723877, "learning_rate": 6.846924234831069e-05, "loss": 0.0115, "step": 41060 }, { "epoch": 61.574212893553224, "grad_norm": 0.34389299154281616, "learning_rate": 6.845387603206597e-05, "loss": 0.0116, "step": 41070 }, { "epoch": 61.58920539730135, "grad_norm": 0.5266637206077576, "learning_rate": 6.843850769773312e-05, "loss": 0.0106, "step": 41080 }, { "epoch": 61.604197901049474, "grad_norm": 0.2906956672668457, "learning_rate": 6.842313734699278e-05, "loss": 0.0102, "step": 41090 }, { "epoch": 61.6191904047976, "grad_norm": 0.1459580361843109, "learning_rate": 6.840776498152584e-05, "loss": 0.0089, "step": 41100 }, { "epoch": 61.634182908545725, "grad_norm": 0.25388267636299133, "learning_rate": 6.83923906030134e-05, "loss": 0.008, "step": 41110 }, { "epoch": 61.64917541229385, "grad_norm": 0.2356993556022644, "learning_rate": 6.837701421313677e-05, "loss": 0.0094, "step": 41120 }, { "epoch": 61.664167916041976, "grad_norm": 0.16648711264133453, "learning_rate": 6.83616358135775e-05, "loss": 0.0093, "step": 41130 }, { "epoch": 61.67916041979011, "grad_norm": 0.14572884142398834, "learning_rate": 6.834625540601734e-05, "loss": 0.0096, "step": 41140 }, { "epoch": 61.69415292353823, "grad_norm": 0.19646866619586945, "learning_rate": 6.833087299213829e-05, "loss": 0.0102, "step": 41150 }, { "epoch": 61.70914542728636, "grad_norm": 0.22563625872135162, "learning_rate": 6.83154885736225e-05, "loss": 0.0091, "step": 41160 }, { "epoch": 61.724137931034484, "grad_norm": 0.1115480288863182, "learning_rate": 6.830010215215244e-05, "loss": 0.0093, "step": 41170 }, { "epoch": 61.73913043478261, "grad_norm": 0.2509739398956299, "learning_rate": 6.828471372941072e-05, "loss": 0.0098, "step": 41180 }, { "epoch": 61.754122938530735, "grad_norm": 0.2180892378091812, "learning_rate": 6.826932330708018e-05, "loss": 0.0083, "step": 41190 }, { "epoch": 61.76911544227886, "grad_norm": 0.2819080650806427, "learning_rate": 6.825393088684393e-05, "loss": 0.0091, "step": 41200 }, { "epoch": 61.784107946026985, "grad_norm": 0.24342557787895203, "learning_rate": 6.823853647038524e-05, "loss": 0.0125, "step": 41210 }, { "epoch": 61.79910044977511, "grad_norm": 0.30360251665115356, "learning_rate": 6.822314005938762e-05, "loss": 0.0095, "step": 41220 }, { "epoch": 61.814092953523236, "grad_norm": 0.34868231415748596, "learning_rate": 6.820774165553479e-05, "loss": 0.0104, "step": 41230 }, { "epoch": 61.82908545727136, "grad_norm": 0.3066380023956299, "learning_rate": 6.819234126051072e-05, "loss": 0.0079, "step": 41240 }, { "epoch": 61.844077961019494, "grad_norm": 0.3109382092952728, "learning_rate": 6.817693887599956e-05, "loss": 0.0103, "step": 41250 }, { "epoch": 61.85907046476762, "grad_norm": 0.3516307771205902, "learning_rate": 6.816153450368568e-05, "loss": 0.0105, "step": 41260 }, { "epoch": 61.874062968515744, "grad_norm": 0.12996555864810944, "learning_rate": 6.814612814525372e-05, "loss": 0.0096, "step": 41270 }, { "epoch": 61.88905547226387, "grad_norm": 0.1922287791967392, "learning_rate": 6.813071980238846e-05, "loss": 0.009, "step": 41280 }, { "epoch": 61.904047976011995, "grad_norm": 0.3404574692249298, "learning_rate": 6.811530947677492e-05, "loss": 0.0091, "step": 41290 }, { "epoch": 61.91904047976012, "grad_norm": 2.9053094387054443, "learning_rate": 6.809989717009839e-05, "loss": 0.0089, "step": 41300 }, { "epoch": 61.934032983508246, "grad_norm": 0.21657387912273407, "learning_rate": 6.808448288404431e-05, "loss": 0.0089, "step": 41310 }, { "epoch": 61.94902548725637, "grad_norm": 0.9476584792137146, "learning_rate": 6.806906662029838e-05, "loss": 0.012, "step": 41320 }, { "epoch": 61.964017991004496, "grad_norm": 0.2438255250453949, "learning_rate": 6.805364838054647e-05, "loss": 0.0099, "step": 41330 }, { "epoch": 61.97901049475262, "grad_norm": 0.2331646829843521, "learning_rate": 6.803822816647474e-05, "loss": 0.0073, "step": 41340 }, { "epoch": 61.99400299850075, "grad_norm": 0.20102500915527344, "learning_rate": 6.802280597976949e-05, "loss": 0.0064, "step": 41350 }, { "epoch": 62.00899550224887, "grad_norm": 0.19955480098724365, "learning_rate": 6.800738182211727e-05, "loss": 0.0108, "step": 41360 }, { "epoch": 62.023988005997005, "grad_norm": 0.19838832318782806, "learning_rate": 6.799195569520485e-05, "loss": 0.0112, "step": 41370 }, { "epoch": 62.03898050974513, "grad_norm": 0.24901169538497925, "learning_rate": 6.79765276007192e-05, "loss": 0.0107, "step": 41380 }, { "epoch": 62.053973013493255, "grad_norm": 0.2721596360206604, "learning_rate": 6.796109754034753e-05, "loss": 0.0095, "step": 41390 }, { "epoch": 62.06896551724138, "grad_norm": 0.3011579215526581, "learning_rate": 6.794566551577724e-05, "loss": 0.0118, "step": 41400 }, { "epoch": 62.083958020989506, "grad_norm": 0.1670246720314026, "learning_rate": 6.793023152869594e-05, "loss": 0.0106, "step": 41410 }, { "epoch": 62.09895052473763, "grad_norm": 0.3019803464412689, "learning_rate": 6.791479558079147e-05, "loss": 0.011, "step": 41420 }, { "epoch": 62.11394302848576, "grad_norm": 0.3825138211250305, "learning_rate": 6.789935767375191e-05, "loss": 0.0135, "step": 41430 }, { "epoch": 62.12893553223388, "grad_norm": 0.26736918091773987, "learning_rate": 6.78839178092655e-05, "loss": 0.0122, "step": 41440 }, { "epoch": 62.14392803598201, "grad_norm": 0.21729737520217896, "learning_rate": 6.786847598902072e-05, "loss": 0.0119, "step": 41450 }, { "epoch": 62.15892053973013, "grad_norm": 0.2565448582172394, "learning_rate": 6.785303221470627e-05, "loss": 0.0106, "step": 41460 }, { "epoch": 62.17391304347826, "grad_norm": 0.24842575192451477, "learning_rate": 6.783758648801108e-05, "loss": 0.0094, "step": 41470 }, { "epoch": 62.18890554722638, "grad_norm": 0.24315868318080902, "learning_rate": 6.782213881062422e-05, "loss": 0.0108, "step": 41480 }, { "epoch": 62.203898050974516, "grad_norm": 1.113722562789917, "learning_rate": 6.780668918423508e-05, "loss": 0.0085, "step": 41490 }, { "epoch": 62.21889055472264, "grad_norm": 0.17294247448444366, "learning_rate": 6.779123761053317e-05, "loss": 0.0095, "step": 41500 }, { "epoch": 62.23388305847077, "grad_norm": 0.21016532182693481, "learning_rate": 6.777578409120826e-05, "loss": 0.0076, "step": 41510 }, { "epoch": 62.24887556221889, "grad_norm": 0.2174341380596161, "learning_rate": 6.776032862795033e-05, "loss": 0.0092, "step": 41520 }, { "epoch": 62.26386806596702, "grad_norm": 0.1795654445886612, "learning_rate": 6.774487122244956e-05, "loss": 0.0079, "step": 41530 }, { "epoch": 62.27886056971514, "grad_norm": 0.2826717495918274, "learning_rate": 6.772941187639637e-05, "loss": 0.0075, "step": 41540 }, { "epoch": 62.29385307346327, "grad_norm": 0.17743812501430511, "learning_rate": 6.771395059148134e-05, "loss": 0.0095, "step": 41550 }, { "epoch": 62.30884557721139, "grad_norm": 0.20153765380382538, "learning_rate": 6.76984873693953e-05, "loss": 0.0088, "step": 41560 }, { "epoch": 62.32383808095952, "grad_norm": 0.15470246970653534, "learning_rate": 6.768302221182931e-05, "loss": 0.0088, "step": 41570 }, { "epoch": 62.338830584707644, "grad_norm": 0.13173577189445496, "learning_rate": 6.766755512047457e-05, "loss": 0.0071, "step": 41580 }, { "epoch": 62.35382308845577, "grad_norm": 0.13355684280395508, "learning_rate": 6.765208609702259e-05, "loss": 0.0075, "step": 41590 }, { "epoch": 62.3688155922039, "grad_norm": 0.20335601270198822, "learning_rate": 6.763661514316499e-05, "loss": 0.0084, "step": 41600 }, { "epoch": 62.38380809595203, "grad_norm": 0.16333310306072235, "learning_rate": 6.76211422605937e-05, "loss": 0.0101, "step": 41610 }, { "epoch": 62.39880059970015, "grad_norm": 0.7162943482398987, "learning_rate": 6.760566745100076e-05, "loss": 0.0099, "step": 41620 }, { "epoch": 62.41379310344828, "grad_norm": 0.20555774867534637, "learning_rate": 6.75901907160785e-05, "loss": 0.0102, "step": 41630 }, { "epoch": 62.4287856071964, "grad_norm": 0.22609162330627441, "learning_rate": 6.757471205751943e-05, "loss": 0.0095, "step": 41640 }, { "epoch": 62.44377811094453, "grad_norm": 0.25400736927986145, "learning_rate": 6.75592314770163e-05, "loss": 0.0106, "step": 41650 }, { "epoch": 62.458770614692654, "grad_norm": 0.39086630940437317, "learning_rate": 6.7543748976262e-05, "loss": 0.0089, "step": 41660 }, { "epoch": 62.47376311844078, "grad_norm": 0.20382164418697357, "learning_rate": 6.752826455694968e-05, "loss": 0.0096, "step": 41670 }, { "epoch": 62.488755622188904, "grad_norm": 0.1569714993238449, "learning_rate": 6.751277822077271e-05, "loss": 0.0084, "step": 41680 }, { "epoch": 62.50374812593703, "grad_norm": 0.1750292181968689, "learning_rate": 6.749728996942465e-05, "loss": 0.0108, "step": 41690 }, { "epoch": 62.518740629685155, "grad_norm": 0.15215589106082916, "learning_rate": 6.748179980459924e-05, "loss": 0.0103, "step": 41700 }, { "epoch": 62.53373313343328, "grad_norm": 0.19400352239608765, "learning_rate": 6.746630772799052e-05, "loss": 0.0089, "step": 41710 }, { "epoch": 62.54872563718141, "grad_norm": 0.20215465128421783, "learning_rate": 6.745081374129262e-05, "loss": 0.0098, "step": 41720 }, { "epoch": 62.56371814092954, "grad_norm": 0.3654896914958954, "learning_rate": 6.743531784619998e-05, "loss": 0.0136, "step": 41730 }, { "epoch": 62.57871064467766, "grad_norm": 0.35574156045913696, "learning_rate": 6.741982004440719e-05, "loss": 0.009, "step": 41740 }, { "epoch": 62.59370314842579, "grad_norm": 0.23323547840118408, "learning_rate": 6.740432033760907e-05, "loss": 0.0078, "step": 41750 }, { "epoch": 62.608695652173914, "grad_norm": 0.20797447860240936, "learning_rate": 6.738881872750066e-05, "loss": 0.0083, "step": 41760 }, { "epoch": 62.62368815592204, "grad_norm": 0.15115904808044434, "learning_rate": 6.737331521577715e-05, "loss": 0.0078, "step": 41770 }, { "epoch": 62.638680659670165, "grad_norm": 0.3119097948074341, "learning_rate": 6.735780980413403e-05, "loss": 0.0087, "step": 41780 }, { "epoch": 62.65367316341829, "grad_norm": 0.22628654539585114, "learning_rate": 6.734230249426692e-05, "loss": 0.0092, "step": 41790 }, { "epoch": 62.668665667166415, "grad_norm": 0.21559162437915802, "learning_rate": 6.732679328787168e-05, "loss": 0.0099, "step": 41800 }, { "epoch": 62.68365817091454, "grad_norm": 0.18640929460525513, "learning_rate": 6.731128218664438e-05, "loss": 0.0115, "step": 41810 }, { "epoch": 62.698650674662666, "grad_norm": 0.311330109834671, "learning_rate": 6.729576919228129e-05, "loss": 0.0107, "step": 41820 }, { "epoch": 62.71364317841079, "grad_norm": 0.14857003092765808, "learning_rate": 6.728025430647888e-05, "loss": 0.0098, "step": 41830 }, { "epoch": 62.728635682158924, "grad_norm": 0.20629285275936127, "learning_rate": 6.726473753093383e-05, "loss": 0.0094, "step": 41840 }, { "epoch": 62.74362818590705, "grad_norm": 0.24414844810962677, "learning_rate": 6.724921886734305e-05, "loss": 0.0081, "step": 41850 }, { "epoch": 62.758620689655174, "grad_norm": 0.21515385806560516, "learning_rate": 6.723369831740363e-05, "loss": 0.0086, "step": 41860 }, { "epoch": 62.7736131934033, "grad_norm": 0.21275655925273895, "learning_rate": 6.721817588281288e-05, "loss": 0.0097, "step": 41870 }, { "epoch": 62.788605697151425, "grad_norm": 0.23953543603420258, "learning_rate": 6.720265156526828e-05, "loss": 0.0119, "step": 41880 }, { "epoch": 62.80359820089955, "grad_norm": 0.15081937611103058, "learning_rate": 6.718712536646758e-05, "loss": 0.0078, "step": 41890 }, { "epoch": 62.818590704647676, "grad_norm": 0.17890390753746033, "learning_rate": 6.71715972881087e-05, "loss": 0.0105, "step": 41900 }, { "epoch": 62.8335832083958, "grad_norm": 0.14911672472953796, "learning_rate": 6.715606733188974e-05, "loss": 0.0081, "step": 41910 }, { "epoch": 62.848575712143926, "grad_norm": 0.264635294675827, "learning_rate": 6.714053549950909e-05, "loss": 0.0093, "step": 41920 }, { "epoch": 62.86356821589205, "grad_norm": 0.2710894048213959, "learning_rate": 6.71250017926652e-05, "loss": 0.012, "step": 41930 }, { "epoch": 62.87856071964018, "grad_norm": 0.2535136342048645, "learning_rate": 6.71094662130569e-05, "loss": 0.0094, "step": 41940 }, { "epoch": 62.89355322338831, "grad_norm": 0.2599274814128876, "learning_rate": 6.709392876238307e-05, "loss": 0.012, "step": 41950 }, { "epoch": 62.908545727136435, "grad_norm": 0.1784544438123703, "learning_rate": 6.707838944234289e-05, "loss": 0.0119, "step": 41960 }, { "epoch": 62.92353823088456, "grad_norm": 0.2097054123878479, "learning_rate": 6.706284825463574e-05, "loss": 0.0079, "step": 41970 }, { "epoch": 62.938530734632685, "grad_norm": 0.3166912794113159, "learning_rate": 6.704730520096114e-05, "loss": 0.0108, "step": 41980 }, { "epoch": 62.95352323838081, "grad_norm": 0.2756068706512451, "learning_rate": 6.703176028301888e-05, "loss": 0.0079, "step": 41990 }, { "epoch": 62.968515742128936, "grad_norm": 0.24878834187984467, "learning_rate": 6.701621350250892e-05, "loss": 0.0097, "step": 42000 }, { "epoch": 62.98350824587706, "grad_norm": 0.22379527986049652, "learning_rate": 6.700066486113144e-05, "loss": 0.0099, "step": 42010 }, { "epoch": 62.99850074962519, "grad_norm": 0.26389080286026, "learning_rate": 6.69851143605868e-05, "loss": 0.0112, "step": 42020 }, { "epoch": 63.01349325337331, "grad_norm": 0.2212541550397873, "learning_rate": 6.696956200257561e-05, "loss": 0.0095, "step": 42030 }, { "epoch": 63.02848575712144, "grad_norm": 0.2001921534538269, "learning_rate": 6.695400778879863e-05, "loss": 0.0095, "step": 42040 }, { "epoch": 63.04347826086956, "grad_norm": 0.19585935771465302, "learning_rate": 6.693845172095683e-05, "loss": 0.0104, "step": 42050 }, { "epoch": 63.05847076461769, "grad_norm": 0.31472811102867126, "learning_rate": 6.692289380075142e-05, "loss": 0.0096, "step": 42060 }, { "epoch": 63.07346326836582, "grad_norm": 0.32526564598083496, "learning_rate": 6.690733402988379e-05, "loss": 0.0089, "step": 42070 }, { "epoch": 63.088455772113946, "grad_norm": 0.21243616938591003, "learning_rate": 6.689177241005553e-05, "loss": 0.0138, "step": 42080 }, { "epoch": 63.10344827586207, "grad_norm": 0.19234979152679443, "learning_rate": 6.687620894296844e-05, "loss": 0.0111, "step": 42090 }, { "epoch": 63.1184407796102, "grad_norm": 0.2037801742553711, "learning_rate": 6.686064363032451e-05, "loss": 0.0079, "step": 42100 }, { "epoch": 63.13343328335832, "grad_norm": 0.16192063689231873, "learning_rate": 6.684507647382596e-05, "loss": 0.0085, "step": 42110 }, { "epoch": 63.14842578710645, "grad_norm": 0.18745508790016174, "learning_rate": 6.682950747517513e-05, "loss": 0.0072, "step": 42120 }, { "epoch": 63.16341829085457, "grad_norm": 0.3178294897079468, "learning_rate": 6.68139366360747e-05, "loss": 0.0112, "step": 42130 }, { "epoch": 63.1784107946027, "grad_norm": 0.24783580005168915, "learning_rate": 6.679836395822744e-05, "loss": 0.0078, "step": 42140 }, { "epoch": 63.19340329835082, "grad_norm": 1.0292057991027832, "learning_rate": 6.678278944333633e-05, "loss": 0.0084, "step": 42150 }, { "epoch": 63.20839580209895, "grad_norm": 0.22086739540100098, "learning_rate": 6.676721309310462e-05, "loss": 0.0135, "step": 42160 }, { "epoch": 63.223388305847074, "grad_norm": 0.20095351338386536, "learning_rate": 6.67516349092357e-05, "loss": 0.0082, "step": 42170 }, { "epoch": 63.2383808095952, "grad_norm": 0.17212797701358795, "learning_rate": 6.673605489343315e-05, "loss": 0.0097, "step": 42180 }, { "epoch": 63.25337331334333, "grad_norm": 0.18478970229625702, "learning_rate": 6.672047304740081e-05, "loss": 0.0072, "step": 42190 }, { "epoch": 63.26836581709146, "grad_norm": 0.15276969969272614, "learning_rate": 6.670488937284268e-05, "loss": 0.0093, "step": 42200 }, { "epoch": 63.28335832083958, "grad_norm": 0.20459847152233124, "learning_rate": 6.668930387146294e-05, "loss": 0.0092, "step": 42210 }, { "epoch": 63.29835082458771, "grad_norm": 0.21244317293167114, "learning_rate": 6.667371654496605e-05, "loss": 0.0096, "step": 42220 }, { "epoch": 63.31334332833583, "grad_norm": 0.12075549364089966, "learning_rate": 6.665812739505659e-05, "loss": 0.0082, "step": 42230 }, { "epoch": 63.32833583208396, "grad_norm": 0.2178458571434021, "learning_rate": 6.664253642343935e-05, "loss": 0.0096, "step": 42240 }, { "epoch": 63.343328335832084, "grad_norm": 0.24523383378982544, "learning_rate": 6.662694363181935e-05, "loss": 0.0099, "step": 42250 }, { "epoch": 63.35832083958021, "grad_norm": 0.2271161675453186, "learning_rate": 6.66113490219018e-05, "loss": 0.0105, "step": 42260 }, { "epoch": 63.373313343328334, "grad_norm": 0.17344078421592712, "learning_rate": 6.659575259539207e-05, "loss": 0.0103, "step": 42270 }, { "epoch": 63.38830584707646, "grad_norm": 0.26259011030197144, "learning_rate": 6.65801543539958e-05, "loss": 0.01, "step": 42280 }, { "epoch": 63.403298350824585, "grad_norm": 0.2157876044511795, "learning_rate": 6.65645542994188e-05, "loss": 0.0103, "step": 42290 }, { "epoch": 63.41829085457271, "grad_norm": 0.14546039700508118, "learning_rate": 6.654895243336702e-05, "loss": 0.0098, "step": 42300 }, { "epoch": 63.43328335832084, "grad_norm": 0.21818901598453522, "learning_rate": 6.65333487575467e-05, "loss": 0.0084, "step": 42310 }, { "epoch": 63.44827586206897, "grad_norm": 0.25919175148010254, "learning_rate": 6.651774327366422e-05, "loss": 0.0096, "step": 42320 }, { "epoch": 63.46326836581709, "grad_norm": 0.18502208590507507, "learning_rate": 6.650213598342613e-05, "loss": 0.0092, "step": 42330 }, { "epoch": 63.47826086956522, "grad_norm": 0.2548598647117615, "learning_rate": 6.648652688853931e-05, "loss": 0.0093, "step": 42340 }, { "epoch": 63.493253373313344, "grad_norm": 0.21020476520061493, "learning_rate": 6.647091599071066e-05, "loss": 0.009, "step": 42350 }, { "epoch": 63.50824587706147, "grad_norm": 0.1791004091501236, "learning_rate": 6.645530329164743e-05, "loss": 0.0077, "step": 42360 }, { "epoch": 63.523238380809595, "grad_norm": 0.24569955468177795, "learning_rate": 6.643968879305694e-05, "loss": 0.0096, "step": 42370 }, { "epoch": 63.53823088455772, "grad_norm": 0.2638804018497467, "learning_rate": 6.642407249664683e-05, "loss": 0.0104, "step": 42380 }, { "epoch": 63.553223388305845, "grad_norm": 0.22235912084579468, "learning_rate": 6.640845440412483e-05, "loss": 0.0115, "step": 42390 }, { "epoch": 63.56821589205397, "grad_norm": 0.27674007415771484, "learning_rate": 6.639283451719893e-05, "loss": 0.009, "step": 42400 }, { "epoch": 63.583208395802096, "grad_norm": 0.469996839761734, "learning_rate": 6.637721283757729e-05, "loss": 0.0084, "step": 42410 }, { "epoch": 63.59820089955023, "grad_norm": 0.2505301833152771, "learning_rate": 6.636158936696828e-05, "loss": 0.0095, "step": 42420 }, { "epoch": 63.613193403298354, "grad_norm": 0.31550031900405884, "learning_rate": 6.634596410708047e-05, "loss": 0.0101, "step": 42430 }, { "epoch": 63.62818590704648, "grad_norm": 0.16535848379135132, "learning_rate": 6.633033705962257e-05, "loss": 0.0081, "step": 42440 }, { "epoch": 63.643178410794604, "grad_norm": 0.20312577486038208, "learning_rate": 6.631470822630359e-05, "loss": 0.0111, "step": 42450 }, { "epoch": 63.65817091454273, "grad_norm": 0.2110724300146103, "learning_rate": 6.629907760883263e-05, "loss": 0.0088, "step": 42460 }, { "epoch": 63.673163418290855, "grad_norm": 0.259861558675766, "learning_rate": 6.628344520891905e-05, "loss": 0.0092, "step": 42470 }, { "epoch": 63.68815592203898, "grad_norm": 0.16416431963443756, "learning_rate": 6.62678110282724e-05, "loss": 0.0091, "step": 42480 }, { "epoch": 63.703148425787106, "grad_norm": 0.3204078674316406, "learning_rate": 6.625217506860237e-05, "loss": 0.0113, "step": 42490 }, { "epoch": 63.71814092953523, "grad_norm": 0.23495963215827942, "learning_rate": 6.623653733161892e-05, "loss": 0.0087, "step": 42500 }, { "epoch": 63.733133433283356, "grad_norm": 0.20671342313289642, "learning_rate": 6.622089781903217e-05, "loss": 0.008, "step": 42510 }, { "epoch": 63.74812593703148, "grad_norm": 0.19495277106761932, "learning_rate": 6.620525653255241e-05, "loss": 0.0079, "step": 42520 }, { "epoch": 63.76311844077961, "grad_norm": 0.20290344953536987, "learning_rate": 6.618961347389015e-05, "loss": 0.0071, "step": 42530 }, { "epoch": 63.77811094452774, "grad_norm": 0.13163910806179047, "learning_rate": 6.617396864475613e-05, "loss": 0.0096, "step": 42540 }, { "epoch": 63.793103448275865, "grad_norm": 0.18339630961418152, "learning_rate": 6.61583220468612e-05, "loss": 0.007, "step": 42550 }, { "epoch": 63.80809595202399, "grad_norm": 0.22720125317573547, "learning_rate": 6.614267368191645e-05, "loss": 0.0093, "step": 42560 }, { "epoch": 63.823088455772115, "grad_norm": 0.2379552572965622, "learning_rate": 6.61270235516332e-05, "loss": 0.0093, "step": 42570 }, { "epoch": 63.83808095952024, "grad_norm": 0.1883123815059662, "learning_rate": 6.611137165772287e-05, "loss": 0.01, "step": 42580 }, { "epoch": 63.853073463268366, "grad_norm": 0.17418815195560455, "learning_rate": 6.60957180018972e-05, "loss": 0.0094, "step": 42590 }, { "epoch": 63.86806596701649, "grad_norm": 0.23384465277194977, "learning_rate": 6.608006258586797e-05, "loss": 0.0087, "step": 42600 }, { "epoch": 63.88305847076462, "grad_norm": 0.20343457162380219, "learning_rate": 6.606440541134731e-05, "loss": 0.0099, "step": 42610 }, { "epoch": 63.89805097451274, "grad_norm": 0.1526123434305191, "learning_rate": 6.60487464800474e-05, "loss": 0.0116, "step": 42620 }, { "epoch": 63.91304347826087, "grad_norm": 0.1456962674856186, "learning_rate": 6.603308579368071e-05, "loss": 0.0087, "step": 42630 }, { "epoch": 63.92803598200899, "grad_norm": 0.1668677181005478, "learning_rate": 6.601742335395987e-05, "loss": 0.0071, "step": 42640 }, { "epoch": 63.94302848575712, "grad_norm": 0.2303205281496048, "learning_rate": 6.600175916259769e-05, "loss": 0.0103, "step": 42650 }, { "epoch": 63.95802098950525, "grad_norm": 0.2836666703224182, "learning_rate": 6.598609322130718e-05, "loss": 0.0125, "step": 42660 }, { "epoch": 63.973013493253376, "grad_norm": 0.1459188461303711, "learning_rate": 6.597042553180155e-05, "loss": 0.0073, "step": 42670 }, { "epoch": 63.9880059970015, "grad_norm": 0.21863830089569092, "learning_rate": 6.59547560957942e-05, "loss": 0.01, "step": 42680 }, { "epoch": 64.00299850074963, "grad_norm": 0.208054780960083, "learning_rate": 6.59390849149987e-05, "loss": 0.0081, "step": 42690 }, { "epoch": 64.01799100449774, "grad_norm": 0.13120394945144653, "learning_rate": 6.592341199112886e-05, "loss": 0.0101, "step": 42700 }, { "epoch": 64.03298350824588, "grad_norm": 0.1723673939704895, "learning_rate": 6.590773732589861e-05, "loss": 0.0114, "step": 42710 }, { "epoch": 64.04797601199401, "grad_norm": 0.2096526175737381, "learning_rate": 6.589206092102213e-05, "loss": 0.009, "step": 42720 }, { "epoch": 64.06296851574213, "grad_norm": 0.2670370936393738, "learning_rate": 6.587638277821375e-05, "loss": 0.0105, "step": 42730 }, { "epoch": 64.07796101949026, "grad_norm": 0.22993430495262146, "learning_rate": 6.586070289918803e-05, "loss": 0.0088, "step": 42740 }, { "epoch": 64.09295352323838, "grad_norm": 0.4255233705043793, "learning_rate": 6.584502128565968e-05, "loss": 0.0092, "step": 42750 }, { "epoch": 64.10794602698651, "grad_norm": 0.22983892261981964, "learning_rate": 6.582933793934363e-05, "loss": 0.0086, "step": 42760 }, { "epoch": 64.12293853073463, "grad_norm": 0.18695086240768433, "learning_rate": 6.581365286195498e-05, "loss": 0.0079, "step": 42770 }, { "epoch": 64.13793103448276, "grad_norm": 0.17640523612499237, "learning_rate": 6.579796605520903e-05, "loss": 0.0077, "step": 42780 }, { "epoch": 64.15292353823088, "grad_norm": 0.23675693571567535, "learning_rate": 6.578227752082127e-05, "loss": 0.0098, "step": 42790 }, { "epoch": 64.16791604197901, "grad_norm": 0.2668551206588745, "learning_rate": 6.576658726050735e-05, "loss": 0.0105, "step": 42800 }, { "epoch": 64.18290854572713, "grad_norm": 0.19690535962581635, "learning_rate": 6.575089527598316e-05, "loss": 0.0092, "step": 42810 }, { "epoch": 64.19790104947526, "grad_norm": 0.11941397190093994, "learning_rate": 6.573520156896476e-05, "loss": 0.0095, "step": 42820 }, { "epoch": 64.2128935532234, "grad_norm": 0.19175398349761963, "learning_rate": 6.571950614116835e-05, "loss": 0.009, "step": 42830 }, { "epoch": 64.22788605697151, "grad_norm": 0.3556591272354126, "learning_rate": 6.570380899431039e-05, "loss": 0.0091, "step": 42840 }, { "epoch": 64.24287856071965, "grad_norm": 0.14099465310573578, "learning_rate": 6.568811013010749e-05, "loss": 0.0098, "step": 42850 }, { "epoch": 64.25787106446776, "grad_norm": 0.2639104127883911, "learning_rate": 6.567240955027645e-05, "loss": 0.0082, "step": 42860 }, { "epoch": 64.2728635682159, "grad_norm": 0.18660469353199005, "learning_rate": 6.565670725653427e-05, "loss": 0.0099, "step": 42870 }, { "epoch": 64.28785607196401, "grad_norm": 0.16675277054309845, "learning_rate": 6.564100325059812e-05, "loss": 0.0084, "step": 42880 }, { "epoch": 64.30284857571215, "grad_norm": 0.19179369509220123, "learning_rate": 6.562529753418538e-05, "loss": 0.0079, "step": 42890 }, { "epoch": 64.31784107946027, "grad_norm": 0.2912796437740326, "learning_rate": 6.56095901090136e-05, "loss": 0.0091, "step": 42900 }, { "epoch": 64.3328335832084, "grad_norm": 0.20263373851776123, "learning_rate": 6.55938809768005e-05, "loss": 0.0089, "step": 42910 }, { "epoch": 64.34782608695652, "grad_norm": 0.19870147109031677, "learning_rate": 6.557817013926402e-05, "loss": 0.0101, "step": 42920 }, { "epoch": 64.36281859070465, "grad_norm": 0.31779342889785767, "learning_rate": 6.556245759812227e-05, "loss": 0.0125, "step": 42930 }, { "epoch": 64.37781109445277, "grad_norm": 0.29717254638671875, "learning_rate": 6.554674335509357e-05, "loss": 0.0104, "step": 42940 }, { "epoch": 64.3928035982009, "grad_norm": 0.1642453372478485, "learning_rate": 6.553102741189638e-05, "loss": 0.0118, "step": 42950 }, { "epoch": 64.40779610194903, "grad_norm": 0.18631602823734283, "learning_rate": 6.55153097702494e-05, "loss": 0.0091, "step": 42960 }, { "epoch": 64.42278860569715, "grad_norm": 0.21303972601890564, "learning_rate": 6.549959043187146e-05, "loss": 0.0104, "step": 42970 }, { "epoch": 64.43778110944528, "grad_norm": 0.2072198987007141, "learning_rate": 6.54838693984816e-05, "loss": 0.0082, "step": 42980 }, { "epoch": 64.4527736131934, "grad_norm": 0.2127680629491806, "learning_rate": 6.546814667179909e-05, "loss": 0.0081, "step": 42990 }, { "epoch": 64.46776611694153, "grad_norm": 0.13266462087631226, "learning_rate": 6.545242225354328e-05, "loss": 0.007, "step": 43000 }, { "epoch": 64.48275862068965, "grad_norm": 0.15114013850688934, "learning_rate": 6.543669614543383e-05, "loss": 0.0098, "step": 43010 }, { "epoch": 64.49775112443778, "grad_norm": 0.21054039895534515, "learning_rate": 6.542096834919049e-05, "loss": 0.0097, "step": 43020 }, { "epoch": 64.5127436281859, "grad_norm": 0.21371959149837494, "learning_rate": 6.540523886653324e-05, "loss": 0.0085, "step": 43030 }, { "epoch": 64.52773613193403, "grad_norm": 0.2559235095977783, "learning_rate": 6.538950769918222e-05, "loss": 0.0091, "step": 43040 }, { "epoch": 64.54272863568215, "grad_norm": 0.1355641782283783, "learning_rate": 6.537377484885779e-05, "loss": 0.0115, "step": 43050 }, { "epoch": 64.55772113943029, "grad_norm": 0.18095211684703827, "learning_rate": 6.535804031728045e-05, "loss": 0.0069, "step": 43060 }, { "epoch": 64.57271364317842, "grad_norm": 0.1677771657705307, "learning_rate": 6.534230410617092e-05, "loss": 0.0098, "step": 43070 }, { "epoch": 64.58770614692654, "grad_norm": 0.19299638271331787, "learning_rate": 6.532656621725007e-05, "loss": 0.0084, "step": 43080 }, { "epoch": 64.60269865067467, "grad_norm": 0.19038823246955872, "learning_rate": 6.531082665223898e-05, "loss": 0.0081, "step": 43090 }, { "epoch": 64.61769115442279, "grad_norm": 0.261261910200119, "learning_rate": 6.529508541285889e-05, "loss": 0.0098, "step": 43100 }, { "epoch": 64.63268365817092, "grad_norm": 0.222450390458107, "learning_rate": 6.527934250083128e-05, "loss": 0.0074, "step": 43110 }, { "epoch": 64.64767616191904, "grad_norm": 0.29058837890625, "learning_rate": 6.526359791787775e-05, "loss": 0.0073, "step": 43120 }, { "epoch": 64.66266866566717, "grad_norm": 0.2229239046573639, "learning_rate": 6.524785166572009e-05, "loss": 0.0085, "step": 43130 }, { "epoch": 64.67766116941529, "grad_norm": 0.2094084471464157, "learning_rate": 6.523210374608029e-05, "loss": 0.0129, "step": 43140 }, { "epoch": 64.69265367316342, "grad_norm": 0.33048003911972046, "learning_rate": 6.521635416068054e-05, "loss": 0.0101, "step": 43150 }, { "epoch": 64.70764617691154, "grad_norm": 0.17382554709911346, "learning_rate": 6.520060291124317e-05, "loss": 0.0108, "step": 43160 }, { "epoch": 64.72263868065967, "grad_norm": 0.16892297565937042, "learning_rate": 6.518484999949072e-05, "loss": 0.009, "step": 43170 }, { "epoch": 64.7376311844078, "grad_norm": 0.22539904713630676, "learning_rate": 6.516909542714591e-05, "loss": 0.0095, "step": 43180 }, { "epoch": 64.75262368815592, "grad_norm": 0.23465900123119354, "learning_rate": 6.515333919593161e-05, "loss": 0.0095, "step": 43190 }, { "epoch": 64.76761619190405, "grad_norm": 0.2696768045425415, "learning_rate": 6.513758130757094e-05, "loss": 0.0081, "step": 43200 }, { "epoch": 64.78260869565217, "grad_norm": 0.21731899678707123, "learning_rate": 6.512182176378713e-05, "loss": 0.0093, "step": 43210 }, { "epoch": 64.7976011994003, "grad_norm": 0.19995656609535217, "learning_rate": 6.510606056630362e-05, "loss": 0.0085, "step": 43220 }, { "epoch": 64.81259370314842, "grad_norm": 0.22978469729423523, "learning_rate": 6.509029771684405e-05, "loss": 0.0136, "step": 43230 }, { "epoch": 64.82758620689656, "grad_norm": 0.26568686962127686, "learning_rate": 6.50745332171322e-05, "loss": 0.0094, "step": 43240 }, { "epoch": 64.84257871064467, "grad_norm": 0.1869182586669922, "learning_rate": 6.505876706889207e-05, "loss": 0.0082, "step": 43250 }, { "epoch": 64.8575712143928, "grad_norm": 0.22258694469928741, "learning_rate": 6.504299927384781e-05, "loss": 0.012, "step": 43260 }, { "epoch": 64.87256371814092, "grad_norm": 0.1958281695842743, "learning_rate": 6.502722983372378e-05, "loss": 0.0088, "step": 43270 }, { "epoch": 64.88755622188906, "grad_norm": 0.16433487832546234, "learning_rate": 6.501145875024446e-05, "loss": 0.0078, "step": 43280 }, { "epoch": 64.90254872563717, "grad_norm": 0.15582339465618134, "learning_rate": 6.499568602513459e-05, "loss": 0.0079, "step": 43290 }, { "epoch": 64.91754122938531, "grad_norm": 0.22899949550628662, "learning_rate": 6.497991166011903e-05, "loss": 0.0092, "step": 43300 }, { "epoch": 64.93253373313344, "grad_norm": 0.1752614825963974, "learning_rate": 6.49641356569229e-05, "loss": 0.0081, "step": 43310 }, { "epoch": 64.94752623688156, "grad_norm": 0.22367772459983826, "learning_rate": 6.494835801727135e-05, "loss": 0.009, "step": 43320 }, { "epoch": 64.96251874062969, "grad_norm": 0.24942325055599213, "learning_rate": 6.493257874288986e-05, "loss": 0.0078, "step": 43330 }, { "epoch": 64.97751124437781, "grad_norm": 0.20661887526512146, "learning_rate": 6.491679783550399e-05, "loss": 0.0079, "step": 43340 }, { "epoch": 64.99250374812594, "grad_norm": 0.4800874590873718, "learning_rate": 6.490101529683957e-05, "loss": 0.0108, "step": 43350 }, { "epoch": 65.00749625187406, "grad_norm": 0.19111311435699463, "learning_rate": 6.488523112862248e-05, "loss": 0.0102, "step": 43360 }, { "epoch": 65.02248875562219, "grad_norm": 0.2716827094554901, "learning_rate": 6.486944533257891e-05, "loss": 0.0096, "step": 43370 }, { "epoch": 65.03748125937031, "grad_norm": 0.21145065128803253, "learning_rate": 6.485365791043517e-05, "loss": 0.0079, "step": 43380 }, { "epoch": 65.05247376311844, "grad_norm": 0.1520065814256668, "learning_rate": 6.483786886391772e-05, "loss": 0.0095, "step": 43390 }, { "epoch": 65.06746626686656, "grad_norm": 0.29219940304756165, "learning_rate": 6.482207819475323e-05, "loss": 0.011, "step": 43400 }, { "epoch": 65.08245877061469, "grad_norm": 0.21106480062007904, "learning_rate": 6.480628590466857e-05, "loss": 0.0091, "step": 43410 }, { "epoch": 65.09745127436283, "grad_norm": 0.3334713280200958, "learning_rate": 6.479049199539074e-05, "loss": 0.0086, "step": 43420 }, { "epoch": 65.11244377811094, "grad_norm": 0.22618518769741058, "learning_rate": 6.477469646864693e-05, "loss": 0.0106, "step": 43430 }, { "epoch": 65.12743628185908, "grad_norm": 0.21762311458587646, "learning_rate": 6.475889932616454e-05, "loss": 0.0118, "step": 43440 }, { "epoch": 65.1424287856072, "grad_norm": 0.301949143409729, "learning_rate": 6.474310056967111e-05, "loss": 0.009, "step": 43450 }, { "epoch": 65.15742128935533, "grad_norm": 0.24903523921966553, "learning_rate": 6.472730020089437e-05, "loss": 0.0097, "step": 43460 }, { "epoch": 65.17241379310344, "grad_norm": 0.2318630963563919, "learning_rate": 6.471149822156222e-05, "loss": 0.0099, "step": 43470 }, { "epoch": 65.18740629685158, "grad_norm": 0.20891189575195312, "learning_rate": 6.469569463340275e-05, "loss": 0.0112, "step": 43480 }, { "epoch": 65.2023988005997, "grad_norm": 0.30476102232933044, "learning_rate": 6.467988943814418e-05, "loss": 0.009, "step": 43490 }, { "epoch": 65.21739130434783, "grad_norm": 0.20243801176548004, "learning_rate": 6.4664082637515e-05, "loss": 0.0118, "step": 43500 }, { "epoch": 65.23238380809595, "grad_norm": 0.29568764567375183, "learning_rate": 6.464827423324377e-05, "loss": 0.011, "step": 43510 }, { "epoch": 65.24737631184408, "grad_norm": 0.33471494913101196, "learning_rate": 6.463246422705931e-05, "loss": 0.0081, "step": 43520 }, { "epoch": 65.2623688155922, "grad_norm": 0.21376553177833557, "learning_rate": 6.461665262069055e-05, "loss": 0.0089, "step": 43530 }, { "epoch": 65.27736131934033, "grad_norm": 0.1861022710800171, "learning_rate": 6.460083941586665e-05, "loss": 0.0087, "step": 43540 }, { "epoch": 65.29235382308846, "grad_norm": 0.20566166937351227, "learning_rate": 6.45850246143169e-05, "loss": 0.0085, "step": 43550 }, { "epoch": 65.30734632683658, "grad_norm": 0.21982383728027344, "learning_rate": 6.456920821777077e-05, "loss": 0.0105, "step": 43560 }, { "epoch": 65.32233883058471, "grad_norm": 0.21340447664260864, "learning_rate": 6.455339022795796e-05, "loss": 0.0113, "step": 43570 }, { "epoch": 65.33733133433283, "grad_norm": 0.16766473650932312, "learning_rate": 6.453757064660825e-05, "loss": 0.0069, "step": 43580 }, { "epoch": 65.35232383808096, "grad_norm": 0.2078612595796585, "learning_rate": 6.452174947545169e-05, "loss": 0.0079, "step": 43590 }, { "epoch": 65.36731634182908, "grad_norm": 0.3976500630378723, "learning_rate": 6.450592671621842e-05, "loss": 0.0079, "step": 43600 }, { "epoch": 65.38230884557721, "grad_norm": 0.25417181849479675, "learning_rate": 6.449010237063882e-05, "loss": 0.0095, "step": 43610 }, { "epoch": 65.39730134932533, "grad_norm": 0.3108106255531311, "learning_rate": 6.447427644044343e-05, "loss": 0.0104, "step": 43620 }, { "epoch": 65.41229385307346, "grad_norm": 0.2052381932735443, "learning_rate": 6.44584489273629e-05, "loss": 0.0099, "step": 43630 }, { "epoch": 65.42728635682158, "grad_norm": 0.1985926628112793, "learning_rate": 6.444261983312817e-05, "loss": 0.0087, "step": 43640 }, { "epoch": 65.44227886056971, "grad_norm": 0.1593591570854187, "learning_rate": 6.442678915947023e-05, "loss": 0.0089, "step": 43650 }, { "epoch": 65.45727136431785, "grad_norm": 0.35633060336112976, "learning_rate": 6.441095690812032e-05, "loss": 0.0091, "step": 43660 }, { "epoch": 65.47226386806597, "grad_norm": 0.26780569553375244, "learning_rate": 6.439512308080982e-05, "loss": 0.0097, "step": 43670 }, { "epoch": 65.4872563718141, "grad_norm": 0.22512829303741455, "learning_rate": 6.43792876792703e-05, "loss": 0.0081, "step": 43680 }, { "epoch": 65.50224887556222, "grad_norm": 0.14307929575443268, "learning_rate": 6.43634507052335e-05, "loss": 0.0102, "step": 43690 }, { "epoch": 65.51724137931035, "grad_norm": 0.2624329626560211, "learning_rate": 6.434761216043135e-05, "loss": 0.0094, "step": 43700 }, { "epoch": 65.53223388305847, "grad_norm": 0.5546102523803711, "learning_rate": 6.433177204659589e-05, "loss": 0.0116, "step": 43710 }, { "epoch": 65.5472263868066, "grad_norm": 0.22463969886302948, "learning_rate": 6.431593036545938e-05, "loss": 0.0099, "step": 43720 }, { "epoch": 65.56221889055472, "grad_norm": 0.2403312772512436, "learning_rate": 6.430008711875426e-05, "loss": 0.0095, "step": 43730 }, { "epoch": 65.57721139430285, "grad_norm": 0.26585543155670166, "learning_rate": 6.42842423082131e-05, "loss": 0.0079, "step": 43740 }, { "epoch": 65.59220389805097, "grad_norm": 0.2388473004102707, "learning_rate": 6.426839593556868e-05, "loss": 0.0091, "step": 43750 }, { "epoch": 65.6071964017991, "grad_norm": 0.18724679946899414, "learning_rate": 6.425254800255392e-05, "loss": 0.0093, "step": 43760 }, { "epoch": 65.62218890554723, "grad_norm": 0.18334046006202698, "learning_rate": 6.423669851090196e-05, "loss": 0.0077, "step": 43770 }, { "epoch": 65.63718140929535, "grad_norm": 0.13830798864364624, "learning_rate": 6.422084746234604e-05, "loss": 0.0088, "step": 43780 }, { "epoch": 65.65217391304348, "grad_norm": 0.1851339489221573, "learning_rate": 6.420499485861965e-05, "loss": 0.01, "step": 43790 }, { "epoch": 65.6671664167916, "grad_norm": 0.24458755552768707, "learning_rate": 6.418914070145636e-05, "loss": 0.0078, "step": 43800 }, { "epoch": 65.68215892053973, "grad_norm": 0.31977522373199463, "learning_rate": 6.417328499258996e-05, "loss": 0.0075, "step": 43810 }, { "epoch": 65.69715142428785, "grad_norm": 0.24735450744628906, "learning_rate": 6.415742773375446e-05, "loss": 0.0091, "step": 43820 }, { "epoch": 65.71214392803599, "grad_norm": 0.25109267234802246, "learning_rate": 6.414156892668393e-05, "loss": 0.0108, "step": 43830 }, { "epoch": 65.7271364317841, "grad_norm": 0.13959532976150513, "learning_rate": 6.412570857311267e-05, "loss": 0.0067, "step": 43840 }, { "epoch": 65.74212893553224, "grad_norm": 0.14699849486351013, "learning_rate": 6.410984667477518e-05, "loss": 0.0088, "step": 43850 }, { "epoch": 65.75712143928035, "grad_norm": 0.18658088147640228, "learning_rate": 6.409398323340607e-05, "loss": 0.0097, "step": 43860 }, { "epoch": 65.77211394302849, "grad_norm": 0.2406596541404724, "learning_rate": 6.407811825074012e-05, "loss": 0.0107, "step": 43870 }, { "epoch": 65.7871064467766, "grad_norm": 0.1427861601114273, "learning_rate": 6.406225172851234e-05, "loss": 0.0091, "step": 43880 }, { "epoch": 65.80209895052474, "grad_norm": 0.1495729684829712, "learning_rate": 6.404638366845786e-05, "loss": 0.0079, "step": 43890 }, { "epoch": 65.81709145427287, "grad_norm": 0.14845891296863556, "learning_rate": 6.403051407231196e-05, "loss": 0.0087, "step": 43900 }, { "epoch": 65.83208395802099, "grad_norm": 0.19168205559253693, "learning_rate": 6.401464294181016e-05, "loss": 0.01, "step": 43910 }, { "epoch": 65.84707646176912, "grad_norm": 0.19133298099040985, "learning_rate": 6.399877027868808e-05, "loss": 0.0067, "step": 43920 }, { "epoch": 65.86206896551724, "grad_norm": 0.13548362255096436, "learning_rate": 6.39828960846815e-05, "loss": 0.0085, "step": 43930 }, { "epoch": 65.87706146926537, "grad_norm": 0.15664005279541016, "learning_rate": 6.396702036152644e-05, "loss": 0.0103, "step": 43940 }, { "epoch": 65.89205397301349, "grad_norm": 0.21562178432941437, "learning_rate": 6.395114311095905e-05, "loss": 0.0096, "step": 43950 }, { "epoch": 65.90704647676162, "grad_norm": 0.1602819859981537, "learning_rate": 6.393526433471559e-05, "loss": 0.0096, "step": 43960 }, { "epoch": 65.92203898050974, "grad_norm": 0.24870933592319489, "learning_rate": 6.391938403453262e-05, "loss": 0.0092, "step": 43970 }, { "epoch": 65.93703148425787, "grad_norm": 0.22714532911777496, "learning_rate": 6.390350221214671e-05, "loss": 0.0105, "step": 43980 }, { "epoch": 65.95202398800599, "grad_norm": 3.025848627090454, "learning_rate": 6.38876188692947e-05, "loss": 0.0117, "step": 43990 }, { "epoch": 65.96701649175412, "grad_norm": 0.2243766188621521, "learning_rate": 6.387173400771358e-05, "loss": 0.0115, "step": 44000 }, { "epoch": 65.98200899550226, "grad_norm": 0.24706105887889862, "learning_rate": 6.385584762914047e-05, "loss": 0.0086, "step": 44010 }, { "epoch": 65.99700149925037, "grad_norm": 0.18296168744564056, "learning_rate": 6.38399597353127e-05, "loss": 0.0071, "step": 44020 }, { "epoch": 66.0119940029985, "grad_norm": 0.24505023658275604, "learning_rate": 6.382407032796775e-05, "loss": 0.0108, "step": 44030 }, { "epoch": 66.02698650674662, "grad_norm": 0.1981518566608429, "learning_rate": 6.380817940884325e-05, "loss": 0.0095, "step": 44040 }, { "epoch": 66.04197901049476, "grad_norm": 0.12713268399238586, "learning_rate": 6.379228697967702e-05, "loss": 0.0101, "step": 44050 }, { "epoch": 66.05697151424287, "grad_norm": 0.25319555401802063, "learning_rate": 6.3776393042207e-05, "loss": 0.0091, "step": 44060 }, { "epoch": 66.07196401799101, "grad_norm": 0.2222716361284256, "learning_rate": 6.376049759817137e-05, "loss": 0.0078, "step": 44070 }, { "epoch": 66.08695652173913, "grad_norm": 0.19906094670295715, "learning_rate": 6.37446006493084e-05, "loss": 0.0088, "step": 44080 }, { "epoch": 66.10194902548726, "grad_norm": 1.207668662071228, "learning_rate": 6.372870219735656e-05, "loss": 0.0086, "step": 44090 }, { "epoch": 66.11694152923538, "grad_norm": 0.21781238913536072, "learning_rate": 6.371280224405452e-05, "loss": 0.0087, "step": 44100 }, { "epoch": 66.13193403298351, "grad_norm": 0.28848719596862793, "learning_rate": 6.369690079114101e-05, "loss": 0.0085, "step": 44110 }, { "epoch": 66.14692653673164, "grad_norm": 0.19575408101081848, "learning_rate": 6.368099784035504e-05, "loss": 0.0078, "step": 44120 }, { "epoch": 66.16191904047976, "grad_norm": 0.19261565804481506, "learning_rate": 6.366509339343572e-05, "loss": 0.0075, "step": 44130 }, { "epoch": 66.17691154422789, "grad_norm": 0.17022596299648285, "learning_rate": 6.364918745212232e-05, "loss": 0.0089, "step": 44140 }, { "epoch": 66.19190404797601, "grad_norm": 0.2868979871273041, "learning_rate": 6.363328001815431e-05, "loss": 0.0095, "step": 44150 }, { "epoch": 66.20689655172414, "grad_norm": 0.268483430147171, "learning_rate": 6.361737109327128e-05, "loss": 0.0091, "step": 44160 }, { "epoch": 66.22188905547226, "grad_norm": 0.23864857852458954, "learning_rate": 6.360146067921301e-05, "loss": 0.0103, "step": 44170 }, { "epoch": 66.2368815592204, "grad_norm": 0.12785160541534424, "learning_rate": 6.35855487777195e-05, "loss": 0.0085, "step": 44180 }, { "epoch": 66.25187406296851, "grad_norm": 0.2765457332134247, "learning_rate": 6.356963539053076e-05, "loss": 0.0101, "step": 44190 }, { "epoch": 66.26686656671664, "grad_norm": 0.24089297652244568, "learning_rate": 6.355372051938711e-05, "loss": 0.0081, "step": 44200 }, { "epoch": 66.28185907046476, "grad_norm": 0.24110978841781616, "learning_rate": 6.353780416602894e-05, "loss": 0.01, "step": 44210 }, { "epoch": 66.2968515742129, "grad_norm": 0.19571258127689362, "learning_rate": 6.352188633219689e-05, "loss": 0.0114, "step": 44220 }, { "epoch": 66.31184407796101, "grad_norm": 0.18938061594963074, "learning_rate": 6.350596701963166e-05, "loss": 0.0092, "step": 44230 }, { "epoch": 66.32683658170914, "grad_norm": 0.2585778534412384, "learning_rate": 6.349004623007419e-05, "loss": 0.0077, "step": 44240 }, { "epoch": 66.34182908545728, "grad_norm": 0.14538124203681946, "learning_rate": 6.347412396526555e-05, "loss": 0.0081, "step": 44250 }, { "epoch": 66.3568215892054, "grad_norm": 0.3193557858467102, "learning_rate": 6.345820022694696e-05, "loss": 0.0111, "step": 44260 }, { "epoch": 66.37181409295353, "grad_norm": 0.21011674404144287, "learning_rate": 6.344227501685984e-05, "loss": 0.0102, "step": 44270 }, { "epoch": 66.38680659670165, "grad_norm": 0.22650887072086334, "learning_rate": 6.342634833674572e-05, "loss": 0.008, "step": 44280 }, { "epoch": 66.40179910044978, "grad_norm": 0.31212618947029114, "learning_rate": 6.341042018834635e-05, "loss": 0.0106, "step": 44290 }, { "epoch": 66.4167916041979, "grad_norm": 0.24608097970485687, "learning_rate": 6.339449057340359e-05, "loss": 0.0074, "step": 44300 }, { "epoch": 66.43178410794603, "grad_norm": 0.2838013470172882, "learning_rate": 6.337855949365945e-05, "loss": 0.0097, "step": 44310 }, { "epoch": 66.44677661169415, "grad_norm": 0.17686417698860168, "learning_rate": 6.336262695085619e-05, "loss": 0.0121, "step": 44320 }, { "epoch": 66.46176911544228, "grad_norm": 0.1828097105026245, "learning_rate": 6.334669294673612e-05, "loss": 0.0076, "step": 44330 }, { "epoch": 66.4767616191904, "grad_norm": 0.19082069396972656, "learning_rate": 6.333075748304179e-05, "loss": 0.0072, "step": 44340 }, { "epoch": 66.49175412293853, "grad_norm": 0.2184850573539734, "learning_rate": 6.331482056151585e-05, "loss": 0.0104, "step": 44350 }, { "epoch": 66.50674662668666, "grad_norm": 0.31474626064300537, "learning_rate": 6.329888218390117e-05, "loss": 0.0087, "step": 44360 }, { "epoch": 66.52173913043478, "grad_norm": 0.1756933480501175, "learning_rate": 6.328294235194072e-05, "loss": 0.0079, "step": 44370 }, { "epoch": 66.53673163418291, "grad_norm": 0.287635862827301, "learning_rate": 6.326700106737765e-05, "loss": 0.0098, "step": 44380 }, { "epoch": 66.55172413793103, "grad_norm": 0.28413861989974976, "learning_rate": 6.325105833195531e-05, "loss": 0.0082, "step": 44390 }, { "epoch": 66.56671664167916, "grad_norm": 0.608826756477356, "learning_rate": 6.323511414741715e-05, "loss": 0.0094, "step": 44400 }, { "epoch": 66.58170914542728, "grad_norm": 0.14398466050624847, "learning_rate": 6.321916851550678e-05, "loss": 0.0092, "step": 44410 }, { "epoch": 66.59670164917542, "grad_norm": 0.1791120171546936, "learning_rate": 6.320322143796806e-05, "loss": 0.0104, "step": 44420 }, { "epoch": 66.61169415292353, "grad_norm": 0.23910771310329437, "learning_rate": 6.318727291654488e-05, "loss": 0.0122, "step": 44430 }, { "epoch": 66.62668665667167, "grad_norm": 0.4466886818408966, "learning_rate": 6.317132295298134e-05, "loss": 0.0093, "step": 44440 }, { "epoch": 66.64167916041978, "grad_norm": 0.171759694814682, "learning_rate": 6.315537154902173e-05, "loss": 0.009, "step": 44450 }, { "epoch": 66.65667166416792, "grad_norm": 0.20763541758060455, "learning_rate": 6.313941870641048e-05, "loss": 0.0079, "step": 44460 }, { "epoch": 66.67166416791605, "grad_norm": 0.23782101273536682, "learning_rate": 6.312346442689214e-05, "loss": 0.0108, "step": 44470 }, { "epoch": 66.68665667166417, "grad_norm": 0.31570085883140564, "learning_rate": 6.310750871221147e-05, "loss": 0.0103, "step": 44480 }, { "epoch": 66.7016491754123, "grad_norm": 0.1804061383008957, "learning_rate": 6.309155156411335e-05, "loss": 0.0076, "step": 44490 }, { "epoch": 66.71664167916042, "grad_norm": 0.23336544632911682, "learning_rate": 6.307559298434284e-05, "loss": 0.0085, "step": 44500 }, { "epoch": 66.73163418290855, "grad_norm": 0.2066640853881836, "learning_rate": 6.305963297464512e-05, "loss": 0.0082, "step": 44510 }, { "epoch": 66.74662668665667, "grad_norm": 0.20284900069236755, "learning_rate": 6.304367153676561e-05, "loss": 0.0074, "step": 44520 }, { "epoch": 66.7616191904048, "grad_norm": 0.181282639503479, "learning_rate": 6.302770867244978e-05, "loss": 0.0082, "step": 44530 }, { "epoch": 66.77661169415292, "grad_norm": 0.20196980237960815, "learning_rate": 6.301174438344328e-05, "loss": 0.01, "step": 44540 }, { "epoch": 66.79160419790105, "grad_norm": 2.6330339908599854, "learning_rate": 6.299577867149202e-05, "loss": 0.0088, "step": 44550 }, { "epoch": 66.80659670164917, "grad_norm": 0.20209796726703644, "learning_rate": 6.297981153834192e-05, "loss": 0.0094, "step": 44560 }, { "epoch": 66.8215892053973, "grad_norm": 0.14902900159358978, "learning_rate": 6.296384298573916e-05, "loss": 0.0088, "step": 44570 }, { "epoch": 66.83658170914542, "grad_norm": 0.24672646820545197, "learning_rate": 6.294787301543001e-05, "loss": 0.0097, "step": 44580 }, { "epoch": 66.85157421289355, "grad_norm": 0.2502458095550537, "learning_rate": 6.293190162916095e-05, "loss": 0.0125, "step": 44590 }, { "epoch": 66.86656671664169, "grad_norm": 0.1689179390668869, "learning_rate": 6.291592882867855e-05, "loss": 0.0153, "step": 44600 }, { "epoch": 66.8815592203898, "grad_norm": 0.22048108279705048, "learning_rate": 6.28999546157296e-05, "loss": 0.0074, "step": 44610 }, { "epoch": 66.89655172413794, "grad_norm": 0.19120271503925323, "learning_rate": 6.288397899206102e-05, "loss": 0.0082, "step": 44620 }, { "epoch": 66.91154422788605, "grad_norm": 0.2576259970664978, "learning_rate": 6.286800195941984e-05, "loss": 0.0087, "step": 44630 }, { "epoch": 66.92653673163419, "grad_norm": 0.21188749372959137, "learning_rate": 6.285202351955334e-05, "loss": 0.007, "step": 44640 }, { "epoch": 66.9415292353823, "grad_norm": 0.27019673585891724, "learning_rate": 6.283604367420887e-05, "loss": 0.0108, "step": 44650 }, { "epoch": 66.95652173913044, "grad_norm": 0.1971282809972763, "learning_rate": 6.282006242513394e-05, "loss": 0.0124, "step": 44660 }, { "epoch": 66.97151424287856, "grad_norm": 0.45574402809143066, "learning_rate": 6.280407977407628e-05, "loss": 0.0074, "step": 44670 }, { "epoch": 66.98650674662669, "grad_norm": 0.25440067052841187, "learning_rate": 6.27880957227837e-05, "loss": 0.01, "step": 44680 }, { "epoch": 67.0014992503748, "grad_norm": 0.17702391743659973, "learning_rate": 6.27721102730042e-05, "loss": 0.0093, "step": 44690 }, { "epoch": 67.01649175412294, "grad_norm": 0.3039454221725464, "learning_rate": 6.275612342648592e-05, "loss": 0.0104, "step": 44700 }, { "epoch": 67.03148425787107, "grad_norm": 0.2543415129184723, "learning_rate": 6.274013518497716e-05, "loss": 0.0116, "step": 44710 }, { "epoch": 67.04647676161919, "grad_norm": 0.1716868281364441, "learning_rate": 6.272414555022636e-05, "loss": 0.0083, "step": 44720 }, { "epoch": 67.06146926536732, "grad_norm": 0.3649664521217346, "learning_rate": 6.270815452398215e-05, "loss": 0.009, "step": 44730 }, { "epoch": 67.07646176911544, "grad_norm": 0.23617017269134521, "learning_rate": 6.269216210799326e-05, "loss": 0.0087, "step": 44740 }, { "epoch": 67.09145427286357, "grad_norm": 0.16901010274887085, "learning_rate": 6.26761683040086e-05, "loss": 0.0103, "step": 44750 }, { "epoch": 67.10644677661169, "grad_norm": 0.18296268582344055, "learning_rate": 6.266017311377723e-05, "loss": 0.0085, "step": 44760 }, { "epoch": 67.12143928035982, "grad_norm": 0.1572292447090149, "learning_rate": 6.264417653904839e-05, "loss": 0.0073, "step": 44770 }, { "epoch": 67.13643178410794, "grad_norm": 0.3678619861602783, "learning_rate": 6.262817858157139e-05, "loss": 0.0093, "step": 44780 }, { "epoch": 67.15142428785607, "grad_norm": 0.2755364775657654, "learning_rate": 6.261217924309576e-05, "loss": 0.0116, "step": 44790 }, { "epoch": 67.16641679160419, "grad_norm": 0.3147508203983307, "learning_rate": 6.259617852537118e-05, "loss": 0.0118, "step": 44800 }, { "epoch": 67.18140929535232, "grad_norm": 0.22630125284194946, "learning_rate": 6.258017643014747e-05, "loss": 0.0084, "step": 44810 }, { "epoch": 67.19640179910046, "grad_norm": 0.27676063776016235, "learning_rate": 6.256417295917456e-05, "loss": 0.0094, "step": 44820 }, { "epoch": 67.21139430284857, "grad_norm": 0.26551178097724915, "learning_rate": 6.254816811420258e-05, "loss": 0.0132, "step": 44830 }, { "epoch": 67.22638680659671, "grad_norm": 0.20098483562469482, "learning_rate": 6.253216189698183e-05, "loss": 0.0119, "step": 44840 }, { "epoch": 67.24137931034483, "grad_norm": 0.24065300822257996, "learning_rate": 6.251615430926267e-05, "loss": 0.0101, "step": 44850 }, { "epoch": 67.25637181409296, "grad_norm": 0.23134830594062805, "learning_rate": 6.25001453527957e-05, "loss": 0.0081, "step": 44860 }, { "epoch": 67.27136431784108, "grad_norm": 0.18119394779205322, "learning_rate": 6.248413502933164e-05, "loss": 0.009, "step": 44870 }, { "epoch": 67.28635682158921, "grad_norm": 0.12130121886730194, "learning_rate": 6.246812334062133e-05, "loss": 0.014, "step": 44880 }, { "epoch": 67.30134932533733, "grad_norm": 0.148560032248497, "learning_rate": 6.245211028841579e-05, "loss": 0.0108, "step": 44890 }, { "epoch": 67.31634182908546, "grad_norm": 0.32365652918815613, "learning_rate": 6.24360958744662e-05, "loss": 0.01, "step": 44900 }, { "epoch": 67.33133433283358, "grad_norm": 0.26855576038360596, "learning_rate": 6.242008010052387e-05, "loss": 0.0085, "step": 44910 }, { "epoch": 67.34632683658171, "grad_norm": 0.16549228131771088, "learning_rate": 6.240406296834024e-05, "loss": 0.0089, "step": 44920 }, { "epoch": 67.36131934032983, "grad_norm": 0.3359263241291046, "learning_rate": 6.238804447966694e-05, "loss": 0.0123, "step": 44930 }, { "epoch": 67.37631184407796, "grad_norm": 0.22741463780403137, "learning_rate": 6.237202463625573e-05, "loss": 0.0079, "step": 44940 }, { "epoch": 67.3913043478261, "grad_norm": 0.18549419939517975, "learning_rate": 6.235600343985848e-05, "loss": 0.0086, "step": 44950 }, { "epoch": 67.40629685157421, "grad_norm": 0.18581926822662354, "learning_rate": 6.233998089222729e-05, "loss": 0.0078, "step": 44960 }, { "epoch": 67.42128935532234, "grad_norm": 0.25373104214668274, "learning_rate": 6.232395699511433e-05, "loss": 0.0095, "step": 44970 }, { "epoch": 67.43628185907046, "grad_norm": 0.1885027140378952, "learning_rate": 6.230793175027199e-05, "loss": 0.006, "step": 44980 }, { "epoch": 67.4512743628186, "grad_norm": 0.1283474713563919, "learning_rate": 6.22919051594527e-05, "loss": 0.0071, "step": 44990 }, { "epoch": 67.46626686656671, "grad_norm": 0.2000417709350586, "learning_rate": 6.227587722440917e-05, "loss": 0.0082, "step": 45000 }, { "epoch": 67.48125937031485, "grad_norm": 0.19375891983509064, "learning_rate": 6.225984794689414e-05, "loss": 0.0094, "step": 45010 }, { "epoch": 67.49625187406296, "grad_norm": 0.18099583685398102, "learning_rate": 6.22438173286606e-05, "loss": 0.0084, "step": 45020 }, { "epoch": 67.5112443778111, "grad_norm": 0.3084367513656616, "learning_rate": 6.222778537146159e-05, "loss": 0.0113, "step": 45030 }, { "epoch": 67.52623688155921, "grad_norm": 0.2026389092206955, "learning_rate": 6.221175207705037e-05, "loss": 0.0099, "step": 45040 }, { "epoch": 67.54122938530735, "grad_norm": 0.29327529668807983, "learning_rate": 6.219571744718027e-05, "loss": 0.0106, "step": 45050 }, { "epoch": 67.55622188905548, "grad_norm": 0.2487562596797943, "learning_rate": 6.217968148360487e-05, "loss": 0.0077, "step": 45060 }, { "epoch": 67.5712143928036, "grad_norm": 0.42683422565460205, "learning_rate": 6.216364418807782e-05, "loss": 0.0071, "step": 45070 }, { "epoch": 67.58620689655173, "grad_norm": 0.43344298005104065, "learning_rate": 6.214760556235292e-05, "loss": 0.0117, "step": 45080 }, { "epoch": 67.60119940029985, "grad_norm": 0.15284977853298187, "learning_rate": 6.213156560818414e-05, "loss": 0.0105, "step": 45090 }, { "epoch": 67.61619190404798, "grad_norm": 0.16136041283607483, "learning_rate": 6.211552432732559e-05, "loss": 0.0122, "step": 45100 }, { "epoch": 67.6311844077961, "grad_norm": 0.3573756515979767, "learning_rate": 6.20994817215315e-05, "loss": 0.0096, "step": 45110 }, { "epoch": 67.64617691154423, "grad_norm": 0.15655502676963806, "learning_rate": 6.208343779255628e-05, "loss": 0.0094, "step": 45120 }, { "epoch": 67.66116941529235, "grad_norm": 0.19571268558502197, "learning_rate": 6.206739254215449e-05, "loss": 0.0094, "step": 45130 }, { "epoch": 67.67616191904048, "grad_norm": 0.23159131407737732, "learning_rate": 6.205134597208077e-05, "loss": 0.0105, "step": 45140 }, { "epoch": 67.6911544227886, "grad_norm": 0.17987878620624542, "learning_rate": 6.203529808409e-05, "loss": 0.0097, "step": 45150 }, { "epoch": 67.70614692653673, "grad_norm": 0.602019190788269, "learning_rate": 6.201924887993712e-05, "loss": 0.0113, "step": 45160 }, { "epoch": 67.72113943028486, "grad_norm": 0.20577651262283325, "learning_rate": 6.200319836137725e-05, "loss": 0.0106, "step": 45170 }, { "epoch": 67.73613193403298, "grad_norm": 0.20851874351501465, "learning_rate": 6.198714653016565e-05, "loss": 0.009, "step": 45180 }, { "epoch": 67.75112443778112, "grad_norm": 0.1874229460954666, "learning_rate": 6.197109338805774e-05, "loss": 0.0072, "step": 45190 }, { "epoch": 67.76611694152923, "grad_norm": 0.2684672474861145, "learning_rate": 6.195503893680903e-05, "loss": 0.0112, "step": 45200 }, { "epoch": 67.78110944527737, "grad_norm": 0.2934964597225189, "learning_rate": 6.193898317817524e-05, "loss": 0.0086, "step": 45210 }, { "epoch": 67.79610194902548, "grad_norm": 0.1957046091556549, "learning_rate": 6.192292611391221e-05, "loss": 0.0139, "step": 45220 }, { "epoch": 67.81109445277362, "grad_norm": 0.21844299137592316, "learning_rate": 6.190686774577591e-05, "loss": 0.0109, "step": 45230 }, { "epoch": 67.82608695652173, "grad_norm": 0.4028872549533844, "learning_rate": 6.189080807552245e-05, "loss": 0.0092, "step": 45240 }, { "epoch": 67.84107946026987, "grad_norm": 0.4330024719238281, "learning_rate": 6.187474710490809e-05, "loss": 0.0096, "step": 45250 }, { "epoch": 67.85607196401799, "grad_norm": 0.19526390731334686, "learning_rate": 6.185868483568926e-05, "loss": 0.0143, "step": 45260 }, { "epoch": 67.87106446776612, "grad_norm": 0.21333763003349304, "learning_rate": 6.184262126962245e-05, "loss": 0.0086, "step": 45270 }, { "epoch": 67.88605697151424, "grad_norm": 0.16668947041034698, "learning_rate": 6.182655640846442e-05, "loss": 0.0105, "step": 45280 }, { "epoch": 67.90104947526237, "grad_norm": 0.19454434514045715, "learning_rate": 6.181049025397196e-05, "loss": 0.0072, "step": 45290 }, { "epoch": 67.9160419790105, "grad_norm": 0.6311210989952087, "learning_rate": 6.179442280790202e-05, "loss": 0.011, "step": 45300 }, { "epoch": 67.93103448275862, "grad_norm": 0.2119598388671875, "learning_rate": 6.177835407201174e-05, "loss": 0.011, "step": 45310 }, { "epoch": 67.94602698650675, "grad_norm": 0.44758370518684387, "learning_rate": 6.176228404805839e-05, "loss": 0.0095, "step": 45320 }, { "epoch": 67.96101949025487, "grad_norm": 0.5903586149215698, "learning_rate": 6.174621273779932e-05, "loss": 0.008, "step": 45330 }, { "epoch": 67.976011994003, "grad_norm": 0.171368807554245, "learning_rate": 6.17301401429921e-05, "loss": 0.0133, "step": 45340 }, { "epoch": 67.99100449775112, "grad_norm": 0.2621597349643707, "learning_rate": 6.171406626539436e-05, "loss": 0.0097, "step": 45350 }, { "epoch": 68.00599700149925, "grad_norm": 0.19115903973579407, "learning_rate": 6.169799110676398e-05, "loss": 0.0101, "step": 45360 }, { "epoch": 68.02098950524737, "grad_norm": 0.20725171267986298, "learning_rate": 6.168191466885885e-05, "loss": 0.0098, "step": 45370 }, { "epoch": 68.0359820089955, "grad_norm": 0.20797531306743622, "learning_rate": 6.166583695343711e-05, "loss": 0.0096, "step": 45380 }, { "epoch": 68.05097451274362, "grad_norm": 0.18754203617572784, "learning_rate": 6.164975796225698e-05, "loss": 0.0132, "step": 45390 }, { "epoch": 68.06596701649175, "grad_norm": 0.30307698249816895, "learning_rate": 6.163367769707683e-05, "loss": 0.0086, "step": 45400 }, { "epoch": 68.08095952023989, "grad_norm": 0.1919562816619873, "learning_rate": 6.161759615965519e-05, "loss": 0.0111, "step": 45410 }, { "epoch": 68.095952023988, "grad_norm": 0.17932578921318054, "learning_rate": 6.16015133517507e-05, "loss": 0.0114, "step": 45420 }, { "epoch": 68.11094452773614, "grad_norm": 0.1818743646144867, "learning_rate": 6.158542927512214e-05, "loss": 0.0112, "step": 45430 }, { "epoch": 68.12593703148426, "grad_norm": 0.13163195550441742, "learning_rate": 6.156934393152846e-05, "loss": 0.0086, "step": 45440 }, { "epoch": 68.14092953523239, "grad_norm": 0.2315814346075058, "learning_rate": 6.15532573227287e-05, "loss": 0.0082, "step": 45450 }, { "epoch": 68.1559220389805, "grad_norm": 0.3281916081905365, "learning_rate": 6.153716945048212e-05, "loss": 0.01, "step": 45460 }, { "epoch": 68.17091454272864, "grad_norm": 0.32230398058891296, "learning_rate": 6.152108031654802e-05, "loss": 0.0099, "step": 45470 }, { "epoch": 68.18590704647676, "grad_norm": 0.18688535690307617, "learning_rate": 6.15049899226859e-05, "loss": 0.0085, "step": 45480 }, { "epoch": 68.20089955022489, "grad_norm": 0.11337091028690338, "learning_rate": 6.148889827065537e-05, "loss": 0.009, "step": 45490 }, { "epoch": 68.21589205397301, "grad_norm": 0.1993703991174698, "learning_rate": 6.147280536221622e-05, "loss": 0.0074, "step": 45500 }, { "epoch": 68.23088455772114, "grad_norm": 0.15723474323749542, "learning_rate": 6.145671119912832e-05, "loss": 0.0106, "step": 45510 }, { "epoch": 68.24587706146926, "grad_norm": 1.3742139339447021, "learning_rate": 6.144061578315169e-05, "loss": 0.0079, "step": 45520 }, { "epoch": 68.26086956521739, "grad_norm": 0.25337672233581543, "learning_rate": 6.142451911604654e-05, "loss": 0.0082, "step": 45530 }, { "epoch": 68.27586206896552, "grad_norm": 0.14646977186203003, "learning_rate": 6.140842119957315e-05, "loss": 0.0083, "step": 45540 }, { "epoch": 68.29085457271364, "grad_norm": 0.2414305955171585, "learning_rate": 6.139232203549197e-05, "loss": 0.0112, "step": 45550 }, { "epoch": 68.30584707646177, "grad_norm": 0.3239732086658478, "learning_rate": 6.137622162556357e-05, "loss": 0.0084, "step": 45560 }, { "epoch": 68.32083958020989, "grad_norm": 0.14219027757644653, "learning_rate": 6.13601199715487e-05, "loss": 0.0081, "step": 45570 }, { "epoch": 68.33583208395802, "grad_norm": 0.5486688017845154, "learning_rate": 6.134401707520816e-05, "loss": 0.0094, "step": 45580 }, { "epoch": 68.35082458770614, "grad_norm": 0.19009271264076233, "learning_rate": 6.132791293830298e-05, "loss": 0.0092, "step": 45590 }, { "epoch": 68.36581709145428, "grad_norm": 0.19895167648792267, "learning_rate": 6.131180756259428e-05, "loss": 0.0091, "step": 45600 }, { "epoch": 68.3808095952024, "grad_norm": 0.24069440364837646, "learning_rate": 6.129570094984331e-05, "loss": 0.0063, "step": 45610 }, { "epoch": 68.39580209895053, "grad_norm": 0.2339254915714264, "learning_rate": 6.127959310181145e-05, "loss": 0.0096, "step": 45620 }, { "epoch": 68.41079460269864, "grad_norm": 0.178462952375412, "learning_rate": 6.126348402026026e-05, "loss": 0.0109, "step": 45630 }, { "epoch": 68.42578710644678, "grad_norm": 0.4755729138851166, "learning_rate": 6.12473737069514e-05, "loss": 0.0065, "step": 45640 }, { "epoch": 68.44077961019491, "grad_norm": 0.12106480449438095, "learning_rate": 6.123126216364665e-05, "loss": 0.0054, "step": 45650 }, { "epoch": 68.45577211394303, "grad_norm": 0.13350962102413177, "learning_rate": 6.121514939210797e-05, "loss": 0.0063, "step": 45660 }, { "epoch": 68.47076461769116, "grad_norm": 0.30023396015167236, "learning_rate": 6.119903539409741e-05, "loss": 0.0077, "step": 45670 }, { "epoch": 68.48575712143928, "grad_norm": 0.17118120193481445, "learning_rate": 6.118292017137716e-05, "loss": 0.008, "step": 45680 }, { "epoch": 68.50074962518741, "grad_norm": 0.1587221473455429, "learning_rate": 6.116680372570959e-05, "loss": 0.0068, "step": 45690 }, { "epoch": 68.51574212893553, "grad_norm": 0.18694406747817993, "learning_rate": 6.115068605885713e-05, "loss": 0.0076, "step": 45700 }, { "epoch": 68.53073463268366, "grad_norm": 0.7993088960647583, "learning_rate": 6.113456717258243e-05, "loss": 0.0074, "step": 45710 }, { "epoch": 68.54572713643178, "grad_norm": 0.1697278916835785, "learning_rate": 6.11184470686482e-05, "loss": 0.0077, "step": 45720 }, { "epoch": 68.56071964017991, "grad_norm": 0.25211021304130554, "learning_rate": 6.110232574881731e-05, "loss": 0.0068, "step": 45730 }, { "epoch": 68.57571214392803, "grad_norm": 0.2969314754009247, "learning_rate": 6.108620321485277e-05, "loss": 0.0093, "step": 45740 }, { "epoch": 68.59070464767616, "grad_norm": 0.2773915231227875, "learning_rate": 6.107007946851773e-05, "loss": 0.009, "step": 45750 }, { "epoch": 68.6056971514243, "grad_norm": 0.15740880370140076, "learning_rate": 6.105395451157542e-05, "loss": 0.0089, "step": 45760 }, { "epoch": 68.62068965517241, "grad_norm": 0.24416904151439667, "learning_rate": 6.103782834578928e-05, "loss": 0.0087, "step": 45770 }, { "epoch": 68.63568215892055, "grad_norm": 0.18783634901046753, "learning_rate": 6.102170097292281e-05, "loss": 0.0092, "step": 45780 }, { "epoch": 68.65067466266866, "grad_norm": 0.2044980823993683, "learning_rate": 6.1005572394739716e-05, "loss": 0.0086, "step": 45790 }, { "epoch": 68.6656671664168, "grad_norm": 0.18898016214370728, "learning_rate": 6.0989442613003765e-05, "loss": 0.0108, "step": 45800 }, { "epoch": 68.68065967016491, "grad_norm": 0.16171826422214508, "learning_rate": 6.097331162947888e-05, "loss": 0.0084, "step": 45810 }, { "epoch": 68.69565217391305, "grad_norm": 0.18169072270393372, "learning_rate": 6.095717944592914e-05, "loss": 0.009, "step": 45820 }, { "epoch": 68.71064467766116, "grad_norm": 0.2601836621761322, "learning_rate": 6.094104606411873e-05, "loss": 0.008, "step": 45830 }, { "epoch": 68.7256371814093, "grad_norm": 0.286857932806015, "learning_rate": 6.0924911485811966e-05, "loss": 0.0119, "step": 45840 }, { "epoch": 68.74062968515742, "grad_norm": 0.21036787331104279, "learning_rate": 6.090877571277331e-05, "loss": 0.0099, "step": 45850 }, { "epoch": 68.75562218890555, "grad_norm": 0.1260380744934082, "learning_rate": 6.089263874676736e-05, "loss": 0.0076, "step": 45860 }, { "epoch": 68.77061469265368, "grad_norm": 0.14967603981494904, "learning_rate": 6.0876500589558796e-05, "loss": 0.0127, "step": 45870 }, { "epoch": 68.7856071964018, "grad_norm": 0.16452164947986603, "learning_rate": 6.086036124291248e-05, "loss": 0.011, "step": 45880 }, { "epoch": 68.80059970014993, "grad_norm": 0.17807552218437195, "learning_rate": 6.084422070859339e-05, "loss": 0.0069, "step": 45890 }, { "epoch": 68.81559220389805, "grad_norm": 0.12858465313911438, "learning_rate": 6.082807898836663e-05, "loss": 0.0073, "step": 45900 }, { "epoch": 68.83058470764618, "grad_norm": 0.2766937017440796, "learning_rate": 6.081193608399742e-05, "loss": 0.0084, "step": 45910 }, { "epoch": 68.8455772113943, "grad_norm": 0.1897144913673401, "learning_rate": 6.0795791997251164e-05, "loss": 0.0082, "step": 45920 }, { "epoch": 68.86056971514243, "grad_norm": 0.1239485889673233, "learning_rate": 6.0779646729893294e-05, "loss": 0.0083, "step": 45930 }, { "epoch": 68.87556221889055, "grad_norm": 0.21271169185638428, "learning_rate": 6.0763500283689476e-05, "loss": 0.0085, "step": 45940 }, { "epoch": 68.89055472263868, "grad_norm": 0.18938668072223663, "learning_rate": 6.0747352660405455e-05, "loss": 0.0088, "step": 45950 }, { "epoch": 68.9055472263868, "grad_norm": 0.44686469435691833, "learning_rate": 6.073120386180709e-05, "loss": 0.0087, "step": 45960 }, { "epoch": 68.92053973013493, "grad_norm": 0.3384849727153778, "learning_rate": 6.0715053889660425e-05, "loss": 0.0083, "step": 45970 }, { "epoch": 68.93553223388305, "grad_norm": 0.23348873853683472, "learning_rate": 6.069890274573157e-05, "loss": 0.0094, "step": 45980 }, { "epoch": 68.95052473763118, "grad_norm": 0.3056527376174927, "learning_rate": 6.068275043178679e-05, "loss": 0.01, "step": 45990 }, { "epoch": 68.96551724137932, "grad_norm": 0.24841080605983734, "learning_rate": 6.066659694959248e-05, "loss": 0.01, "step": 46000 }, { "epoch": 68.98050974512743, "grad_norm": 0.19521574676036835, "learning_rate": 6.0650442300915176e-05, "loss": 0.0075, "step": 46010 }, { "epoch": 68.99550224887557, "grad_norm": 0.20994636416435242, "learning_rate": 6.063428648752152e-05, "loss": 0.0105, "step": 46020 }, { "epoch": 69.01049475262369, "grad_norm": 0.17352259159088135, "learning_rate": 6.0618129511178266e-05, "loss": 0.0086, "step": 46030 }, { "epoch": 69.02548725637182, "grad_norm": 0.20032793283462524, "learning_rate": 6.060197137365234e-05, "loss": 0.0096, "step": 46040 }, { "epoch": 69.04047976011994, "grad_norm": 0.21581265330314636, "learning_rate": 6.058581207671077e-05, "loss": 0.0082, "step": 46050 }, { "epoch": 69.05547226386807, "grad_norm": 0.18495751917362213, "learning_rate": 6.056965162212072e-05, "loss": 0.0097, "step": 46060 }, { "epoch": 69.07046476761619, "grad_norm": 0.14618830382823944, "learning_rate": 6.055349001164943e-05, "loss": 0.01, "step": 46070 }, { "epoch": 69.08545727136432, "grad_norm": 0.19746334850788116, "learning_rate": 6.0537327247064347e-05, "loss": 0.0087, "step": 46080 }, { "epoch": 69.10044977511244, "grad_norm": 0.29289188981056213, "learning_rate": 6.0521163330133014e-05, "loss": 0.0082, "step": 46090 }, { "epoch": 69.11544227886057, "grad_norm": 0.22311073541641235, "learning_rate": 6.050499826262306e-05, "loss": 0.0123, "step": 46100 }, { "epoch": 69.1304347826087, "grad_norm": 0.3763863742351532, "learning_rate": 6.0488832046302294e-05, "loss": 0.0089, "step": 46110 }, { "epoch": 69.14542728635682, "grad_norm": 0.240422785282135, "learning_rate": 6.0472664682938626e-05, "loss": 0.0102, "step": 46120 }, { "epoch": 69.16041979010495, "grad_norm": 0.20970799028873444, "learning_rate": 6.045649617430009e-05, "loss": 0.0084, "step": 46130 }, { "epoch": 69.17541229385307, "grad_norm": 0.2809280455112457, "learning_rate": 6.0440326522154866e-05, "loss": 0.008, "step": 46140 }, { "epoch": 69.1904047976012, "grad_norm": 0.24119719862937927, "learning_rate": 6.0424155728271224e-05, "loss": 0.0083, "step": 46150 }, { "epoch": 69.20539730134932, "grad_norm": 0.1548628956079483, "learning_rate": 6.040798379441758e-05, "loss": 0.0073, "step": 46160 }, { "epoch": 69.22038980509745, "grad_norm": 0.8445356488227844, "learning_rate": 6.0391810722362485e-05, "loss": 0.0107, "step": 46170 }, { "epoch": 69.23538230884557, "grad_norm": 0.2595800757408142, "learning_rate": 6.037563651387458e-05, "loss": 0.0078, "step": 46180 }, { "epoch": 69.2503748125937, "grad_norm": 0.16999101638793945, "learning_rate": 6.0359461170722666e-05, "loss": 0.0072, "step": 46190 }, { "epoch": 69.26536731634182, "grad_norm": 0.19601283967494965, "learning_rate": 6.034328469467566e-05, "loss": 0.0093, "step": 46200 }, { "epoch": 69.28035982008996, "grad_norm": 0.4419628381729126, "learning_rate": 6.0327107087502596e-05, "loss": 0.0091, "step": 46210 }, { "epoch": 69.29535232383807, "grad_norm": 0.25170934200286865, "learning_rate": 6.031092835097262e-05, "loss": 0.0116, "step": 46220 }, { "epoch": 69.3103448275862, "grad_norm": 0.20194092392921448, "learning_rate": 6.0294748486855024e-05, "loss": 0.008, "step": 46230 }, { "epoch": 69.32533733133434, "grad_norm": 0.27985236048698425, "learning_rate": 6.0278567496919216e-05, "loss": 0.0102, "step": 46240 }, { "epoch": 69.34032983508246, "grad_norm": 0.3714190125465393, "learning_rate": 6.026238538293472e-05, "loss": 0.0076, "step": 46250 }, { "epoch": 69.35532233883059, "grad_norm": 0.20801474153995514, "learning_rate": 6.024620214667118e-05, "loss": 0.0126, "step": 46260 }, { "epoch": 69.37031484257871, "grad_norm": 0.12980997562408447, "learning_rate": 6.0230017789898384e-05, "loss": 0.0064, "step": 46270 }, { "epoch": 69.38530734632684, "grad_norm": 0.31753358244895935, "learning_rate": 6.021383231438622e-05, "loss": 0.0094, "step": 46280 }, { "epoch": 69.40029985007496, "grad_norm": 0.24967148900032043, "learning_rate": 6.0197645721904704e-05, "loss": 0.0084, "step": 46290 }, { "epoch": 69.41529235382309, "grad_norm": 0.23327741026878357, "learning_rate": 6.0181458014224e-05, "loss": 0.0106, "step": 46300 }, { "epoch": 69.43028485757121, "grad_norm": 0.3345581591129303, "learning_rate": 6.016526919311435e-05, "loss": 0.0129, "step": 46310 }, { "epoch": 69.44527736131934, "grad_norm": 0.33483439683914185, "learning_rate": 6.014907926034613e-05, "loss": 0.0107, "step": 46320 }, { "epoch": 69.46026986506746, "grad_norm": 0.2607833445072174, "learning_rate": 6.0132888217689875e-05, "loss": 0.0105, "step": 46330 }, { "epoch": 69.47526236881559, "grad_norm": 0.13351793587207794, "learning_rate": 6.01166960669162e-05, "loss": 0.0072, "step": 46340 }, { "epoch": 69.49025487256372, "grad_norm": 0.19442686438560486, "learning_rate": 6.0100502809795845e-05, "loss": 0.0105, "step": 46350 }, { "epoch": 69.50524737631184, "grad_norm": 0.1672198325395584, "learning_rate": 6.00843084480997e-05, "loss": 0.0082, "step": 46360 }, { "epoch": 69.52023988005998, "grad_norm": 0.11828671395778656, "learning_rate": 6.0068112983598736e-05, "loss": 0.0073, "step": 46370 }, { "epoch": 69.5352323838081, "grad_norm": 0.2167581468820572, "learning_rate": 6.005191641806407e-05, "loss": 0.0089, "step": 46380 }, { "epoch": 69.55022488755623, "grad_norm": 0.30078789591789246, "learning_rate": 6.003571875326694e-05, "loss": 0.0099, "step": 46390 }, { "epoch": 69.56521739130434, "grad_norm": 0.17719468474388123, "learning_rate": 6.00195199909787e-05, "loss": 0.0082, "step": 46400 }, { "epoch": 69.58020989505248, "grad_norm": 0.21828672289848328, "learning_rate": 6.000332013297082e-05, "loss": 0.0084, "step": 46410 }, { "epoch": 69.5952023988006, "grad_norm": 0.15236428380012512, "learning_rate": 5.998711918101487e-05, "loss": 0.0074, "step": 46420 }, { "epoch": 69.61019490254873, "grad_norm": 0.14455203711986542, "learning_rate": 5.997091713688261e-05, "loss": 0.0066, "step": 46430 }, { "epoch": 69.62518740629685, "grad_norm": 0.12999126315116882, "learning_rate": 5.9954714002345836e-05, "loss": 0.0078, "step": 46440 }, { "epoch": 69.64017991004498, "grad_norm": 0.13320840895175934, "learning_rate": 5.993850977917649e-05, "loss": 0.011, "step": 46450 }, { "epoch": 69.65517241379311, "grad_norm": 0.1836727261543274, "learning_rate": 5.992230446914667e-05, "loss": 0.0074, "step": 46460 }, { "epoch": 69.67016491754123, "grad_norm": 0.21744957566261292, "learning_rate": 5.990609807402855e-05, "loss": 0.0074, "step": 46470 }, { "epoch": 69.68515742128936, "grad_norm": 0.36234405636787415, "learning_rate": 5.988989059559443e-05, "loss": 0.01, "step": 46480 }, { "epoch": 69.70014992503748, "grad_norm": 0.12227320671081543, "learning_rate": 5.987368203561675e-05, "loss": 0.0097, "step": 46490 }, { "epoch": 69.71514242878561, "grad_norm": 0.1684630960226059, "learning_rate": 5.9857472395868055e-05, "loss": 0.0075, "step": 46500 }, { "epoch": 69.73013493253373, "grad_norm": 0.22663870453834534, "learning_rate": 5.9841261678120983e-05, "loss": 0.0084, "step": 46510 }, { "epoch": 69.74512743628186, "grad_norm": 0.20931246876716614, "learning_rate": 5.982504988414834e-05, "loss": 0.0064, "step": 46520 }, { "epoch": 69.76011994002998, "grad_norm": 0.1813020557165146, "learning_rate": 5.9808837015723015e-05, "loss": 0.0081, "step": 46530 }, { "epoch": 69.77511244377811, "grad_norm": 0.16753247380256653, "learning_rate": 5.9792623074618016e-05, "loss": 0.0073, "step": 46540 }, { "epoch": 69.79010494752623, "grad_norm": 0.17626674473285675, "learning_rate": 5.977640806260648e-05, "loss": 0.0064, "step": 46550 }, { "epoch": 69.80509745127436, "grad_norm": 0.15803851187229156, "learning_rate": 5.976019198146166e-05, "loss": 0.0085, "step": 46560 }, { "epoch": 69.82008995502248, "grad_norm": 0.12067187577486038, "learning_rate": 5.974397483295692e-05, "loss": 0.0106, "step": 46570 }, { "epoch": 69.83508245877061, "grad_norm": 0.3728278577327728, "learning_rate": 5.972775661886575e-05, "loss": 0.0096, "step": 46580 }, { "epoch": 69.85007496251875, "grad_norm": 0.22860310971736908, "learning_rate": 5.971153734096173e-05, "loss": 0.0084, "step": 46590 }, { "epoch": 69.86506746626686, "grad_norm": 0.38225188851356506, "learning_rate": 5.969531700101859e-05, "loss": 0.01, "step": 46600 }, { "epoch": 69.880059970015, "grad_norm": 0.21280603110790253, "learning_rate": 5.9679095600810155e-05, "loss": 0.0094, "step": 46610 }, { "epoch": 69.89505247376312, "grad_norm": 0.2961291968822479, "learning_rate": 5.9662873142110384e-05, "loss": 0.0107, "step": 46620 }, { "epoch": 69.91004497751125, "grad_norm": 0.2034272700548172, "learning_rate": 5.964664962669333e-05, "loss": 0.0077, "step": 46630 }, { "epoch": 69.92503748125937, "grad_norm": 0.2422684282064438, "learning_rate": 5.9630425056333186e-05, "loss": 0.0099, "step": 46640 }, { "epoch": 69.9400299850075, "grad_norm": 0.1833038032054901, "learning_rate": 5.961419943280422e-05, "loss": 0.008, "step": 46650 }, { "epoch": 69.95502248875562, "grad_norm": 0.18921178579330444, "learning_rate": 5.959797275788087e-05, "loss": 0.0083, "step": 46660 }, { "epoch": 69.97001499250375, "grad_norm": 0.19771325588226318, "learning_rate": 5.958174503333765e-05, "loss": 0.0083, "step": 46670 }, { "epoch": 69.98500749625187, "grad_norm": 0.22273308038711548, "learning_rate": 5.9565516260949195e-05, "loss": 0.0121, "step": 46680 }, { "epoch": 70.0, "grad_norm": 0.1437939703464508, "learning_rate": 5.9549286442490273e-05, "loss": 0.0078, "step": 46690 }, { "epoch": 70.01499250374813, "grad_norm": 0.16949613392353058, "learning_rate": 5.953305557973572e-05, "loss": 0.0083, "step": 46700 }, { "epoch": 70.02998500749625, "grad_norm": 0.8589863181114197, "learning_rate": 5.9516823674460564e-05, "loss": 0.007, "step": 46710 }, { "epoch": 70.04497751124438, "grad_norm": 0.21052540838718414, "learning_rate": 5.9500590728439875e-05, "loss": 0.0095, "step": 46720 }, { "epoch": 70.0599700149925, "grad_norm": 0.25735849142074585, "learning_rate": 5.948435674344886e-05, "loss": 0.0075, "step": 46730 }, { "epoch": 70.07496251874063, "grad_norm": 0.31006476283073425, "learning_rate": 5.946812172126285e-05, "loss": 0.0096, "step": 46740 }, { "epoch": 70.08995502248875, "grad_norm": 0.22815391421318054, "learning_rate": 5.945188566365729e-05, "loss": 0.0088, "step": 46750 }, { "epoch": 70.10494752623688, "grad_norm": 0.5795372128486633, "learning_rate": 5.943564857240773e-05, "loss": 0.0099, "step": 46760 }, { "epoch": 70.119940029985, "grad_norm": 0.1736893355846405, "learning_rate": 5.941941044928981e-05, "loss": 0.0077, "step": 46770 }, { "epoch": 70.13493253373314, "grad_norm": 0.19514623284339905, "learning_rate": 5.940317129607935e-05, "loss": 0.0099, "step": 46780 }, { "epoch": 70.14992503748125, "grad_norm": 0.23052293062210083, "learning_rate": 5.9386931114552204e-05, "loss": 0.0066, "step": 46790 }, { "epoch": 70.16491754122939, "grad_norm": 0.20406275987625122, "learning_rate": 5.937068990648438e-05, "loss": 0.0069, "step": 46800 }, { "epoch": 70.17991004497752, "grad_norm": 0.36157456040382385, "learning_rate": 5.935444767365199e-05, "loss": 0.0078, "step": 46810 }, { "epoch": 70.19490254872564, "grad_norm": 0.20738595724105835, "learning_rate": 5.933820441783129e-05, "loss": 0.0095, "step": 46820 }, { "epoch": 70.20989505247377, "grad_norm": 0.16002607345581055, "learning_rate": 5.932196014079857e-05, "loss": 0.0092, "step": 46830 }, { "epoch": 70.22488755622189, "grad_norm": 0.25719931721687317, "learning_rate": 5.930571484433032e-05, "loss": 0.0127, "step": 46840 }, { "epoch": 70.23988005997002, "grad_norm": 0.49432089924812317, "learning_rate": 5.928946853020309e-05, "loss": 0.0103, "step": 46850 }, { "epoch": 70.25487256371814, "grad_norm": 0.24667127430438995, "learning_rate": 5.927322120019352e-05, "loss": 0.0134, "step": 46860 }, { "epoch": 70.26986506746627, "grad_norm": 0.14673855900764465, "learning_rate": 5.925697285607844e-05, "loss": 0.008, "step": 46870 }, { "epoch": 70.28485757121439, "grad_norm": 0.17808856070041656, "learning_rate": 5.924072349963472e-05, "loss": 0.0095, "step": 46880 }, { "epoch": 70.29985007496252, "grad_norm": 0.19231514632701874, "learning_rate": 5.922447313263938e-05, "loss": 0.0092, "step": 46890 }, { "epoch": 70.31484257871064, "grad_norm": 0.1744297295808792, "learning_rate": 5.920822175686952e-05, "loss": 0.0066, "step": 46900 }, { "epoch": 70.32983508245877, "grad_norm": 0.18300318717956543, "learning_rate": 5.919196937410239e-05, "loss": 0.0111, "step": 46910 }, { "epoch": 70.34482758620689, "grad_norm": 0.25022509694099426, "learning_rate": 5.917571598611529e-05, "loss": 0.0091, "step": 46920 }, { "epoch": 70.35982008995502, "grad_norm": 0.17071402072906494, "learning_rate": 5.9159461594685686e-05, "loss": 0.0134, "step": 46930 }, { "epoch": 70.37481259370315, "grad_norm": 0.2468300461769104, "learning_rate": 5.914320620159114e-05, "loss": 0.0112, "step": 46940 }, { "epoch": 70.38980509745127, "grad_norm": 0.1938854306936264, "learning_rate": 5.912694980860931e-05, "loss": 0.0112, "step": 46950 }, { "epoch": 70.4047976011994, "grad_norm": 0.31630223989486694, "learning_rate": 5.9110692417517964e-05, "loss": 0.0104, "step": 46960 }, { "epoch": 70.41979010494752, "grad_norm": 0.27855825424194336, "learning_rate": 5.9094434030095e-05, "loss": 0.0089, "step": 46970 }, { "epoch": 70.43478260869566, "grad_norm": 0.17383214831352234, "learning_rate": 5.907817464811841e-05, "loss": 0.0084, "step": 46980 }, { "epoch": 70.44977511244377, "grad_norm": 0.21920114755630493, "learning_rate": 5.906191427336627e-05, "loss": 0.0069, "step": 46990 }, { "epoch": 70.4647676161919, "grad_norm": 0.24707166850566864, "learning_rate": 5.9045652907616835e-05, "loss": 0.0092, "step": 47000 }, { "epoch": 70.47976011994002, "grad_norm": 0.2611231207847595, "learning_rate": 5.902939055264838e-05, "loss": 0.0102, "step": 47010 }, { "epoch": 70.49475262368816, "grad_norm": 0.2980685830116272, "learning_rate": 5.901312721023935e-05, "loss": 0.0087, "step": 47020 }, { "epoch": 70.50974512743628, "grad_norm": 0.2621264159679413, "learning_rate": 5.8996862882168294e-05, "loss": 0.0081, "step": 47030 }, { "epoch": 70.52473763118441, "grad_norm": 0.17079640924930573, "learning_rate": 5.8980597570213826e-05, "loss": 0.0081, "step": 47040 }, { "epoch": 70.53973013493254, "grad_norm": 0.13036660850048065, "learning_rate": 5.896433127615471e-05, "loss": 0.0086, "step": 47050 }, { "epoch": 70.55472263868066, "grad_norm": 0.3308461904525757, "learning_rate": 5.894806400176981e-05, "loss": 0.0113, "step": 47060 }, { "epoch": 70.56971514242879, "grad_norm": 0.17634300887584686, "learning_rate": 5.893179574883808e-05, "loss": 0.0067, "step": 47070 }, { "epoch": 70.58470764617691, "grad_norm": 0.13341721892356873, "learning_rate": 5.8915526519138585e-05, "loss": 0.0077, "step": 47080 }, { "epoch": 70.59970014992504, "grad_norm": 0.22932638227939606, "learning_rate": 5.889925631445053e-05, "loss": 0.0091, "step": 47090 }, { "epoch": 70.61469265367316, "grad_norm": 0.19975240528583527, "learning_rate": 5.888298513655318e-05, "loss": 0.0093, "step": 47100 }, { "epoch": 70.62968515742129, "grad_norm": 0.19867731630802155, "learning_rate": 5.886671298722592e-05, "loss": 0.0063, "step": 47110 }, { "epoch": 70.64467766116941, "grad_norm": 0.2415948212146759, "learning_rate": 5.885043986824828e-05, "loss": 0.0103, "step": 47120 }, { "epoch": 70.65967016491754, "grad_norm": 0.12583255767822266, "learning_rate": 5.883416578139982e-05, "loss": 0.0092, "step": 47130 }, { "epoch": 70.67466266866566, "grad_norm": 0.19441422820091248, "learning_rate": 5.881789072846029e-05, "loss": 0.0112, "step": 47140 }, { "epoch": 70.6896551724138, "grad_norm": 0.31497883796691895, "learning_rate": 5.8801614711209474e-05, "loss": 0.0093, "step": 47150 }, { "epoch": 70.70464767616193, "grad_norm": 0.254273921251297, "learning_rate": 5.878533773142732e-05, "loss": 0.0085, "step": 47160 }, { "epoch": 70.71964017991004, "grad_norm": 0.13996122777462006, "learning_rate": 5.8769059790893834e-05, "loss": 0.0085, "step": 47170 }, { "epoch": 70.73463268365818, "grad_norm": 0.1796526163816452, "learning_rate": 5.8752780891389145e-05, "loss": 0.0077, "step": 47180 }, { "epoch": 70.7496251874063, "grad_norm": 0.16782020032405853, "learning_rate": 5.873650103469351e-05, "loss": 0.0077, "step": 47190 }, { "epoch": 70.76461769115443, "grad_norm": 0.18191133439540863, "learning_rate": 5.872022022258725e-05, "loss": 0.0088, "step": 47200 }, { "epoch": 70.77961019490255, "grad_norm": 0.14976951479911804, "learning_rate": 5.8703938456850814e-05, "loss": 0.0087, "step": 47210 }, { "epoch": 70.79460269865068, "grad_norm": 0.15807601809501648, "learning_rate": 5.868765573926477e-05, "loss": 0.0079, "step": 47220 }, { "epoch": 70.8095952023988, "grad_norm": 0.29520177841186523, "learning_rate": 5.867137207160974e-05, "loss": 0.0079, "step": 47230 }, { "epoch": 70.82458770614693, "grad_norm": 0.1571073979139328, "learning_rate": 5.86550874556665e-05, "loss": 0.0097, "step": 47240 }, { "epoch": 70.83958020989505, "grad_norm": 0.11450138688087463, "learning_rate": 5.863880189321592e-05, "loss": 0.0071, "step": 47250 }, { "epoch": 70.85457271364318, "grad_norm": 0.19998571276664734, "learning_rate": 5.862251538603894e-05, "loss": 0.0088, "step": 47260 }, { "epoch": 70.8695652173913, "grad_norm": 0.1966048926115036, "learning_rate": 5.8606227935916656e-05, "loss": 0.0092, "step": 47270 }, { "epoch": 70.88455772113943, "grad_norm": 0.1340935081243515, "learning_rate": 5.858993954463021e-05, "loss": 0.0079, "step": 47280 }, { "epoch": 70.89955022488756, "grad_norm": 0.12534314393997192, "learning_rate": 5.8573650213960896e-05, "loss": 0.007, "step": 47290 }, { "epoch": 70.91454272863568, "grad_norm": 0.21190831065177917, "learning_rate": 5.8557359945690084e-05, "loss": 0.0075, "step": 47300 }, { "epoch": 70.92953523238381, "grad_norm": 0.2544231712818146, "learning_rate": 5.8541068741599246e-05, "loss": 0.0108, "step": 47310 }, { "epoch": 70.94452773613193, "grad_norm": 0.22767166793346405, "learning_rate": 5.8524776603469985e-05, "loss": 0.0088, "step": 47320 }, { "epoch": 70.95952023988006, "grad_norm": 0.3487575650215149, "learning_rate": 5.850848353308397e-05, "loss": 0.0122, "step": 47330 }, { "epoch": 70.97451274362818, "grad_norm": 0.2538786232471466, "learning_rate": 5.849218953222297e-05, "loss": 0.0072, "step": 47340 }, { "epoch": 70.98950524737631, "grad_norm": 0.15416502952575684, "learning_rate": 5.847589460266891e-05, "loss": 0.0079, "step": 47350 }, { "epoch": 71.00449775112443, "grad_norm": 0.2162753939628601, "learning_rate": 5.845959874620375e-05, "loss": 0.006, "step": 47360 }, { "epoch": 71.01949025487257, "grad_norm": 1.0705455541610718, "learning_rate": 5.84433019646096e-05, "loss": 0.0082, "step": 47370 }, { "epoch": 71.03448275862068, "grad_norm": 0.22212457656860352, "learning_rate": 5.842700425966863e-05, "loss": 0.0076, "step": 47380 }, { "epoch": 71.04947526236882, "grad_norm": 0.18737837672233582, "learning_rate": 5.841070563316315e-05, "loss": 0.0072, "step": 47390 }, { "epoch": 71.06446776611695, "grad_norm": 0.20474986732006073, "learning_rate": 5.839440608687554e-05, "loss": 0.0081, "step": 47400 }, { "epoch": 71.07946026986507, "grad_norm": 0.32842981815338135, "learning_rate": 5.837810562258831e-05, "loss": 0.009, "step": 47410 }, { "epoch": 71.0944527736132, "grad_norm": 0.22438102960586548, "learning_rate": 5.836180424208405e-05, "loss": 0.0062, "step": 47420 }, { "epoch": 71.10944527736132, "grad_norm": 0.3677392899990082, "learning_rate": 5.834550194714543e-05, "loss": 0.0074, "step": 47430 }, { "epoch": 71.12443778110945, "grad_norm": 0.1415470689535141, "learning_rate": 5.832919873955528e-05, "loss": 0.0109, "step": 47440 }, { "epoch": 71.13943028485757, "grad_norm": 2.0081892013549805, "learning_rate": 5.831289462109648e-05, "loss": 0.0059, "step": 47450 }, { "epoch": 71.1544227886057, "grad_norm": 0.3551347553730011, "learning_rate": 5.829658959355202e-05, "loss": 0.0123, "step": 47460 }, { "epoch": 71.16941529235382, "grad_norm": 0.19141922891139984, "learning_rate": 5.8280283658704995e-05, "loss": 0.0086, "step": 47470 }, { "epoch": 71.18440779610195, "grad_norm": 0.13849976658821106, "learning_rate": 5.826397681833859e-05, "loss": 0.0077, "step": 47480 }, { "epoch": 71.19940029985007, "grad_norm": 0.1860930472612381, "learning_rate": 5.824766907423612e-05, "loss": 0.0091, "step": 47490 }, { "epoch": 71.2143928035982, "grad_norm": 0.19679996371269226, "learning_rate": 5.8231360428180945e-05, "loss": 0.0101, "step": 47500 }, { "epoch": 71.22938530734633, "grad_norm": 0.1962919384241104, "learning_rate": 5.821505088195658e-05, "loss": 0.0072, "step": 47510 }, { "epoch": 71.24437781109445, "grad_norm": 0.223564013838768, "learning_rate": 5.819874043734661e-05, "loss": 0.0108, "step": 47520 }, { "epoch": 71.25937031484258, "grad_norm": 0.13568346202373505, "learning_rate": 5.8182429096134695e-05, "loss": 0.009, "step": 47530 }, { "epoch": 71.2743628185907, "grad_norm": 0.2241826206445694, "learning_rate": 5.816611686010465e-05, "loss": 0.0072, "step": 47540 }, { "epoch": 71.28935532233884, "grad_norm": 0.24116317927837372, "learning_rate": 5.814980373104033e-05, "loss": 0.0103, "step": 47550 }, { "epoch": 71.30434782608695, "grad_norm": 0.28616002202033997, "learning_rate": 5.813348971072572e-05, "loss": 0.0081, "step": 47560 }, { "epoch": 71.31934032983509, "grad_norm": 0.3230336904525757, "learning_rate": 5.811717480094492e-05, "loss": 0.0067, "step": 47570 }, { "epoch": 71.3343328335832, "grad_norm": 0.3973793685436249, "learning_rate": 5.810085900348209e-05, "loss": 0.0098, "step": 47580 }, { "epoch": 71.34932533733134, "grad_norm": 0.17354951798915863, "learning_rate": 5.8084542320121483e-05, "loss": 0.0055, "step": 47590 }, { "epoch": 71.36431784107945, "grad_norm": 0.26724889874458313, "learning_rate": 5.8068224752647497e-05, "loss": 0.0075, "step": 47600 }, { "epoch": 71.37931034482759, "grad_norm": 0.20100794732570648, "learning_rate": 5.805190630284457e-05, "loss": 0.0091, "step": 47610 }, { "epoch": 71.3943028485757, "grad_norm": 0.2937687933444977, "learning_rate": 5.803558697249729e-05, "loss": 0.0083, "step": 47620 }, { "epoch": 71.40929535232384, "grad_norm": 0.24992212653160095, "learning_rate": 5.8019266763390276e-05, "loss": 0.0101, "step": 47630 }, { "epoch": 71.42428785607197, "grad_norm": 0.17679578065872192, "learning_rate": 5.800294567730833e-05, "loss": 0.0088, "step": 47640 }, { "epoch": 71.43928035982009, "grad_norm": 0.34321126341819763, "learning_rate": 5.798662371603628e-05, "loss": 0.0091, "step": 47650 }, { "epoch": 71.45427286356822, "grad_norm": 0.16202780604362488, "learning_rate": 5.797030088135904e-05, "loss": 0.0081, "step": 47660 }, { "epoch": 71.46926536731634, "grad_norm": 0.12902311980724335, "learning_rate": 5.7953977175061705e-05, "loss": 0.0101, "step": 47670 }, { "epoch": 71.48425787106447, "grad_norm": 0.14878323674201965, "learning_rate": 5.793765259892938e-05, "loss": 0.0082, "step": 47680 }, { "epoch": 71.49925037481259, "grad_norm": 0.23097416758537292, "learning_rate": 5.792132715474729e-05, "loss": 0.0072, "step": 47690 }, { "epoch": 71.51424287856072, "grad_norm": 0.28434714674949646, "learning_rate": 5.790500084430078e-05, "loss": 0.0118, "step": 47700 }, { "epoch": 71.52923538230884, "grad_norm": 0.11263742297887802, "learning_rate": 5.7888673669375274e-05, "loss": 0.0078, "step": 47710 }, { "epoch": 71.54422788605697, "grad_norm": 0.16310182213783264, "learning_rate": 5.787234563175625e-05, "loss": 0.01, "step": 47720 }, { "epoch": 71.55922038980509, "grad_norm": 0.21627259254455566, "learning_rate": 5.7856016733229355e-05, "loss": 0.009, "step": 47730 }, { "epoch": 71.57421289355322, "grad_norm": 0.15087732672691345, "learning_rate": 5.7839686975580297e-05, "loss": 0.0065, "step": 47740 }, { "epoch": 71.58920539730136, "grad_norm": 0.24906933307647705, "learning_rate": 5.782335636059484e-05, "loss": 0.0083, "step": 47750 }, { "epoch": 71.60419790104947, "grad_norm": 0.13184195756912231, "learning_rate": 5.780702489005889e-05, "loss": 0.0072, "step": 47760 }, { "epoch": 71.6191904047976, "grad_norm": 0.1133052408695221, "learning_rate": 5.779069256575845e-05, "loss": 0.0071, "step": 47770 }, { "epoch": 71.63418290854572, "grad_norm": 0.1899164915084839, "learning_rate": 5.7774359389479574e-05, "loss": 0.0082, "step": 47780 }, { "epoch": 71.64917541229386, "grad_norm": 0.13063974678516388, "learning_rate": 5.775802536300845e-05, "loss": 0.0075, "step": 47790 }, { "epoch": 71.66416791604198, "grad_norm": 0.3873598277568817, "learning_rate": 5.774169048813134e-05, "loss": 0.0085, "step": 47800 }, { "epoch": 71.67916041979011, "grad_norm": 0.22691112756729126, "learning_rate": 5.77253547666346e-05, "loss": 0.0086, "step": 47810 }, { "epoch": 71.69415292353823, "grad_norm": 0.18136730790138245, "learning_rate": 5.770901820030465e-05, "loss": 0.0077, "step": 47820 }, { "epoch": 71.70914542728636, "grad_norm": 0.17701396346092224, "learning_rate": 5.769268079092809e-05, "loss": 0.0071, "step": 47830 }, { "epoch": 71.72413793103448, "grad_norm": 0.17598141729831696, "learning_rate": 5.767634254029151e-05, "loss": 0.007, "step": 47840 }, { "epoch": 71.73913043478261, "grad_norm": 0.21141161024570465, "learning_rate": 5.7660003450181655e-05, "loss": 0.0074, "step": 47850 }, { "epoch": 71.75412293853074, "grad_norm": 0.24000009894371033, "learning_rate": 5.764366352238534e-05, "loss": 0.0059, "step": 47860 }, { "epoch": 71.76911544227886, "grad_norm": 0.16833776235580444, "learning_rate": 5.7627322758689474e-05, "loss": 0.011, "step": 47870 }, { "epoch": 71.78410794602699, "grad_norm": 0.21777603030204773, "learning_rate": 5.761098116088105e-05, "loss": 0.0093, "step": 47880 }, { "epoch": 71.79910044977511, "grad_norm": 0.1954641342163086, "learning_rate": 5.759463873074717e-05, "loss": 0.0073, "step": 47890 }, { "epoch": 71.81409295352324, "grad_norm": 0.2755787968635559, "learning_rate": 5.757829547007504e-05, "loss": 0.0084, "step": 47900 }, { "epoch": 71.82908545727136, "grad_norm": 0.16875427961349487, "learning_rate": 5.756195138065189e-05, "loss": 0.009, "step": 47910 }, { "epoch": 71.8440779610195, "grad_norm": 0.16622695326805115, "learning_rate": 5.754560646426511e-05, "loss": 0.009, "step": 47920 }, { "epoch": 71.85907046476761, "grad_norm": 0.15995663404464722, "learning_rate": 5.752926072270216e-05, "loss": 0.0087, "step": 47930 }, { "epoch": 71.87406296851574, "grad_norm": 0.5903404951095581, "learning_rate": 5.7512914157750563e-05, "loss": 0.0114, "step": 47940 }, { "epoch": 71.88905547226386, "grad_norm": 0.5184208750724792, "learning_rate": 5.749656677119798e-05, "loss": 0.0081, "step": 47950 }, { "epoch": 71.904047976012, "grad_norm": 0.1828460544347763, "learning_rate": 5.7480218564832125e-05, "loss": 0.0096, "step": 47960 }, { "epoch": 71.91904047976011, "grad_norm": 0.18785366415977478, "learning_rate": 5.746386954044082e-05, "loss": 0.009, "step": 47970 }, { "epoch": 71.93403298350825, "grad_norm": 0.2227935492992401, "learning_rate": 5.744751969981195e-05, "loss": 0.0095, "step": 47980 }, { "epoch": 71.94902548725638, "grad_norm": 0.2682977020740509, "learning_rate": 5.7431169044733526e-05, "loss": 0.0087, "step": 47990 }, { "epoch": 71.9640179910045, "grad_norm": 0.21541263163089752, "learning_rate": 5.741481757699364e-05, "loss": 0.007, "step": 48000 }, { "epoch": 71.97901049475263, "grad_norm": 0.172296404838562, "learning_rate": 5.7398465298380434e-05, "loss": 0.009, "step": 48010 }, { "epoch": 71.99400299850075, "grad_norm": 0.2670452296733856, "learning_rate": 5.7382112210682193e-05, "loss": 0.0111, "step": 48020 }, { "epoch": 72.00899550224888, "grad_norm": 0.2391091138124466, "learning_rate": 5.7365758315687266e-05, "loss": 0.0082, "step": 48030 }, { "epoch": 72.023988005997, "grad_norm": 0.20972129702568054, "learning_rate": 5.734940361518407e-05, "loss": 0.0081, "step": 48040 }, { "epoch": 72.03898050974513, "grad_norm": 0.23595935106277466, "learning_rate": 5.733304811096116e-05, "loss": 0.0079, "step": 48050 }, { "epoch": 72.05397301349325, "grad_norm": 0.2131650745868683, "learning_rate": 5.731669180480713e-05, "loss": 0.0076, "step": 48060 }, { "epoch": 72.06896551724138, "grad_norm": 0.1698208749294281, "learning_rate": 5.730033469851067e-05, "loss": 0.006, "step": 48070 }, { "epoch": 72.0839580209895, "grad_norm": 0.09981311857700348, "learning_rate": 5.7283976793860607e-05, "loss": 0.0056, "step": 48080 }, { "epoch": 72.09895052473763, "grad_norm": 1.3783453702926636, "learning_rate": 5.726761809264577e-05, "loss": 0.0067, "step": 48090 }, { "epoch": 72.11394302848576, "grad_norm": 0.9491590261459351, "learning_rate": 5.7251258596655155e-05, "loss": 0.0086, "step": 48100 }, { "epoch": 72.12893553223388, "grad_norm": 0.19754871726036072, "learning_rate": 5.72348983076778e-05, "loss": 0.0077, "step": 48110 }, { "epoch": 72.14392803598201, "grad_norm": 0.2407853901386261, "learning_rate": 5.7218537227502854e-05, "loss": 0.0089, "step": 48120 }, { "epoch": 72.15892053973013, "grad_norm": 0.4087379574775696, "learning_rate": 5.720217535791951e-05, "loss": 0.0098, "step": 48130 }, { "epoch": 72.17391304347827, "grad_norm": 0.32011616230010986, "learning_rate": 5.718581270071711e-05, "loss": 0.0094, "step": 48140 }, { "epoch": 72.18890554722638, "grad_norm": 0.14531780779361725, "learning_rate": 5.716944925768505e-05, "loss": 0.0069, "step": 48150 }, { "epoch": 72.20389805097452, "grad_norm": 0.20751173794269562, "learning_rate": 5.7153085030612786e-05, "loss": 0.0087, "step": 48160 }, { "epoch": 72.21889055472263, "grad_norm": 0.12686431407928467, "learning_rate": 5.713672002128989e-05, "loss": 0.0086, "step": 48170 }, { "epoch": 72.23388305847077, "grad_norm": 0.2626514434814453, "learning_rate": 5.712035423150602e-05, "loss": 0.0087, "step": 48180 }, { "epoch": 72.24887556221888, "grad_norm": 0.28086307644844055, "learning_rate": 5.710398766305094e-05, "loss": 0.0093, "step": 48190 }, { "epoch": 72.26386806596702, "grad_norm": 0.2599424719810486, "learning_rate": 5.708762031771442e-05, "loss": 0.0098, "step": 48200 }, { "epoch": 72.27886056971514, "grad_norm": 0.14574937522411346, "learning_rate": 5.707125219728642e-05, "loss": 0.0099, "step": 48210 }, { "epoch": 72.29385307346327, "grad_norm": 0.2401014119386673, "learning_rate": 5.7054883303556905e-05, "loss": 0.0093, "step": 48220 }, { "epoch": 72.3088455772114, "grad_norm": 0.1305832415819168, "learning_rate": 5.703851363831595e-05, "loss": 0.0097, "step": 48230 }, { "epoch": 72.32383808095952, "grad_norm": 0.15597781538963318, "learning_rate": 5.702214320335374e-05, "loss": 0.0073, "step": 48240 }, { "epoch": 72.33883058470765, "grad_norm": 0.1547631025314331, "learning_rate": 5.7005772000460514e-05, "loss": 0.0083, "step": 48250 }, { "epoch": 72.35382308845577, "grad_norm": 0.1784517616033554, "learning_rate": 5.698940003142656e-05, "loss": 0.0057, "step": 48260 }, { "epoch": 72.3688155922039, "grad_norm": 0.15379369258880615, "learning_rate": 5.697302729804236e-05, "loss": 0.0093, "step": 48270 }, { "epoch": 72.38380809595202, "grad_norm": 0.1818801760673523, "learning_rate": 5.695665380209837e-05, "loss": 0.0092, "step": 48280 }, { "epoch": 72.39880059970015, "grad_norm": 0.19409528374671936, "learning_rate": 5.6940279545385165e-05, "loss": 0.0074, "step": 48290 }, { "epoch": 72.41379310344827, "grad_norm": 0.1835193634033203, "learning_rate": 5.692390452969344e-05, "loss": 0.0078, "step": 48300 }, { "epoch": 72.4287856071964, "grad_norm": 0.19453011453151703, "learning_rate": 5.690752875681392e-05, "loss": 0.0075, "step": 48310 }, { "epoch": 72.44377811094452, "grad_norm": 0.25833144783973694, "learning_rate": 5.6891152228537435e-05, "loss": 0.0151, "step": 48320 }, { "epoch": 72.45877061469265, "grad_norm": 0.1670675277709961, "learning_rate": 5.687477494665492e-05, "loss": 0.0073, "step": 48330 }, { "epoch": 72.47376311844079, "grad_norm": 0.2951676547527313, "learning_rate": 5.685839691295733e-05, "loss": 0.0112, "step": 48340 }, { "epoch": 72.4887556221889, "grad_norm": 0.17148274183273315, "learning_rate": 5.6842018129235786e-05, "loss": 0.0079, "step": 48350 }, { "epoch": 72.50374812593704, "grad_norm": 0.23680050671100616, "learning_rate": 5.6825638597281404e-05, "loss": 0.0059, "step": 48360 }, { "epoch": 72.51874062968515, "grad_norm": 0.13721397519111633, "learning_rate": 5.680925831888546e-05, "loss": 0.0096, "step": 48370 }, { "epoch": 72.53373313343329, "grad_norm": 0.14905992150306702, "learning_rate": 5.6792877295839274e-05, "loss": 0.0066, "step": 48380 }, { "epoch": 72.5487256371814, "grad_norm": 0.20419110357761383, "learning_rate": 5.6776495529934224e-05, "loss": 0.0095, "step": 48390 }, { "epoch": 72.56371814092954, "grad_norm": 0.22976239025592804, "learning_rate": 5.6760113022961824e-05, "loss": 0.0111, "step": 48400 }, { "epoch": 72.57871064467766, "grad_norm": 0.1760520040988922, "learning_rate": 5.6743729776713617e-05, "loss": 0.01, "step": 48410 }, { "epoch": 72.59370314842579, "grad_norm": 0.16951341927051544, "learning_rate": 5.672734579298126e-05, "loss": 0.0072, "step": 48420 }, { "epoch": 72.6086956521739, "grad_norm": 0.14857201278209686, "learning_rate": 5.671096107355649e-05, "loss": 0.0092, "step": 48430 }, { "epoch": 72.62368815592204, "grad_norm": 0.14702536165714264, "learning_rate": 5.66945756202311e-05, "loss": 0.0072, "step": 48440 }, { "epoch": 72.63868065967017, "grad_norm": 0.2700873017311096, "learning_rate": 5.667818943479699e-05, "loss": 0.009, "step": 48450 }, { "epoch": 72.65367316341829, "grad_norm": 0.29960232973098755, "learning_rate": 5.666180251904612e-05, "loss": 0.0079, "step": 48460 }, { "epoch": 72.66866566716642, "grad_norm": 0.2337082177400589, "learning_rate": 5.6645414874770555e-05, "loss": 0.007, "step": 48470 }, { "epoch": 72.68365817091454, "grad_norm": 0.17551814019680023, "learning_rate": 5.66290265037624e-05, "loss": 0.0081, "step": 48480 }, { "epoch": 72.69865067466267, "grad_norm": 0.2795223593711853, "learning_rate": 5.661263740781386e-05, "loss": 0.0069, "step": 48490 }, { "epoch": 72.71364317841079, "grad_norm": 0.25419268012046814, "learning_rate": 5.6596247588717254e-05, "loss": 0.0081, "step": 48500 }, { "epoch": 72.72863568215892, "grad_norm": 0.23237359523773193, "learning_rate": 5.6579857048264926e-05, "loss": 0.0101, "step": 48510 }, { "epoch": 72.74362818590704, "grad_norm": 0.2605423629283905, "learning_rate": 5.6563465788249314e-05, "loss": 0.0085, "step": 48520 }, { "epoch": 72.75862068965517, "grad_norm": 0.20888397097587585, "learning_rate": 5.6547073810462956e-05, "loss": 0.0075, "step": 48530 }, { "epoch": 72.77361319340329, "grad_norm": 0.17863325774669647, "learning_rate": 5.653068111669846e-05, "loss": 0.0096, "step": 48540 }, { "epoch": 72.78860569715143, "grad_norm": 0.18145231902599335, "learning_rate": 5.651428770874848e-05, "loss": 0.0085, "step": 48550 }, { "epoch": 72.80359820089954, "grad_norm": 0.21324694156646729, "learning_rate": 5.64978935884058e-05, "loss": 0.0078, "step": 48560 }, { "epoch": 72.81859070464768, "grad_norm": 0.19497473537921906, "learning_rate": 5.6481498757463244e-05, "loss": 0.0081, "step": 48570 }, { "epoch": 72.83358320839581, "grad_norm": 0.2315046638250351, "learning_rate": 5.646510321771373e-05, "loss": 0.0073, "step": 48580 }, { "epoch": 72.84857571214393, "grad_norm": 0.21730129420757294, "learning_rate": 5.644870697095024e-05, "loss": 0.0085, "step": 48590 }, { "epoch": 72.86356821589206, "grad_norm": 0.2602124512195587, "learning_rate": 5.643231001896586e-05, "loss": 0.0131, "step": 48600 }, { "epoch": 72.87856071964018, "grad_norm": 0.1932385116815567, "learning_rate": 5.6415912363553726e-05, "loss": 0.0103, "step": 48610 }, { "epoch": 72.89355322338831, "grad_norm": 0.6753186583518982, "learning_rate": 5.639951400650706e-05, "loss": 0.0135, "step": 48620 }, { "epoch": 72.90854572713643, "grad_norm": 0.1883881539106369, "learning_rate": 5.6383114949619165e-05, "loss": 0.0076, "step": 48630 }, { "epoch": 72.92353823088456, "grad_norm": 0.3521372377872467, "learning_rate": 5.636671519468342e-05, "loss": 0.0112, "step": 48640 }, { "epoch": 72.93853073463268, "grad_norm": 0.15034741163253784, "learning_rate": 5.635031474349327e-05, "loss": 0.0068, "step": 48650 }, { "epoch": 72.95352323838081, "grad_norm": 0.15654084086418152, "learning_rate": 5.6333913597842246e-05, "loss": 0.0086, "step": 48660 }, { "epoch": 72.96851574212893, "grad_norm": 0.1922297477722168, "learning_rate": 5.6317511759523955e-05, "loss": 0.0096, "step": 48670 }, { "epoch": 72.98350824587706, "grad_norm": 0.21129228174686432, "learning_rate": 5.630110923033207e-05, "loss": 0.0092, "step": 48680 }, { "epoch": 72.9985007496252, "grad_norm": 0.23436768352985382, "learning_rate": 5.628470601206036e-05, "loss": 0.0091, "step": 48690 }, { "epoch": 73.01349325337331, "grad_norm": 0.2460079938173294, "learning_rate": 5.626830210650266e-05, "loss": 0.0111, "step": 48700 }, { "epoch": 73.02848575712144, "grad_norm": 0.2622836232185364, "learning_rate": 5.625189751545285e-05, "loss": 0.009, "step": 48710 }, { "epoch": 73.04347826086956, "grad_norm": 0.13092991709709167, "learning_rate": 5.6235492240704936e-05, "loss": 0.0077, "step": 48720 }, { "epoch": 73.0584707646177, "grad_norm": 0.1778021901845932, "learning_rate": 5.621908628405296e-05, "loss": 0.0084, "step": 48730 }, { "epoch": 73.07346326836581, "grad_norm": 0.17477107048034668, "learning_rate": 5.620267964729106e-05, "loss": 0.0104, "step": 48740 }, { "epoch": 73.08845577211395, "grad_norm": 0.19435545802116394, "learning_rate": 5.618627233221344e-05, "loss": 0.0085, "step": 48750 }, { "epoch": 73.10344827586206, "grad_norm": 0.215741366147995, "learning_rate": 5.616986434061438e-05, "loss": 0.0073, "step": 48760 }, { "epoch": 73.1184407796102, "grad_norm": 0.1358271837234497, "learning_rate": 5.615345567428822e-05, "loss": 0.0068, "step": 48770 }, { "epoch": 73.13343328335831, "grad_norm": 0.23133869469165802, "learning_rate": 5.613704633502941e-05, "loss": 0.0073, "step": 48780 }, { "epoch": 73.14842578710645, "grad_norm": 0.12414601445198059, "learning_rate": 5.612063632463245e-05, "loss": 0.0076, "step": 48790 }, { "epoch": 73.16341829085458, "grad_norm": 0.1783679574728012, "learning_rate": 5.610422564489188e-05, "loss": 0.0091, "step": 48800 }, { "epoch": 73.1784107946027, "grad_norm": 0.21775364875793457, "learning_rate": 5.608781429760239e-05, "loss": 0.0114, "step": 48810 }, { "epoch": 73.19340329835083, "grad_norm": 0.20674094557762146, "learning_rate": 5.607140228455866e-05, "loss": 0.0111, "step": 48820 }, { "epoch": 73.20839580209895, "grad_norm": 0.2501046359539032, "learning_rate": 5.605498960755553e-05, "loss": 0.0087, "step": 48830 }, { "epoch": 73.22338830584708, "grad_norm": 0.14065435528755188, "learning_rate": 5.603857626838782e-05, "loss": 0.0087, "step": 48840 }, { "epoch": 73.2383808095952, "grad_norm": 0.18043214082717896, "learning_rate": 5.60221622688505e-05, "loss": 0.0092, "step": 48850 }, { "epoch": 73.25337331334333, "grad_norm": 0.3167119324207306, "learning_rate": 5.6005747610738565e-05, "loss": 0.0075, "step": 48860 }, { "epoch": 73.26836581709145, "grad_norm": 0.26986685395240784, "learning_rate": 5.59893322958471e-05, "loss": 0.0089, "step": 48870 }, { "epoch": 73.28335832083958, "grad_norm": 0.24743029475212097, "learning_rate": 5.5972916325971256e-05, "loss": 0.012, "step": 48880 }, { "epoch": 73.2983508245877, "grad_norm": 0.42231547832489014, "learning_rate": 5.595649970290628e-05, "loss": 0.0116, "step": 48890 }, { "epoch": 73.31334332833583, "grad_norm": 0.2751283347606659, "learning_rate": 5.5940082428447426e-05, "loss": 0.0103, "step": 48900 }, { "epoch": 73.32833583208395, "grad_norm": 0.15503130853176117, "learning_rate": 5.592366450439012e-05, "loss": 0.0099, "step": 48910 }, { "epoch": 73.34332833583208, "grad_norm": 0.2956176698207855, "learning_rate": 5.590724593252975e-05, "loss": 0.0079, "step": 48920 }, { "epoch": 73.35832083958022, "grad_norm": 0.2255377322435379, "learning_rate": 5.589082671466184e-05, "loss": 0.0079, "step": 48930 }, { "epoch": 73.37331334332833, "grad_norm": 0.1981758028268814, "learning_rate": 5.587440685258199e-05, "loss": 0.0075, "step": 48940 }, { "epoch": 73.38830584707647, "grad_norm": 0.18378916382789612, "learning_rate": 5.585798634808583e-05, "loss": 0.0092, "step": 48950 }, { "epoch": 73.40329835082458, "grad_norm": 0.21032539010047913, "learning_rate": 5.584156520296909e-05, "loss": 0.0103, "step": 48960 }, { "epoch": 73.41829085457272, "grad_norm": 0.2142455279827118, "learning_rate": 5.582514341902757e-05, "loss": 0.0093, "step": 48970 }, { "epoch": 73.43328335832084, "grad_norm": 0.3143920600414276, "learning_rate": 5.580872099805713e-05, "loss": 0.0077, "step": 48980 }, { "epoch": 73.44827586206897, "grad_norm": 0.2265474796295166, "learning_rate": 5.5792297941853674e-05, "loss": 0.007, "step": 48990 }, { "epoch": 73.46326836581709, "grad_norm": 0.1930873990058899, "learning_rate": 5.5775874252213247e-05, "loss": 0.0094, "step": 49000 }, { "epoch": 73.47826086956522, "grad_norm": 0.22022151947021484, "learning_rate": 5.575944993093189e-05, "loss": 0.0078, "step": 49010 }, { "epoch": 73.49325337331334, "grad_norm": 0.2380155771970749, "learning_rate": 5.574302497980574e-05, "loss": 0.0083, "step": 49020 }, { "epoch": 73.50824587706147, "grad_norm": 0.34197187423706055, "learning_rate": 5.5726599400631005e-05, "loss": 0.0079, "step": 49030 }, { "epoch": 73.5232383808096, "grad_norm": 0.12685531377792358, "learning_rate": 5.571017319520399e-05, "loss": 0.0071, "step": 49040 }, { "epoch": 73.53823088455772, "grad_norm": 0.20723354816436768, "learning_rate": 5.5693746365320984e-05, "loss": 0.008, "step": 49050 }, { "epoch": 73.55322338830585, "grad_norm": 0.34822723269462585, "learning_rate": 5.567731891277848e-05, "loss": 0.0089, "step": 49060 }, { "epoch": 73.56821589205397, "grad_norm": 0.29309868812561035, "learning_rate": 5.566089083937288e-05, "loss": 0.0083, "step": 49070 }, { "epoch": 73.5832083958021, "grad_norm": 0.23855651915073395, "learning_rate": 5.564446214690079e-05, "loss": 0.0083, "step": 49080 }, { "epoch": 73.59820089955022, "grad_norm": 0.225834921002388, "learning_rate": 5.56280328371588e-05, "loss": 0.0059, "step": 49090 }, { "epoch": 73.61319340329835, "grad_norm": 0.22987312078475952, "learning_rate": 5.5611602911943596e-05, "loss": 0.0101, "step": 49100 }, { "epoch": 73.62818590704647, "grad_norm": 0.13202685117721558, "learning_rate": 5.559517237305194e-05, "loss": 0.0085, "step": 49110 }, { "epoch": 73.6431784107946, "grad_norm": 0.20537742972373962, "learning_rate": 5.557874122228064e-05, "loss": 0.0062, "step": 49120 }, { "epoch": 73.65817091454272, "grad_norm": 0.1634889841079712, "learning_rate": 5.55623094614266e-05, "loss": 0.0099, "step": 49130 }, { "epoch": 73.67316341829086, "grad_norm": 0.14736407995224, "learning_rate": 5.5545877092286743e-05, "loss": 0.0074, "step": 49140 }, { "epoch": 73.68815592203899, "grad_norm": 0.14899003505706787, "learning_rate": 5.552944411665811e-05, "loss": 0.007, "step": 49150 }, { "epoch": 73.7031484257871, "grad_norm": 0.15315157175064087, "learning_rate": 5.5513010536337795e-05, "loss": 0.0088, "step": 49160 }, { "epoch": 73.71814092953524, "grad_norm": 0.22751390933990479, "learning_rate": 5.5496576353122934e-05, "loss": 0.0101, "step": 49170 }, { "epoch": 73.73313343328336, "grad_norm": 0.27148959040641785, "learning_rate": 5.548014156881074e-05, "loss": 0.0081, "step": 49180 }, { "epoch": 73.74812593703149, "grad_norm": 0.26398977637290955, "learning_rate": 5.5463706185198494e-05, "loss": 0.0068, "step": 49190 }, { "epoch": 73.7631184407796, "grad_norm": 0.22959189116954803, "learning_rate": 5.544727020408358e-05, "loss": 0.0077, "step": 49200 }, { "epoch": 73.77811094452774, "grad_norm": 0.8541477918624878, "learning_rate": 5.5430833627263366e-05, "loss": 0.0094, "step": 49210 }, { "epoch": 73.79310344827586, "grad_norm": 0.20449967682361603, "learning_rate": 5.5414396456535354e-05, "loss": 0.0066, "step": 49220 }, { "epoch": 73.80809595202399, "grad_norm": 0.3620331883430481, "learning_rate": 5.539795869369709e-05, "loss": 0.0084, "step": 49230 }, { "epoch": 73.82308845577211, "grad_norm": 0.1785815805196762, "learning_rate": 5.538152034054618e-05, "loss": 0.0079, "step": 49240 }, { "epoch": 73.83808095952024, "grad_norm": 0.24976083636283875, "learning_rate": 5.536508139888028e-05, "loss": 0.0104, "step": 49250 }, { "epoch": 73.85307346326836, "grad_norm": 0.38219934701919556, "learning_rate": 5.534864187049716e-05, "loss": 0.0075, "step": 49260 }, { "epoch": 73.86806596701649, "grad_norm": 0.16291983425617218, "learning_rate": 5.5332201757194615e-05, "loss": 0.0077, "step": 49270 }, { "epoch": 73.88305847076462, "grad_norm": 0.18015684187412262, "learning_rate": 5.531576106077049e-05, "loss": 0.0079, "step": 49280 }, { "epoch": 73.89805097451274, "grad_norm": 0.17828941345214844, "learning_rate": 5.529931978302272e-05, "loss": 0.0072, "step": 49290 }, { "epoch": 73.91304347826087, "grad_norm": 0.27066779136657715, "learning_rate": 5.528287792574932e-05, "loss": 0.0067, "step": 49300 }, { "epoch": 73.92803598200899, "grad_norm": 0.15262077748775482, "learning_rate": 5.5266435490748324e-05, "loss": 0.0074, "step": 49310 }, { "epoch": 73.94302848575713, "grad_norm": 0.22831612825393677, "learning_rate": 5.524999247981787e-05, "loss": 0.0115, "step": 49320 }, { "epoch": 73.95802098950524, "grad_norm": 0.23410749435424805, "learning_rate": 5.523354889475613e-05, "loss": 0.0085, "step": 49330 }, { "epoch": 73.97301349325338, "grad_norm": 0.1700093150138855, "learning_rate": 5.521710473736134e-05, "loss": 0.0093, "step": 49340 }, { "epoch": 73.9880059970015, "grad_norm": 0.35515841841697693, "learning_rate": 5.5200660009431826e-05, "loss": 0.0064, "step": 49350 }, { "epoch": 74.00299850074963, "grad_norm": 0.14198482036590576, "learning_rate": 5.518421471276596e-05, "loss": 0.0087, "step": 49360 }, { "epoch": 74.01799100449774, "grad_norm": 0.1720486730337143, "learning_rate": 5.5167768849162174e-05, "loss": 0.0061, "step": 49370 }, { "epoch": 74.03298350824588, "grad_norm": 0.16796539723873138, "learning_rate": 5.515132242041893e-05, "loss": 0.0085, "step": 49380 }, { "epoch": 74.04797601199401, "grad_norm": 0.12603038549423218, "learning_rate": 5.513487542833483e-05, "loss": 0.0091, "step": 49390 }, { "epoch": 74.06296851574213, "grad_norm": 0.22619052231311798, "learning_rate": 5.5118427874708466e-05, "loss": 0.008, "step": 49400 }, { "epoch": 74.07796101949026, "grad_norm": 0.4389044940471649, "learning_rate": 5.510197976133853e-05, "loss": 0.0076, "step": 49410 }, { "epoch": 74.09295352323838, "grad_norm": 0.12761421501636505, "learning_rate": 5.508553109002376e-05, "loss": 0.0086, "step": 49420 }, { "epoch": 74.10794602698651, "grad_norm": 0.1740448921918869, "learning_rate": 5.5069081862562957e-05, "loss": 0.007, "step": 49430 }, { "epoch": 74.12293853073463, "grad_norm": 0.13840560615062714, "learning_rate": 5.5052632080754965e-05, "loss": 0.008, "step": 49440 }, { "epoch": 74.13793103448276, "grad_norm": 0.09757491201162338, "learning_rate": 5.503618174639874e-05, "loss": 0.0058, "step": 49450 }, { "epoch": 74.15292353823088, "grad_norm": 0.30646008253097534, "learning_rate": 5.501973086129325e-05, "loss": 0.0056, "step": 49460 }, { "epoch": 74.16791604197901, "grad_norm": 0.12860321998596191, "learning_rate": 5.500327942723753e-05, "loss": 0.0073, "step": 49470 }, { "epoch": 74.18290854572713, "grad_norm": 0.10180036723613739, "learning_rate": 5.498682744603071e-05, "loss": 0.0068, "step": 49480 }, { "epoch": 74.19790104947526, "grad_norm": 0.19102619588375092, "learning_rate": 5.497037491947194e-05, "loss": 0.0105, "step": 49490 }, { "epoch": 74.2128935532234, "grad_norm": 0.25972625613212585, "learning_rate": 5.4953921849360424e-05, "loss": 0.0084, "step": 49500 }, { "epoch": 74.22788605697151, "grad_norm": 0.21571595966815948, "learning_rate": 5.493746823749547e-05, "loss": 0.0072, "step": 49510 }, { "epoch": 74.24287856071965, "grad_norm": 0.18290643393993378, "learning_rate": 5.49210140856764e-05, "loss": 0.0062, "step": 49520 }, { "epoch": 74.25787106446776, "grad_norm": 0.11295374482870102, "learning_rate": 5.4904559395702647e-05, "loss": 0.0089, "step": 49530 }, { "epoch": 74.2728635682159, "grad_norm": 0.10808534175157547, "learning_rate": 5.488810416937364e-05, "loss": 0.0078, "step": 49540 }, { "epoch": 74.28785607196401, "grad_norm": 0.27394723892211914, "learning_rate": 5.4871648408488926e-05, "loss": 0.0118, "step": 49550 }, { "epoch": 74.30284857571215, "grad_norm": 0.4578370451927185, "learning_rate": 5.485519211484807e-05, "loss": 0.0087, "step": 49560 }, { "epoch": 74.31784107946027, "grad_norm": 0.2592524588108063, "learning_rate": 5.4838735290250676e-05, "loss": 0.0093, "step": 49570 }, { "epoch": 74.3328335832084, "grad_norm": 0.20820359885692596, "learning_rate": 5.482227793649649e-05, "loss": 0.009, "step": 49580 }, { "epoch": 74.34782608695652, "grad_norm": 0.19575437903404236, "learning_rate": 5.480582005538524e-05, "loss": 0.0093, "step": 49590 }, { "epoch": 74.36281859070465, "grad_norm": 0.14326339960098267, "learning_rate": 5.478936164871671e-05, "loss": 0.0087, "step": 49600 }, { "epoch": 74.37781109445277, "grad_norm": 0.2537511885166168, "learning_rate": 5.477290271829081e-05, "loss": 0.0085, "step": 49610 }, { "epoch": 74.3928035982009, "grad_norm": 0.23132221400737762, "learning_rate": 5.475644326590744e-05, "loss": 0.0059, "step": 49620 }, { "epoch": 74.40779610194903, "grad_norm": 0.37607696652412415, "learning_rate": 5.473998329336658e-05, "loss": 0.0123, "step": 49630 }, { "epoch": 74.42278860569715, "grad_norm": 0.19951379299163818, "learning_rate": 5.4723522802468286e-05, "loss": 0.0065, "step": 49640 }, { "epoch": 74.43778110944528, "grad_norm": 0.37457799911499023, "learning_rate": 5.470706179501264e-05, "loss": 0.0094, "step": 49650 }, { "epoch": 74.4527736131934, "grad_norm": 0.2816176414489746, "learning_rate": 5.4690600272799776e-05, "loss": 0.0077, "step": 49660 }, { "epoch": 74.46776611694153, "grad_norm": 0.2383369356393814, "learning_rate": 5.467413823762993e-05, "loss": 0.0073, "step": 49670 }, { "epoch": 74.48275862068965, "grad_norm": 0.1881512701511383, "learning_rate": 5.465767569130335e-05, "loss": 0.0063, "step": 49680 }, { "epoch": 74.49775112443778, "grad_norm": 0.24681897461414337, "learning_rate": 5.464121263562036e-05, "loss": 0.0076, "step": 49690 }, { "epoch": 74.5127436281859, "grad_norm": 0.18122874200344086, "learning_rate": 5.4624749072381343e-05, "loss": 0.0073, "step": 49700 }, { "epoch": 74.52773613193403, "grad_norm": 0.12836629152297974, "learning_rate": 5.460828500338672e-05, "loss": 0.0067, "step": 49710 }, { "epoch": 74.54272863568215, "grad_norm": 0.2561025619506836, "learning_rate": 5.459182043043698e-05, "loss": 0.0121, "step": 49720 }, { "epoch": 74.55772113943029, "grad_norm": 0.22780254483222961, "learning_rate": 5.457535535533265e-05, "loss": 0.0113, "step": 49730 }, { "epoch": 74.57271364317842, "grad_norm": 0.2654738426208496, "learning_rate": 5.4558889779874334e-05, "loss": 0.0066, "step": 49740 }, { "epoch": 74.58770614692654, "grad_norm": 0.21807588636875153, "learning_rate": 5.454242370586269e-05, "loss": 0.0092, "step": 49750 }, { "epoch": 74.60269865067467, "grad_norm": 0.2181917130947113, "learning_rate": 5.452595713509843e-05, "loss": 0.0101, "step": 49760 }, { "epoch": 74.61769115442279, "grad_norm": 0.21809792518615723, "learning_rate": 5.4509490069382274e-05, "loss": 0.0077, "step": 49770 }, { "epoch": 74.63268365817092, "grad_norm": 0.2870003581047058, "learning_rate": 5.449302251051509e-05, "loss": 0.0104, "step": 49780 }, { "epoch": 74.64767616191904, "grad_norm": 0.13203908503055573, "learning_rate": 5.44765544602977e-05, "loss": 0.0074, "step": 49790 }, { "epoch": 74.66266866566717, "grad_norm": 0.19350963830947876, "learning_rate": 5.446008592053107e-05, "loss": 0.0082, "step": 49800 }, { "epoch": 74.67766116941529, "grad_norm": 0.30193811655044556, "learning_rate": 5.4443616893016135e-05, "loss": 0.0074, "step": 49810 }, { "epoch": 74.69265367316342, "grad_norm": 0.32124024629592896, "learning_rate": 5.4427147379553934e-05, "loss": 0.0126, "step": 49820 }, { "epoch": 74.70764617691154, "grad_norm": 0.1784159243106842, "learning_rate": 5.441067738194556e-05, "loss": 0.0076, "step": 49830 }, { "epoch": 74.72263868065967, "grad_norm": 0.18137721717357635, "learning_rate": 5.439420690199214e-05, "loss": 0.009, "step": 49840 }, { "epoch": 74.7376311844078, "grad_norm": 0.15699924528598785, "learning_rate": 5.437773594149484e-05, "loss": 0.0104, "step": 49850 }, { "epoch": 74.75262368815592, "grad_norm": 0.1960747390985489, "learning_rate": 5.436126450225495e-05, "loss": 0.0112, "step": 49860 }, { "epoch": 74.76761619190405, "grad_norm": 0.14615002274513245, "learning_rate": 5.434479258607371e-05, "loss": 0.0072, "step": 49870 }, { "epoch": 74.78260869565217, "grad_norm": 0.18943212926387787, "learning_rate": 5.43283201947525e-05, "loss": 0.0093, "step": 49880 }, { "epoch": 74.7976011994003, "grad_norm": 0.16573767364025116, "learning_rate": 5.431184733009268e-05, "loss": 0.0096, "step": 49890 }, { "epoch": 74.81259370314842, "grad_norm": 0.22615106403827667, "learning_rate": 5.4295373993895736e-05, "loss": 0.0071, "step": 49900 }, { "epoch": 74.82758620689656, "grad_norm": 0.33822736144065857, "learning_rate": 5.4278900187963157e-05, "loss": 0.0065, "step": 49910 }, { "epoch": 74.84257871064467, "grad_norm": 0.24116981029510498, "learning_rate": 5.426242591409646e-05, "loss": 0.0104, "step": 49920 }, { "epoch": 74.8575712143928, "grad_norm": 0.23884794116020203, "learning_rate": 5.42459511740973e-05, "loss": 0.0119, "step": 49930 }, { "epoch": 74.87256371814092, "grad_norm": 0.14889481663703918, "learning_rate": 5.422947596976729e-05, "loss": 0.007, "step": 49940 }, { "epoch": 74.88755622188906, "grad_norm": 0.1560920923948288, "learning_rate": 5.4213000302908134e-05, "loss": 0.0084, "step": 49950 }, { "epoch": 74.90254872563717, "grad_norm": 0.23886188864707947, "learning_rate": 5.419652417532162e-05, "loss": 0.0075, "step": 49960 }, { "epoch": 74.91754122938531, "grad_norm": 0.14238475263118744, "learning_rate": 5.4180047588809534e-05, "loss": 0.0059, "step": 49970 }, { "epoch": 74.93253373313344, "grad_norm": 0.1474340558052063, "learning_rate": 5.4163570545173704e-05, "loss": 0.0078, "step": 49980 }, { "epoch": 74.94752623688156, "grad_norm": 0.19910156726837158, "learning_rate": 5.414709304621608e-05, "loss": 0.0079, "step": 49990 }, { "epoch": 74.96251874062969, "grad_norm": 0.25300946831703186, "learning_rate": 5.413061509373858e-05, "loss": 0.0113, "step": 50000 }, { "epoch": 74.97751124437781, "grad_norm": 0.2751460373401642, "learning_rate": 5.411413668954324e-05, "loss": 0.0092, "step": 50010 }, { "epoch": 74.99250374812594, "grad_norm": 0.17237238585948944, "learning_rate": 5.4097657835432083e-05, "loss": 0.0071, "step": 50020 }, { "epoch": 75.00749625187406, "grad_norm": 0.20580564439296722, "learning_rate": 5.408117853320723e-05, "loss": 0.0065, "step": 50030 }, { "epoch": 75.02248875562219, "grad_norm": 0.17738230526447296, "learning_rate": 5.406469878467084e-05, "loss": 0.0078, "step": 50040 }, { "epoch": 75.03748125937031, "grad_norm": 0.2064322531223297, "learning_rate": 5.404821859162509e-05, "loss": 0.0079, "step": 50050 }, { "epoch": 75.05247376311844, "grad_norm": 0.29289552569389343, "learning_rate": 5.403173795587225e-05, "loss": 0.0069, "step": 50060 }, { "epoch": 75.06746626686656, "grad_norm": 0.1199192926287651, "learning_rate": 5.4015256879214606e-05, "loss": 0.0071, "step": 50070 }, { "epoch": 75.08245877061469, "grad_norm": 0.21462847292423248, "learning_rate": 5.39987753634545e-05, "loss": 0.009, "step": 50080 }, { "epoch": 75.09745127436283, "grad_norm": 0.13011686503887177, "learning_rate": 5.3982293410394335e-05, "loss": 0.0091, "step": 50090 }, { "epoch": 75.11244377811094, "grad_norm": 0.19688323140144348, "learning_rate": 5.396581102183655e-05, "loss": 0.008, "step": 50100 }, { "epoch": 75.12743628185908, "grad_norm": 0.2377600222826004, "learning_rate": 5.3949328199583634e-05, "loss": 0.0071, "step": 50110 }, { "epoch": 75.1424287856072, "grad_norm": 0.1516793817281723, "learning_rate": 5.393284494543813e-05, "loss": 0.007, "step": 50120 }, { "epoch": 75.15742128935533, "grad_norm": 0.350892037153244, "learning_rate": 5.391636126120262e-05, "loss": 0.0084, "step": 50130 }, { "epoch": 75.17241379310344, "grad_norm": 0.2825583815574646, "learning_rate": 5.389987714867971e-05, "loss": 0.0072, "step": 50140 }, { "epoch": 75.18740629685158, "grad_norm": 0.279250830411911, "learning_rate": 5.3883392609672114e-05, "loss": 0.0077, "step": 50150 }, { "epoch": 75.2023988005997, "grad_norm": 0.19921965897083282, "learning_rate": 5.3866907645982545e-05, "loss": 0.012, "step": 50160 }, { "epoch": 75.21739130434783, "grad_norm": 0.189363032579422, "learning_rate": 5.385042225941375e-05, "loss": 0.0093, "step": 50170 }, { "epoch": 75.23238380809595, "grad_norm": 0.23459173738956451, "learning_rate": 5.3833936451768585e-05, "loss": 0.0082, "step": 50180 }, { "epoch": 75.24737631184408, "grad_norm": 0.15955795347690582, "learning_rate": 5.3817450224849884e-05, "loss": 0.0069, "step": 50190 }, { "epoch": 75.2623688155922, "grad_norm": 0.21557852625846863, "learning_rate": 5.380096358046056e-05, "loss": 0.0063, "step": 50200 }, { "epoch": 75.27736131934033, "grad_norm": 0.37960052490234375, "learning_rate": 5.378447652040359e-05, "loss": 0.0091, "step": 50210 }, { "epoch": 75.29235382308846, "grad_norm": 0.1496538370847702, "learning_rate": 5.376798904648195e-05, "loss": 0.0104, "step": 50220 }, { "epoch": 75.30734632683658, "grad_norm": 0.15693500638008118, "learning_rate": 5.375150116049869e-05, "loss": 0.0088, "step": 50230 }, { "epoch": 75.32233883058471, "grad_norm": 0.18830102682113647, "learning_rate": 5.373501286425691e-05, "loss": 0.0074, "step": 50240 }, { "epoch": 75.33733133433283, "grad_norm": 0.14567629992961884, "learning_rate": 5.3718524159559726e-05, "loss": 0.0066, "step": 50250 }, { "epoch": 75.35232383808096, "grad_norm": 0.7112046480178833, "learning_rate": 5.370203504821034e-05, "loss": 0.0079, "step": 50260 }, { "epoch": 75.36731634182908, "grad_norm": 0.11411165446043015, "learning_rate": 5.368554553201196e-05, "loss": 0.0099, "step": 50270 }, { "epoch": 75.38230884557721, "grad_norm": 0.1821613609790802, "learning_rate": 5.3669055612767874e-05, "loss": 0.0083, "step": 50280 }, { "epoch": 75.39730134932533, "grad_norm": 0.19796526432037354, "learning_rate": 5.3652565292281376e-05, "loss": 0.0083, "step": 50290 }, { "epoch": 75.41229385307346, "grad_norm": 0.22525739669799805, "learning_rate": 5.3636074572355824e-05, "loss": 0.0117, "step": 50300 }, { "epoch": 75.42728635682158, "grad_norm": 0.2951323390007019, "learning_rate": 5.361958345479463e-05, "loss": 0.0068, "step": 50310 }, { "epoch": 75.44227886056971, "grad_norm": 0.18257233500480652, "learning_rate": 5.360309194140123e-05, "loss": 0.0089, "step": 50320 }, { "epoch": 75.45727136431785, "grad_norm": 0.20733752846717834, "learning_rate": 5.3586600033979106e-05, "loss": 0.0088, "step": 50330 }, { "epoch": 75.47226386806597, "grad_norm": 0.15366166830062866, "learning_rate": 5.357010773433181e-05, "loss": 0.008, "step": 50340 }, { "epoch": 75.4872563718141, "grad_norm": 0.1310654580593109, "learning_rate": 5.3553615044262895e-05, "loss": 0.0078, "step": 50350 }, { "epoch": 75.50224887556222, "grad_norm": 0.24552074074745178, "learning_rate": 5.3537121965575964e-05, "loss": 0.0065, "step": 50360 }, { "epoch": 75.51724137931035, "grad_norm": 0.14352639019489288, "learning_rate": 5.352062850007471e-05, "loss": 0.0103, "step": 50370 }, { "epoch": 75.53223388305847, "grad_norm": 0.14577528834342957, "learning_rate": 5.350413464956282e-05, "loss": 0.0087, "step": 50380 }, { "epoch": 75.5472263868066, "grad_norm": 0.21278589963912964, "learning_rate": 5.348764041584403e-05, "loss": 0.0074, "step": 50390 }, { "epoch": 75.56221889055472, "grad_norm": 0.10357683897018433, "learning_rate": 5.347114580072212e-05, "loss": 0.0076, "step": 50400 }, { "epoch": 75.57721139430285, "grad_norm": 0.26294639706611633, "learning_rate": 5.345465080600093e-05, "loss": 0.0062, "step": 50410 }, { "epoch": 75.59220389805097, "grad_norm": 0.19872410595417023, "learning_rate": 5.3438155433484326e-05, "loss": 0.0096, "step": 50420 }, { "epoch": 75.6071964017991, "grad_norm": 0.14566642045974731, "learning_rate": 5.3421659684976197e-05, "loss": 0.0085, "step": 50430 }, { "epoch": 75.62218890554723, "grad_norm": 0.30416610836982727, "learning_rate": 5.340516356228052e-05, "loss": 0.008, "step": 50440 }, { "epoch": 75.63718140929535, "grad_norm": 0.24079082906246185, "learning_rate": 5.338866706720128e-05, "loss": 0.0111, "step": 50450 }, { "epoch": 75.65217391304348, "grad_norm": 0.27706483006477356, "learning_rate": 5.337217020154249e-05, "loss": 0.0067, "step": 50460 }, { "epoch": 75.6671664167916, "grad_norm": 0.1333436518907547, "learning_rate": 5.335567296710825e-05, "loss": 0.0082, "step": 50470 }, { "epoch": 75.68215892053973, "grad_norm": 0.22238148748874664, "learning_rate": 5.333917536570265e-05, "loss": 0.008, "step": 50480 }, { "epoch": 75.69715142428785, "grad_norm": 0.17998641729354858, "learning_rate": 5.332267739912986e-05, "loss": 0.008, "step": 50490 }, { "epoch": 75.71214392803599, "grad_norm": 0.28837862610816956, "learning_rate": 5.330617906919405e-05, "loss": 0.0081, "step": 50500 }, { "epoch": 75.7271364317841, "grad_norm": 0.19318802654743195, "learning_rate": 5.328968037769949e-05, "loss": 0.008, "step": 50510 }, { "epoch": 75.74212893553224, "grad_norm": 0.24652129411697388, "learning_rate": 5.327318132645042e-05, "loss": 0.0089, "step": 50520 }, { "epoch": 75.75712143928035, "grad_norm": 0.26764506101608276, "learning_rate": 5.325668191725116e-05, "loss": 0.0063, "step": 50530 }, { "epoch": 75.77211394302849, "grad_norm": 0.16341811418533325, "learning_rate": 5.324018215190606e-05, "loss": 0.0111, "step": 50540 }, { "epoch": 75.7871064467766, "grad_norm": 0.18742090463638306, "learning_rate": 5.3223682032219515e-05, "loss": 0.008, "step": 50550 }, { "epoch": 75.80209895052474, "grad_norm": 0.37411871552467346, "learning_rate": 5.320718155999595e-05, "loss": 0.0063, "step": 50560 }, { "epoch": 75.81709145427287, "grad_norm": 0.20572397112846375, "learning_rate": 5.3190680737039835e-05, "loss": 0.0085, "step": 50570 }, { "epoch": 75.83208395802099, "grad_norm": 0.34723013639450073, "learning_rate": 5.317417956515567e-05, "loss": 0.0085, "step": 50580 }, { "epoch": 75.84707646176912, "grad_norm": 0.19022512435913086, "learning_rate": 5.315767804614798e-05, "loss": 0.0107, "step": 50590 }, { "epoch": 75.86206896551724, "grad_norm": 0.19784152507781982, "learning_rate": 5.3141176181821395e-05, "loss": 0.0062, "step": 50600 }, { "epoch": 75.87706146926537, "grad_norm": 0.19115202128887177, "learning_rate": 5.31246739739805e-05, "loss": 0.0069, "step": 50610 }, { "epoch": 75.89205397301349, "grad_norm": 0.12032701075077057, "learning_rate": 5.310817142442994e-05, "loss": 0.0067, "step": 50620 }, { "epoch": 75.90704647676162, "grad_norm": 0.14170396327972412, "learning_rate": 5.309166853497445e-05, "loss": 0.0101, "step": 50630 }, { "epoch": 75.92203898050974, "grad_norm": 0.17996355891227722, "learning_rate": 5.307516530741873e-05, "loss": 0.0078, "step": 50640 }, { "epoch": 75.93703148425787, "grad_norm": 0.1486063301563263, "learning_rate": 5.305866174356754e-05, "loss": 0.0082, "step": 50650 }, { "epoch": 75.95202398800599, "grad_norm": 0.2658996284008026, "learning_rate": 5.304215784522571e-05, "loss": 0.0081, "step": 50660 }, { "epoch": 75.96701649175412, "grad_norm": 0.19695517420768738, "learning_rate": 5.302565361419808e-05, "loss": 0.008, "step": 50670 }, { "epoch": 75.98200899550226, "grad_norm": 0.1042390838265419, "learning_rate": 5.3009149052289507e-05, "loss": 0.0084, "step": 50680 }, { "epoch": 75.99700149925037, "grad_norm": 0.2641179859638214, "learning_rate": 5.299264416130493e-05, "loss": 0.0091, "step": 50690 }, { "epoch": 76.0119940029985, "grad_norm": 0.23996026813983917, "learning_rate": 5.297613894304928e-05, "loss": 0.0098, "step": 50700 }, { "epoch": 76.02698650674662, "grad_norm": 0.2590557932853699, "learning_rate": 5.2959633399327534e-05, "loss": 0.0078, "step": 50710 }, { "epoch": 76.04197901049476, "grad_norm": 0.23628874123096466, "learning_rate": 5.294312753194476e-05, "loss": 0.0076, "step": 50720 }, { "epoch": 76.05697151424287, "grad_norm": 0.23298892378807068, "learning_rate": 5.292662134270596e-05, "loss": 0.0071, "step": 50730 }, { "epoch": 76.07196401799101, "grad_norm": 0.12450206279754639, "learning_rate": 5.291011483341626e-05, "loss": 0.0057, "step": 50740 }, { "epoch": 76.08695652173913, "grad_norm": 0.13040407001972198, "learning_rate": 5.2893608005880767e-05, "loss": 0.0087, "step": 50750 }, { "epoch": 76.10194902548726, "grad_norm": 0.24861621856689453, "learning_rate": 5.287710086190467e-05, "loss": 0.0077, "step": 50760 }, { "epoch": 76.11694152923538, "grad_norm": 0.469195693731308, "learning_rate": 5.286059340329314e-05, "loss": 0.0066, "step": 50770 }, { "epoch": 76.13193403298351, "grad_norm": 0.16867566108703613, "learning_rate": 5.284408563185141e-05, "loss": 0.0071, "step": 50780 }, { "epoch": 76.14692653673164, "grad_norm": 0.1355462372303009, "learning_rate": 5.282757754938476e-05, "loss": 0.0077, "step": 50790 }, { "epoch": 76.16191904047976, "grad_norm": 0.21721342206001282, "learning_rate": 5.281106915769849e-05, "loss": 0.008, "step": 50800 }, { "epoch": 76.17691154422789, "grad_norm": 0.1854632943868637, "learning_rate": 5.2794560458597897e-05, "loss": 0.0066, "step": 50810 }, { "epoch": 76.19190404797601, "grad_norm": 0.22663679718971252, "learning_rate": 5.27780514538884e-05, "loss": 0.0076, "step": 50820 }, { "epoch": 76.20689655172414, "grad_norm": 0.18539687991142273, "learning_rate": 5.2761542145375365e-05, "loss": 0.0069, "step": 50830 }, { "epoch": 76.22188905547226, "grad_norm": 0.21438999474048615, "learning_rate": 5.274503253486421e-05, "loss": 0.009, "step": 50840 }, { "epoch": 76.2368815592204, "grad_norm": 0.1360255777835846, "learning_rate": 5.272852262416046e-05, "loss": 0.0076, "step": 50850 }, { "epoch": 76.25187406296851, "grad_norm": 0.21130387485027313, "learning_rate": 5.2712012415069555e-05, "loss": 0.0103, "step": 50860 }, { "epoch": 76.26686656671664, "grad_norm": 0.17687343060970306, "learning_rate": 5.269550190939705e-05, "loss": 0.0061, "step": 50870 }, { "epoch": 76.28185907046476, "grad_norm": 0.16682161390781403, "learning_rate": 5.267899110894852e-05, "loss": 0.0052, "step": 50880 }, { "epoch": 76.2968515742129, "grad_norm": 0.18461008369922638, "learning_rate": 5.266248001552955e-05, "loss": 0.0084, "step": 50890 }, { "epoch": 76.31184407796101, "grad_norm": 0.1650063395500183, "learning_rate": 5.264596863094575e-05, "loss": 0.0102, "step": 50900 }, { "epoch": 76.32683658170914, "grad_norm": 0.1431010216474533, "learning_rate": 5.262945695700282e-05, "loss": 0.0076, "step": 50910 }, { "epoch": 76.34182908545728, "grad_norm": 0.13307678699493408, "learning_rate": 5.261294499550643e-05, "loss": 0.0062, "step": 50920 }, { "epoch": 76.3568215892054, "grad_norm": 0.10806624591350555, "learning_rate": 5.25964327482623e-05, "loss": 0.0053, "step": 50930 }, { "epoch": 76.37181409295353, "grad_norm": 0.10584884881973267, "learning_rate": 5.257992021707617e-05, "loss": 0.0071, "step": 50940 }, { "epoch": 76.38680659670165, "grad_norm": 0.244710311293602, "learning_rate": 5.256340740375387e-05, "loss": 0.007, "step": 50950 }, { "epoch": 76.40179910044978, "grad_norm": 0.21847622096538544, "learning_rate": 5.254689431010117e-05, "loss": 0.0081, "step": 50960 }, { "epoch": 76.4167916041979, "grad_norm": 0.26403674483299255, "learning_rate": 5.253038093792395e-05, "loss": 0.011, "step": 50970 }, { "epoch": 76.43178410794603, "grad_norm": 0.22003917396068573, "learning_rate": 5.251386728902806e-05, "loss": 0.0081, "step": 50980 }, { "epoch": 76.44677661169415, "grad_norm": 0.4694698452949524, "learning_rate": 5.2497353365219446e-05, "loss": 0.0086, "step": 50990 }, { "epoch": 76.46176911544228, "grad_norm": 0.36160093545913696, "learning_rate": 5.2480839168304e-05, "loss": 0.0114, "step": 51000 } ], "logging_steps": 10, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }