{ "best_metric": 0.63648960197002, "best_model_checkpoint": "./results/checkpoint-130000", "epoch": 0.9609414269241005, "eval_steps": 10000, "global_step": 130000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.391857130185388e-05, "grad_norm": 0.10461781173944473, "learning_rate": 1.0000000000000002e-06, "loss": 0.0494, "step": 10 }, { "epoch": 0.00014783714260370776, "grad_norm": 0.08169854432344437, "learning_rate": 2.0000000000000003e-06, "loss": 0.0441, "step": 20 }, { "epoch": 0.00022175571390556164, "grad_norm": 0.10416487604379654, "learning_rate": 3e-06, "loss": 0.0489, "step": 30 }, { "epoch": 0.0002956742852074155, "grad_norm": 0.07958583533763885, "learning_rate": 4.000000000000001e-06, "loss": 0.0493, "step": 40 }, { "epoch": 0.0003695928565092694, "grad_norm": 0.10156191140413284, "learning_rate": 5e-06, "loss": 0.0476, "step": 50 }, { "epoch": 0.0004435114278111233, "grad_norm": 0.08352667093276978, "learning_rate": 6e-06, "loss": 0.0475, "step": 60 }, { "epoch": 0.0005174299991129772, "grad_norm": 0.08660561591386795, "learning_rate": 7.000000000000001e-06, "loss": 0.0471, "step": 70 }, { "epoch": 0.000591348570414831, "grad_norm": 0.09334266930818558, "learning_rate": 8.000000000000001e-06, "loss": 0.0428, "step": 80 }, { "epoch": 0.0006652671417166849, "grad_norm": 0.08674201369285583, "learning_rate": 9e-06, "loss": 0.0439, "step": 90 }, { "epoch": 0.0007391857130185388, "grad_norm": 0.09628602117300034, "learning_rate": 1e-05, "loss": 0.0433, "step": 100 }, { "epoch": 0.0008131042843203927, "grad_norm": 0.09610876441001892, "learning_rate": 1.1000000000000001e-05, "loss": 0.0464, "step": 110 }, { "epoch": 0.0008870228556222466, "grad_norm": 0.10648789256811142, "learning_rate": 1.2e-05, "loss": 0.0446, "step": 120 }, { "epoch": 0.0009609414269241004, "grad_norm": 0.0938977524638176, "learning_rate": 1.3000000000000001e-05, "loss": 0.0425, "step": 130 }, { "epoch": 0.0010348599982259543, "grad_norm": 0.10076262056827545, "learning_rate": 1.4000000000000001e-05, "loss": 0.044, "step": 140 }, { "epoch": 0.001108778569527808, "grad_norm": 0.10113703459501266, "learning_rate": 1.5e-05, "loss": 0.0426, "step": 150 }, { "epoch": 0.001182697140829662, "grad_norm": 0.08176673948764801, "learning_rate": 1.6000000000000003e-05, "loss": 0.0411, "step": 160 }, { "epoch": 0.0012566157121315159, "grad_norm": 0.1192707046866417, "learning_rate": 1.7000000000000003e-05, "loss": 0.0429, "step": 170 }, { "epoch": 0.0013305342834333698, "grad_norm": 0.0927235409617424, "learning_rate": 1.8e-05, "loss": 0.0451, "step": 180 }, { "epoch": 0.0014044528547352236, "grad_norm": 0.09320451319217682, "learning_rate": 1.9e-05, "loss": 0.0441, "step": 190 }, { "epoch": 0.0014783714260370776, "grad_norm": 0.10145901888608932, "learning_rate": 2e-05, "loss": 0.0413, "step": 200 }, { "epoch": 0.0015522899973389314, "grad_norm": 0.12276735156774521, "learning_rate": 2.1e-05, "loss": 0.0389, "step": 210 }, { "epoch": 0.0016262085686407854, "grad_norm": 0.1172725036740303, "learning_rate": 2.2000000000000003e-05, "loss": 0.0393, "step": 220 }, { "epoch": 0.0017001271399426391, "grad_norm": 0.10647889226675034, "learning_rate": 2.3000000000000003e-05, "loss": 0.0437, "step": 230 }, { "epoch": 0.0017740457112444931, "grad_norm": 0.10727279633283615, "learning_rate": 2.4e-05, "loss": 0.04, "step": 240 }, { "epoch": 0.001847964282546347, "grad_norm": 0.08067339658737183, "learning_rate": 2.5e-05, "loss": 0.0385, "step": 250 }, { "epoch": 0.0019218828538482009, "grad_norm": 0.09736546128988266, "learning_rate": 2.6000000000000002e-05, "loss": 0.0385, "step": 260 }, { "epoch": 0.0019958014251500547, "grad_norm": 0.08011851459741592, "learning_rate": 2.7000000000000002e-05, "loss": 0.039, "step": 270 }, { "epoch": 0.0020697199964519086, "grad_norm": 0.08815836906433105, "learning_rate": 2.8000000000000003e-05, "loss": 0.0368, "step": 280 }, { "epoch": 0.0021436385677537626, "grad_norm": 0.08860961347818375, "learning_rate": 2.9e-05, "loss": 0.0397, "step": 290 }, { "epoch": 0.002217557139055616, "grad_norm": 0.0761508047580719, "learning_rate": 3e-05, "loss": 0.0357, "step": 300 }, { "epoch": 0.00229147571035747, "grad_norm": 0.08089523762464523, "learning_rate": 3.1e-05, "loss": 0.0384, "step": 310 }, { "epoch": 0.002365394281659324, "grad_norm": 0.09379731118679047, "learning_rate": 3.2000000000000005e-05, "loss": 0.0413, "step": 320 }, { "epoch": 0.002439312852961178, "grad_norm": 0.09349235147237778, "learning_rate": 3.3e-05, "loss": 0.039, "step": 330 }, { "epoch": 0.0025132314242630317, "grad_norm": 0.0876515731215477, "learning_rate": 3.4000000000000007e-05, "loss": 0.037, "step": 340 }, { "epoch": 0.0025871499955648857, "grad_norm": 0.10627899318933487, "learning_rate": 3.5e-05, "loss": 0.0371, "step": 350 }, { "epoch": 0.0026610685668667397, "grad_norm": 0.0915268212556839, "learning_rate": 3.6e-05, "loss": 0.0371, "step": 360 }, { "epoch": 0.0027349871381685937, "grad_norm": 0.1196686178445816, "learning_rate": 3.7e-05, "loss": 0.0379, "step": 370 }, { "epoch": 0.0028089057094704472, "grad_norm": 0.1142694279551506, "learning_rate": 3.8e-05, "loss": 0.0373, "step": 380 }, { "epoch": 0.0028828242807723012, "grad_norm": 0.07585809379816055, "learning_rate": 3.9000000000000006e-05, "loss": 0.0369, "step": 390 }, { "epoch": 0.002956742852074155, "grad_norm": 0.10797297954559326, "learning_rate": 4e-05, "loss": 0.0373, "step": 400 }, { "epoch": 0.003030661423376009, "grad_norm": 0.1142873466014862, "learning_rate": 4.1e-05, "loss": 0.0406, "step": 410 }, { "epoch": 0.0031045799946778628, "grad_norm": 0.09952966868877411, "learning_rate": 4.2e-05, "loss": 0.0361, "step": 420 }, { "epoch": 0.0031784985659797167, "grad_norm": 0.09601733088493347, "learning_rate": 4.3e-05, "loss": 0.033, "step": 430 }, { "epoch": 0.0032524171372815707, "grad_norm": 0.0806892067193985, "learning_rate": 4.4000000000000006e-05, "loss": 0.035, "step": 440 }, { "epoch": 0.0033263357085834243, "grad_norm": 0.09059257805347443, "learning_rate": 4.5e-05, "loss": 0.0334, "step": 450 }, { "epoch": 0.0034002542798852783, "grad_norm": 0.12588344514369965, "learning_rate": 4.600000000000001e-05, "loss": 0.0349, "step": 460 }, { "epoch": 0.0034741728511871323, "grad_norm": 0.10129739344120026, "learning_rate": 4.7e-05, "loss": 0.032, "step": 470 }, { "epoch": 0.0035480914224889862, "grad_norm": 0.08103670924901962, "learning_rate": 4.8e-05, "loss": 0.0322, "step": 480 }, { "epoch": 0.00362200999379084, "grad_norm": 0.0991249829530716, "learning_rate": 4.9e-05, "loss": 0.034, "step": 490 }, { "epoch": 0.003695928565092694, "grad_norm": 0.09622903913259506, "learning_rate": 5e-05, "loss": 0.0319, "step": 500 }, { "epoch": 0.0037698471363945478, "grad_norm": 0.09769944846630096, "learning_rate": 4.99962903608737e-05, "loss": 0.0293, "step": 510 }, { "epoch": 0.0038437657076964018, "grad_norm": 0.10930614918470383, "learning_rate": 4.999258072174739e-05, "loss": 0.0353, "step": 520 }, { "epoch": 0.003917684278998255, "grad_norm": 0.0826733186841011, "learning_rate": 4.9988871082621084e-05, "loss": 0.0336, "step": 530 }, { "epoch": 0.003991602850300109, "grad_norm": 0.0965137779712677, "learning_rate": 4.998516144349478e-05, "loss": 0.0347, "step": 540 }, { "epoch": 0.004065521421601963, "grad_norm": 0.09630349278450012, "learning_rate": 4.9981451804368475e-05, "loss": 0.0313, "step": 550 }, { "epoch": 0.004139439992903817, "grad_norm": 0.1024831011891365, "learning_rate": 4.997774216524217e-05, "loss": 0.0332, "step": 560 }, { "epoch": 0.004213358564205671, "grad_norm": 0.14341188967227936, "learning_rate": 4.997403252611586e-05, "loss": 0.0316, "step": 570 }, { "epoch": 0.004287277135507525, "grad_norm": 0.0977560356259346, "learning_rate": 4.9970322886989557e-05, "loss": 0.0293, "step": 580 }, { "epoch": 0.004361195706809378, "grad_norm": 0.10209672152996063, "learning_rate": 4.9966613247863246e-05, "loss": 0.0331, "step": 590 }, { "epoch": 0.004435114278111232, "grad_norm": 0.0934465304017067, "learning_rate": 4.996290360873694e-05, "loss": 0.0326, "step": 600 }, { "epoch": 0.004509032849413086, "grad_norm": 0.09404677152633667, "learning_rate": 4.995919396961064e-05, "loss": 0.0348, "step": 610 }, { "epoch": 0.00458295142071494, "grad_norm": 0.07064525038003922, "learning_rate": 4.9955484330484334e-05, "loss": 0.0321, "step": 620 }, { "epoch": 0.004656869992016794, "grad_norm": 0.1300738900899887, "learning_rate": 4.995177469135803e-05, "loss": 0.0334, "step": 630 }, { "epoch": 0.004730788563318648, "grad_norm": 0.11687029898166656, "learning_rate": 4.9948065052231726e-05, "loss": 0.0313, "step": 640 }, { "epoch": 0.004804707134620502, "grad_norm": 0.10037320107221603, "learning_rate": 4.9944355413105415e-05, "loss": 0.03, "step": 650 }, { "epoch": 0.004878625705922356, "grad_norm": 0.12054847180843353, "learning_rate": 4.994064577397911e-05, "loss": 0.0291, "step": 660 }, { "epoch": 0.004952544277224209, "grad_norm": 0.09925072640180588, "learning_rate": 4.99369361348528e-05, "loss": 0.0297, "step": 670 }, { "epoch": 0.005026462848526063, "grad_norm": 0.11456023156642914, "learning_rate": 4.9933226495726496e-05, "loss": 0.033, "step": 680 }, { "epoch": 0.005100381419827917, "grad_norm": 0.1687926948070526, "learning_rate": 4.992951685660019e-05, "loss": 0.0326, "step": 690 }, { "epoch": 0.005174299991129771, "grad_norm": 0.09963828325271606, "learning_rate": 4.992580721747389e-05, "loss": 0.0317, "step": 700 }, { "epoch": 0.005248218562431625, "grad_norm": 0.09856045991182327, "learning_rate": 4.9922097578347584e-05, "loss": 0.0325, "step": 710 }, { "epoch": 0.005322137133733479, "grad_norm": 0.08094804733991623, "learning_rate": 4.991838793922127e-05, "loss": 0.0286, "step": 720 }, { "epoch": 0.005396055705035333, "grad_norm": 0.09114375710487366, "learning_rate": 4.991467830009497e-05, "loss": 0.0289, "step": 730 }, { "epoch": 0.005469974276337187, "grad_norm": 0.1050422415137291, "learning_rate": 4.9910968660968665e-05, "loss": 0.0294, "step": 740 }, { "epoch": 0.0055438928476390405, "grad_norm": 0.0893392488360405, "learning_rate": 4.9907259021842354e-05, "loss": 0.0309, "step": 750 }, { "epoch": 0.0056178114189408945, "grad_norm": 0.1049913540482521, "learning_rate": 4.990354938271605e-05, "loss": 0.0262, "step": 760 }, { "epoch": 0.0056917299902427484, "grad_norm": 0.11163979023694992, "learning_rate": 4.9899839743589746e-05, "loss": 0.03, "step": 770 }, { "epoch": 0.0057656485615446024, "grad_norm": 0.09120380878448486, "learning_rate": 4.989613010446344e-05, "loss": 0.0311, "step": 780 }, { "epoch": 0.005839567132846456, "grad_norm": 0.10143250972032547, "learning_rate": 4.989242046533714e-05, "loss": 0.0312, "step": 790 }, { "epoch": 0.00591348570414831, "grad_norm": 0.0912647396326065, "learning_rate": 4.988871082621083e-05, "loss": 0.0291, "step": 800 }, { "epoch": 0.005987404275450164, "grad_norm": 0.09807098656892776, "learning_rate": 4.988500118708452e-05, "loss": 0.0306, "step": 810 }, { "epoch": 0.006061322846752018, "grad_norm": 0.09862897545099258, "learning_rate": 4.988129154795821e-05, "loss": 0.0296, "step": 820 }, { "epoch": 0.0061352414180538715, "grad_norm": 0.09236502647399902, "learning_rate": 4.987758190883191e-05, "loss": 0.0312, "step": 830 }, { "epoch": 0.0062091599893557255, "grad_norm": 0.10084735602140427, "learning_rate": 4.9873872269705604e-05, "loss": 0.0295, "step": 840 }, { "epoch": 0.0062830785606575795, "grad_norm": 0.11696548759937286, "learning_rate": 4.98701626305793e-05, "loss": 0.0308, "step": 850 }, { "epoch": 0.0063569971319594335, "grad_norm": 0.12154483795166016, "learning_rate": 4.9866452991452996e-05, "loss": 0.0302, "step": 860 }, { "epoch": 0.0064309157032612875, "grad_norm": 0.148088276386261, "learning_rate": 4.986274335232669e-05, "loss": 0.0269, "step": 870 }, { "epoch": 0.0065048342745631415, "grad_norm": 0.13604265451431274, "learning_rate": 4.985903371320038e-05, "loss": 0.0279, "step": 880 }, { "epoch": 0.0065787528458649954, "grad_norm": 0.14540578424930573, "learning_rate": 4.985532407407408e-05, "loss": 0.0287, "step": 890 }, { "epoch": 0.006652671417166849, "grad_norm": 0.08655355125665665, "learning_rate": 4.9851614434947767e-05, "loss": 0.0276, "step": 900 }, { "epoch": 0.0067265899884687026, "grad_norm": 0.12625974416732788, "learning_rate": 4.984790479582146e-05, "loss": 0.0303, "step": 910 }, { "epoch": 0.0068005085597705565, "grad_norm": 0.10419458895921707, "learning_rate": 4.984419515669516e-05, "loss": 0.0297, "step": 920 }, { "epoch": 0.0068744271310724105, "grad_norm": 0.10234599560499191, "learning_rate": 4.9840485517568854e-05, "loss": 0.0278, "step": 930 }, { "epoch": 0.0069483457023742645, "grad_norm": 0.12286118417978287, "learning_rate": 4.983677587844255e-05, "loss": 0.0303, "step": 940 }, { "epoch": 0.0070222642736761185, "grad_norm": 0.10289271920919418, "learning_rate": 4.983306623931624e-05, "loss": 0.0284, "step": 950 }, { "epoch": 0.0070961828449779725, "grad_norm": 0.12728984653949738, "learning_rate": 4.9829356600189936e-05, "loss": 0.0321, "step": 960 }, { "epoch": 0.0071701014162798265, "grad_norm": 0.11914363503456116, "learning_rate": 4.982564696106363e-05, "loss": 0.0281, "step": 970 }, { "epoch": 0.00724401998758168, "grad_norm": 0.09279730916023254, "learning_rate": 4.982193732193732e-05, "loss": 0.0319, "step": 980 }, { "epoch": 0.007317938558883534, "grad_norm": 0.12616656720638275, "learning_rate": 4.981822768281102e-05, "loss": 0.0289, "step": 990 }, { "epoch": 0.007391857130185388, "grad_norm": 0.09209688007831573, "learning_rate": 4.981451804368471e-05, "loss": 0.0299, "step": 1000 }, { "epoch": 0.007465775701487242, "grad_norm": 0.08926288038492203, "learning_rate": 4.981080840455841e-05, "loss": 0.0286, "step": 1010 }, { "epoch": 0.0075396942727890956, "grad_norm": 0.10940416902303696, "learning_rate": 4.9807098765432105e-05, "loss": 0.0284, "step": 1020 }, { "epoch": 0.0076136128440909496, "grad_norm": 0.08967240899801254, "learning_rate": 4.9803389126305794e-05, "loss": 0.0277, "step": 1030 }, { "epoch": 0.0076875314153928035, "grad_norm": 0.09682679921388626, "learning_rate": 4.979967948717949e-05, "loss": 0.028, "step": 1040 }, { "epoch": 0.0077614499866946575, "grad_norm": 0.11966560035943985, "learning_rate": 4.979596984805318e-05, "loss": 0.0286, "step": 1050 }, { "epoch": 0.00783536855799651, "grad_norm": 0.08031348884105682, "learning_rate": 4.9792260208926875e-05, "loss": 0.029, "step": 1060 }, { "epoch": 0.007909287129298365, "grad_norm": 0.08792899549007416, "learning_rate": 4.978855056980057e-05, "loss": 0.0298, "step": 1070 }, { "epoch": 0.007983205700600219, "grad_norm": 0.11339164525270462, "learning_rate": 4.978484093067427e-05, "loss": 0.0274, "step": 1080 }, { "epoch": 0.008057124271902073, "grad_norm": 0.13752642273902893, "learning_rate": 4.978113129154796e-05, "loss": 0.0239, "step": 1090 }, { "epoch": 0.008131042843203927, "grad_norm": 0.1251632571220398, "learning_rate": 4.977742165242166e-05, "loss": 0.0302, "step": 1100 }, { "epoch": 0.00820496141450578, "grad_norm": 0.12013223767280579, "learning_rate": 4.977371201329535e-05, "loss": 0.029, "step": 1110 }, { "epoch": 0.008278879985807635, "grad_norm": 0.11286813020706177, "learning_rate": 4.9770002374169044e-05, "loss": 0.0281, "step": 1120 }, { "epoch": 0.008352798557109489, "grad_norm": 0.12342401593923569, "learning_rate": 4.976629273504273e-05, "loss": 0.0269, "step": 1130 }, { "epoch": 0.008426717128411343, "grad_norm": 0.09709502756595612, "learning_rate": 4.976258309591643e-05, "loss": 0.0318, "step": 1140 }, { "epoch": 0.008500635699713197, "grad_norm": 0.1205952987074852, "learning_rate": 4.9758873456790125e-05, "loss": 0.0306, "step": 1150 }, { "epoch": 0.00857455427101505, "grad_norm": 0.08721502870321274, "learning_rate": 4.975516381766382e-05, "loss": 0.0268, "step": 1160 }, { "epoch": 0.008648472842316905, "grad_norm": 0.09323421120643616, "learning_rate": 4.975145417853752e-05, "loss": 0.0276, "step": 1170 }, { "epoch": 0.008722391413618757, "grad_norm": 0.09394596517086029, "learning_rate": 4.9747744539411206e-05, "loss": 0.0271, "step": 1180 }, { "epoch": 0.00879630998492061, "grad_norm": 0.09926934540271759, "learning_rate": 4.97440349002849e-05, "loss": 0.0304, "step": 1190 }, { "epoch": 0.008870228556222465, "grad_norm": 0.09732608497142792, "learning_rate": 4.97403252611586e-05, "loss": 0.0248, "step": 1200 }, { "epoch": 0.008944147127524319, "grad_norm": 0.10517530143260956, "learning_rate": 4.973661562203229e-05, "loss": 0.0307, "step": 1210 }, { "epoch": 0.009018065698826173, "grad_norm": 0.09877362102270126, "learning_rate": 4.973290598290598e-05, "loss": 0.0263, "step": 1220 }, { "epoch": 0.009091984270128027, "grad_norm": 0.0978366956114769, "learning_rate": 4.972919634377968e-05, "loss": 0.0298, "step": 1230 }, { "epoch": 0.00916590284142988, "grad_norm": 0.14290444552898407, "learning_rate": 4.9725486704653375e-05, "loss": 0.0283, "step": 1240 }, { "epoch": 0.009239821412731735, "grad_norm": 0.10938958823680878, "learning_rate": 4.972177706552707e-05, "loss": 0.0294, "step": 1250 }, { "epoch": 0.009313739984033589, "grad_norm": 0.12752975523471832, "learning_rate": 4.971806742640076e-05, "loss": 0.027, "step": 1260 }, { "epoch": 0.009387658555335443, "grad_norm": 0.08596224337816238, "learning_rate": 4.9714357787274456e-05, "loss": 0.0273, "step": 1270 }, { "epoch": 0.009461577126637297, "grad_norm": 0.1066613495349884, "learning_rate": 4.9710648148148146e-05, "loss": 0.028, "step": 1280 }, { "epoch": 0.00953549569793915, "grad_norm": 0.12049562484025955, "learning_rate": 4.970693850902184e-05, "loss": 0.028, "step": 1290 }, { "epoch": 0.009609414269241005, "grad_norm": 0.12397699803113937, "learning_rate": 4.970322886989554e-05, "loss": 0.0272, "step": 1300 }, { "epoch": 0.009683332840542859, "grad_norm": 0.1257927119731903, "learning_rate": 4.9699519230769233e-05, "loss": 0.0285, "step": 1310 }, { "epoch": 0.009757251411844713, "grad_norm": 0.11058560013771057, "learning_rate": 4.969580959164293e-05, "loss": 0.0287, "step": 1320 }, { "epoch": 0.009831169983146567, "grad_norm": 0.10039151459932327, "learning_rate": 4.9692099952516625e-05, "loss": 0.0292, "step": 1330 }, { "epoch": 0.009905088554448419, "grad_norm": 0.1464836746454239, "learning_rate": 4.9688390313390315e-05, "loss": 0.0284, "step": 1340 }, { "epoch": 0.009979007125750273, "grad_norm": 0.1260063499212265, "learning_rate": 4.968468067426401e-05, "loss": 0.0288, "step": 1350 }, { "epoch": 0.010052925697052127, "grad_norm": 0.13692636787891388, "learning_rate": 4.96809710351377e-05, "loss": 0.0305, "step": 1360 }, { "epoch": 0.01012684426835398, "grad_norm": 0.09072256833314896, "learning_rate": 4.9677261396011396e-05, "loss": 0.0272, "step": 1370 }, { "epoch": 0.010200762839655835, "grad_norm": 0.1056734025478363, "learning_rate": 4.967355175688509e-05, "loss": 0.027, "step": 1380 }, { "epoch": 0.010274681410957689, "grad_norm": 0.1367037445306778, "learning_rate": 4.966984211775879e-05, "loss": 0.0274, "step": 1390 }, { "epoch": 0.010348599982259543, "grad_norm": 0.15280531346797943, "learning_rate": 4.9666132478632484e-05, "loss": 0.0292, "step": 1400 }, { "epoch": 0.010422518553561397, "grad_norm": 0.10445661842823029, "learning_rate": 4.966242283950617e-05, "loss": 0.0265, "step": 1410 }, { "epoch": 0.01049643712486325, "grad_norm": 0.093659907579422, "learning_rate": 4.965871320037987e-05, "loss": 0.0277, "step": 1420 }, { "epoch": 0.010570355696165105, "grad_norm": 0.13440953195095062, "learning_rate": 4.9655003561253565e-05, "loss": 0.0287, "step": 1430 }, { "epoch": 0.010644274267466959, "grad_norm": 0.09561889618635178, "learning_rate": 4.9651293922127254e-05, "loss": 0.0269, "step": 1440 }, { "epoch": 0.010718192838768813, "grad_norm": 0.12212470918893814, "learning_rate": 4.964758428300095e-05, "loss": 0.0245, "step": 1450 }, { "epoch": 0.010792111410070667, "grad_norm": 0.12034928053617477, "learning_rate": 4.9643874643874646e-05, "loss": 0.0273, "step": 1460 }, { "epoch": 0.01086602998137252, "grad_norm": 0.10437193512916565, "learning_rate": 4.964016500474834e-05, "loss": 0.0298, "step": 1470 }, { "epoch": 0.010939948552674375, "grad_norm": 0.10191825777292252, "learning_rate": 4.963645536562204e-05, "loss": 0.0282, "step": 1480 }, { "epoch": 0.011013867123976227, "grad_norm": 0.15169893205165863, "learning_rate": 4.963274572649573e-05, "loss": 0.0281, "step": 1490 }, { "epoch": 0.011087785695278081, "grad_norm": 0.13512222468852997, "learning_rate": 4.962903608736942e-05, "loss": 0.026, "step": 1500 }, { "epoch": 0.011161704266579935, "grad_norm": 0.09260152280330658, "learning_rate": 4.962532644824311e-05, "loss": 0.0272, "step": 1510 }, { "epoch": 0.011235622837881789, "grad_norm": 0.15228793025016785, "learning_rate": 4.962161680911681e-05, "loss": 0.028, "step": 1520 }, { "epoch": 0.011309541409183643, "grad_norm": 0.11596718430519104, "learning_rate": 4.9617907169990504e-05, "loss": 0.0272, "step": 1530 }, { "epoch": 0.011383459980485497, "grad_norm": 0.12411506474018097, "learning_rate": 4.96141975308642e-05, "loss": 0.0303, "step": 1540 }, { "epoch": 0.011457378551787351, "grad_norm": 0.12651769816875458, "learning_rate": 4.9610487891737896e-05, "loss": 0.0265, "step": 1550 }, { "epoch": 0.011531297123089205, "grad_norm": 0.10328517854213715, "learning_rate": 4.960677825261159e-05, "loss": 0.0276, "step": 1560 }, { "epoch": 0.011605215694391059, "grad_norm": 0.11359741538763046, "learning_rate": 4.960306861348528e-05, "loss": 0.0271, "step": 1570 }, { "epoch": 0.011679134265692913, "grad_norm": 0.11071664839982986, "learning_rate": 4.959935897435898e-05, "loss": 0.0251, "step": 1580 }, { "epoch": 0.011753052836994767, "grad_norm": 0.10678543895483017, "learning_rate": 4.9595649335232666e-05, "loss": 0.0244, "step": 1590 }, { "epoch": 0.01182697140829662, "grad_norm": 0.11132028698921204, "learning_rate": 4.959193969610636e-05, "loss": 0.0297, "step": 1600 }, { "epoch": 0.011900889979598475, "grad_norm": 0.08840740472078323, "learning_rate": 4.9588230056980065e-05, "loss": 0.0246, "step": 1610 }, { "epoch": 0.011974808550900329, "grad_norm": 0.11825734376907349, "learning_rate": 4.9584520417853754e-05, "loss": 0.0267, "step": 1620 }, { "epoch": 0.012048727122202183, "grad_norm": 0.0901472344994545, "learning_rate": 4.958081077872745e-05, "loss": 0.0273, "step": 1630 }, { "epoch": 0.012122645693504037, "grad_norm": 0.09711014479398727, "learning_rate": 4.957710113960114e-05, "loss": 0.0255, "step": 1640 }, { "epoch": 0.012196564264805889, "grad_norm": 0.12570925056934357, "learning_rate": 4.9573391500474835e-05, "loss": 0.0247, "step": 1650 }, { "epoch": 0.012270482836107743, "grad_norm": 0.09919515997171402, "learning_rate": 4.956968186134853e-05, "loss": 0.0241, "step": 1660 }, { "epoch": 0.012344401407409597, "grad_norm": 0.10764940083026886, "learning_rate": 4.956597222222222e-05, "loss": 0.0281, "step": 1670 }, { "epoch": 0.012418319978711451, "grad_norm": 0.1037641242146492, "learning_rate": 4.9562262583095916e-05, "loss": 0.0289, "step": 1680 }, { "epoch": 0.012492238550013305, "grad_norm": 0.09550962597131729, "learning_rate": 4.955855294396961e-05, "loss": 0.0255, "step": 1690 }, { "epoch": 0.012566157121315159, "grad_norm": 0.1115112155675888, "learning_rate": 4.955484330484331e-05, "loss": 0.0281, "step": 1700 }, { "epoch": 0.012640075692617013, "grad_norm": 0.12160573899745941, "learning_rate": 4.9551133665717004e-05, "loss": 0.0246, "step": 1710 }, { "epoch": 0.012713994263918867, "grad_norm": 0.10318835824728012, "learning_rate": 4.9547424026590694e-05, "loss": 0.0247, "step": 1720 }, { "epoch": 0.012787912835220721, "grad_norm": 0.09020746499300003, "learning_rate": 4.954371438746439e-05, "loss": 0.0255, "step": 1730 }, { "epoch": 0.012861831406522575, "grad_norm": 0.09125480055809021, "learning_rate": 4.954000474833808e-05, "loss": 0.026, "step": 1740 }, { "epoch": 0.012935749977824429, "grad_norm": 0.12356351315975189, "learning_rate": 4.9536295109211775e-05, "loss": 0.0262, "step": 1750 }, { "epoch": 0.013009668549126283, "grad_norm": 0.09936685860157013, "learning_rate": 4.953258547008548e-05, "loss": 0.0274, "step": 1760 }, { "epoch": 0.013083587120428137, "grad_norm": 0.11199319362640381, "learning_rate": 4.9528875830959167e-05, "loss": 0.0256, "step": 1770 }, { "epoch": 0.013157505691729991, "grad_norm": 0.13244837522506714, "learning_rate": 4.952516619183286e-05, "loss": 0.0292, "step": 1780 }, { "epoch": 0.013231424263031845, "grad_norm": 0.09166007488965988, "learning_rate": 4.952145655270656e-05, "loss": 0.0247, "step": 1790 }, { "epoch": 0.013305342834333697, "grad_norm": 0.12759141623973846, "learning_rate": 4.951774691358025e-05, "loss": 0.0266, "step": 1800 }, { "epoch": 0.013379261405635551, "grad_norm": 0.10648078471422195, "learning_rate": 4.9514037274453944e-05, "loss": 0.0264, "step": 1810 }, { "epoch": 0.013453179976937405, "grad_norm": 0.10549057275056839, "learning_rate": 4.951032763532763e-05, "loss": 0.0267, "step": 1820 }, { "epoch": 0.013527098548239259, "grad_norm": 0.104535773396492, "learning_rate": 4.950661799620133e-05, "loss": 0.0293, "step": 1830 }, { "epoch": 0.013601017119541113, "grad_norm": 0.1186116635799408, "learning_rate": 4.950290835707503e-05, "loss": 0.0266, "step": 1840 }, { "epoch": 0.013674935690842967, "grad_norm": 0.10436084866523743, "learning_rate": 4.949919871794872e-05, "loss": 0.0265, "step": 1850 }, { "epoch": 0.013748854262144821, "grad_norm": 0.09677781164646149, "learning_rate": 4.949548907882242e-05, "loss": 0.026, "step": 1860 }, { "epoch": 0.013822772833446675, "grad_norm": 0.08779451996088028, "learning_rate": 4.9491779439696106e-05, "loss": 0.0242, "step": 1870 }, { "epoch": 0.013896691404748529, "grad_norm": 0.1076350212097168, "learning_rate": 4.94880698005698e-05, "loss": 0.0244, "step": 1880 }, { "epoch": 0.013970609976050383, "grad_norm": 0.09812363982200623, "learning_rate": 4.94843601614435e-05, "loss": 0.0277, "step": 1890 }, { "epoch": 0.014044528547352237, "grad_norm": 0.09892376512289047, "learning_rate": 4.948065052231719e-05, "loss": 0.0269, "step": 1900 }, { "epoch": 0.014118447118654091, "grad_norm": 0.10767515003681183, "learning_rate": 4.947694088319089e-05, "loss": 0.0229, "step": 1910 }, { "epoch": 0.014192365689955945, "grad_norm": 0.10170572251081467, "learning_rate": 4.947323124406458e-05, "loss": 0.0279, "step": 1920 }, { "epoch": 0.014266284261257799, "grad_norm": 0.17412474751472473, "learning_rate": 4.9469521604938275e-05, "loss": 0.0278, "step": 1930 }, { "epoch": 0.014340202832559653, "grad_norm": 0.10065030306577682, "learning_rate": 4.946581196581197e-05, "loss": 0.0221, "step": 1940 }, { "epoch": 0.014414121403861507, "grad_norm": 0.1184917613863945, "learning_rate": 4.946210232668566e-05, "loss": 0.0239, "step": 1950 }, { "epoch": 0.01448803997516336, "grad_norm": 0.12135294079780579, "learning_rate": 4.9458392687559356e-05, "loss": 0.0254, "step": 1960 }, { "epoch": 0.014561958546465213, "grad_norm": 0.09968862682580948, "learning_rate": 4.9454683048433045e-05, "loss": 0.0265, "step": 1970 }, { "epoch": 0.014635877117767067, "grad_norm": 0.1089881956577301, "learning_rate": 4.945097340930674e-05, "loss": 0.0233, "step": 1980 }, { "epoch": 0.014709795689068921, "grad_norm": 0.09672956168651581, "learning_rate": 4.9447263770180444e-05, "loss": 0.0229, "step": 1990 }, { "epoch": 0.014783714260370775, "grad_norm": 0.10003507882356644, "learning_rate": 4.944355413105413e-05, "loss": 0.0218, "step": 2000 }, { "epoch": 0.01485763283167263, "grad_norm": 0.108626589179039, "learning_rate": 4.943984449192783e-05, "loss": 0.0241, "step": 2010 }, { "epoch": 0.014931551402974483, "grad_norm": 0.13139499723911285, "learning_rate": 4.9436134852801525e-05, "loss": 0.0295, "step": 2020 }, { "epoch": 0.015005469974276337, "grad_norm": 0.09990837424993515, "learning_rate": 4.9432425213675214e-05, "loss": 0.0232, "step": 2030 }, { "epoch": 0.015079388545578191, "grad_norm": 0.10354658961296082, "learning_rate": 4.942871557454891e-05, "loss": 0.0262, "step": 2040 }, { "epoch": 0.015153307116880045, "grad_norm": 0.08615817874670029, "learning_rate": 4.94250059354226e-05, "loss": 0.0274, "step": 2050 }, { "epoch": 0.015227225688181899, "grad_norm": 0.1270134299993515, "learning_rate": 4.94212962962963e-05, "loss": 0.0224, "step": 2060 }, { "epoch": 0.015301144259483753, "grad_norm": 0.14910046756267548, "learning_rate": 4.941758665717e-05, "loss": 0.0244, "step": 2070 }, { "epoch": 0.015375062830785607, "grad_norm": 0.10157673060894012, "learning_rate": 4.941387701804369e-05, "loss": 0.0273, "step": 2080 }, { "epoch": 0.015448981402087461, "grad_norm": 0.11614657193422318, "learning_rate": 4.941016737891738e-05, "loss": 0.024, "step": 2090 }, { "epoch": 0.015522899973389315, "grad_norm": 0.10795578360557556, "learning_rate": 4.940645773979107e-05, "loss": 0.025, "step": 2100 }, { "epoch": 0.015596818544691169, "grad_norm": 0.095462366938591, "learning_rate": 4.940274810066477e-05, "loss": 0.0258, "step": 2110 }, { "epoch": 0.01567073711599302, "grad_norm": 0.11237543076276779, "learning_rate": 4.9399038461538464e-05, "loss": 0.0264, "step": 2120 }, { "epoch": 0.015744655687294877, "grad_norm": 0.10964473336935043, "learning_rate": 4.9395328822412154e-05, "loss": 0.0241, "step": 2130 }, { "epoch": 0.01581857425859673, "grad_norm": 0.11441141366958618, "learning_rate": 4.9391619183285856e-05, "loss": 0.0281, "step": 2140 }, { "epoch": 0.015892492829898585, "grad_norm": 0.07639171928167343, "learning_rate": 4.9387909544159546e-05, "loss": 0.0232, "step": 2150 }, { "epoch": 0.015966411401200437, "grad_norm": 0.10645004361867905, "learning_rate": 4.938419990503324e-05, "loss": 0.0275, "step": 2160 }, { "epoch": 0.016040329972502293, "grad_norm": 0.10596774518489838, "learning_rate": 4.938049026590694e-05, "loss": 0.0229, "step": 2170 }, { "epoch": 0.016114248543804145, "grad_norm": 0.15091291069984436, "learning_rate": 4.937678062678063e-05, "loss": 0.0218, "step": 2180 }, { "epoch": 0.016188167115106, "grad_norm": 0.08416672796010971, "learning_rate": 4.937307098765432e-05, "loss": 0.0245, "step": 2190 }, { "epoch": 0.016262085686407853, "grad_norm": 0.11193812638521194, "learning_rate": 4.936936134852801e-05, "loss": 0.0244, "step": 2200 }, { "epoch": 0.016336004257709705, "grad_norm": 0.1309780776500702, "learning_rate": 4.9365651709401715e-05, "loss": 0.0234, "step": 2210 }, { "epoch": 0.01640992282901156, "grad_norm": 0.09695091843605042, "learning_rate": 4.936194207027541e-05, "loss": 0.024, "step": 2220 }, { "epoch": 0.016483841400313413, "grad_norm": 0.12367673218250275, "learning_rate": 4.93582324311491e-05, "loss": 0.0265, "step": 2230 }, { "epoch": 0.01655775997161527, "grad_norm": 0.08766240626573563, "learning_rate": 4.9354522792022796e-05, "loss": 0.0258, "step": 2240 }, { "epoch": 0.01663167854291712, "grad_norm": 0.2597678303718567, "learning_rate": 4.935081315289649e-05, "loss": 0.0265, "step": 2250 }, { "epoch": 0.016705597114218977, "grad_norm": 0.12836165726184845, "learning_rate": 4.934710351377018e-05, "loss": 0.0253, "step": 2260 }, { "epoch": 0.01677951568552083, "grad_norm": 0.09131550043821335, "learning_rate": 4.934339387464388e-05, "loss": 0.0263, "step": 2270 }, { "epoch": 0.016853434256822685, "grad_norm": 0.0957304835319519, "learning_rate": 4.9339684235517566e-05, "loss": 0.0248, "step": 2280 }, { "epoch": 0.016927352828124537, "grad_norm": 0.08887193351984024, "learning_rate": 4.933597459639127e-05, "loss": 0.0269, "step": 2290 }, { "epoch": 0.017001271399426393, "grad_norm": 0.1088196337223053, "learning_rate": 4.9332264957264965e-05, "loss": 0.0229, "step": 2300 }, { "epoch": 0.017075189970728245, "grad_norm": 0.10806940495967865, "learning_rate": 4.9328555318138654e-05, "loss": 0.024, "step": 2310 }, { "epoch": 0.0171491085420301, "grad_norm": 0.11325336247682571, "learning_rate": 4.932484567901235e-05, "loss": 0.0256, "step": 2320 }, { "epoch": 0.017223027113331953, "grad_norm": 0.08867617696523666, "learning_rate": 4.932113603988604e-05, "loss": 0.0239, "step": 2330 }, { "epoch": 0.01729694568463381, "grad_norm": 0.08839539438486099, "learning_rate": 4.9317426400759735e-05, "loss": 0.0232, "step": 2340 }, { "epoch": 0.01737086425593566, "grad_norm": 0.09393464028835297, "learning_rate": 4.931371676163343e-05, "loss": 0.0272, "step": 2350 }, { "epoch": 0.017444782827237514, "grad_norm": 0.14300134778022766, "learning_rate": 4.931000712250713e-05, "loss": 0.0287, "step": 2360 }, { "epoch": 0.01751870139853937, "grad_norm": 0.10064567625522614, "learning_rate": 4.930629748338082e-05, "loss": 0.0239, "step": 2370 }, { "epoch": 0.01759261996984122, "grad_norm": 0.08626890927553177, "learning_rate": 4.930258784425451e-05, "loss": 0.0239, "step": 2380 }, { "epoch": 0.017666538541143077, "grad_norm": 0.08982729911804199, "learning_rate": 4.929887820512821e-05, "loss": 0.0232, "step": 2390 }, { "epoch": 0.01774045711244493, "grad_norm": 0.11209940910339355, "learning_rate": 4.9295168566001904e-05, "loss": 0.027, "step": 2400 }, { "epoch": 0.017814375683746785, "grad_norm": 0.09284085780382156, "learning_rate": 4.929145892687559e-05, "loss": 0.0265, "step": 2410 }, { "epoch": 0.017888294255048637, "grad_norm": 0.11055232584476471, "learning_rate": 4.928774928774929e-05, "loss": 0.0229, "step": 2420 }, { "epoch": 0.017962212826350493, "grad_norm": 0.08943367004394531, "learning_rate": 4.928403964862298e-05, "loss": 0.0243, "step": 2430 }, { "epoch": 0.018036131397652345, "grad_norm": 0.08455714583396912, "learning_rate": 4.928033000949668e-05, "loss": 0.0227, "step": 2440 }, { "epoch": 0.0181100499689542, "grad_norm": 0.08124112337827682, "learning_rate": 4.927662037037038e-05, "loss": 0.0241, "step": 2450 }, { "epoch": 0.018183968540256053, "grad_norm": 0.0854414626955986, "learning_rate": 4.9272910731244066e-05, "loss": 0.0234, "step": 2460 }, { "epoch": 0.01825788711155791, "grad_norm": 0.10405626893043518, "learning_rate": 4.926920109211776e-05, "loss": 0.023, "step": 2470 }, { "epoch": 0.01833180568285976, "grad_norm": 0.10854543745517731, "learning_rate": 4.926549145299146e-05, "loss": 0.025, "step": 2480 }, { "epoch": 0.018405724254161617, "grad_norm": 0.14602318406105042, "learning_rate": 4.926178181386515e-05, "loss": 0.0238, "step": 2490 }, { "epoch": 0.01847964282546347, "grad_norm": 0.11568371206521988, "learning_rate": 4.9258072174738843e-05, "loss": 0.0239, "step": 2500 }, { "epoch": 0.01855356139676532, "grad_norm": 0.10612810403108597, "learning_rate": 4.925436253561253e-05, "loss": 0.0264, "step": 2510 }, { "epoch": 0.018627479968067177, "grad_norm": 0.12310179322957993, "learning_rate": 4.9250652896486235e-05, "loss": 0.0229, "step": 2520 }, { "epoch": 0.01870139853936903, "grad_norm": 0.12349710613489151, "learning_rate": 4.924694325735993e-05, "loss": 0.0252, "step": 2530 }, { "epoch": 0.018775317110670885, "grad_norm": 0.09072163701057434, "learning_rate": 4.924323361823362e-05, "loss": 0.0234, "step": 2540 }, { "epoch": 0.018849235681972738, "grad_norm": 0.11435705423355103, "learning_rate": 4.9239523979107316e-05, "loss": 0.0253, "step": 2550 }, { "epoch": 0.018923154253274593, "grad_norm": 0.12449908256530762, "learning_rate": 4.9235814339981006e-05, "loss": 0.0257, "step": 2560 }, { "epoch": 0.018997072824576446, "grad_norm": 0.103098563849926, "learning_rate": 4.92321047008547e-05, "loss": 0.0257, "step": 2570 }, { "epoch": 0.0190709913958783, "grad_norm": 0.13323761522769928, "learning_rate": 4.92283950617284e-05, "loss": 0.0265, "step": 2580 }, { "epoch": 0.019144909967180154, "grad_norm": 0.08810979127883911, "learning_rate": 4.9224685422602094e-05, "loss": 0.0256, "step": 2590 }, { "epoch": 0.01921882853848201, "grad_norm": 0.0921526849269867, "learning_rate": 4.922097578347579e-05, "loss": 0.0251, "step": 2600 }, { "epoch": 0.01929274710978386, "grad_norm": 0.11148613691329956, "learning_rate": 4.921726614434948e-05, "loss": 0.0264, "step": 2610 }, { "epoch": 0.019366665681085717, "grad_norm": 0.10836231708526611, "learning_rate": 4.9213556505223175e-05, "loss": 0.0206, "step": 2620 }, { "epoch": 0.01944058425238757, "grad_norm": 0.07907849550247192, "learning_rate": 4.920984686609687e-05, "loss": 0.0239, "step": 2630 }, { "epoch": 0.019514502823689425, "grad_norm": 0.10812706500291824, "learning_rate": 4.920613722697056e-05, "loss": 0.0237, "step": 2640 }, { "epoch": 0.019588421394991277, "grad_norm": 0.12056887894868851, "learning_rate": 4.9202427587844256e-05, "loss": 0.0274, "step": 2650 }, { "epoch": 0.019662339966293133, "grad_norm": 0.10785806179046631, "learning_rate": 4.9198717948717945e-05, "loss": 0.0258, "step": 2660 }, { "epoch": 0.019736258537594985, "grad_norm": 0.12284066528081894, "learning_rate": 4.919500830959165e-05, "loss": 0.0254, "step": 2670 }, { "epoch": 0.019810177108896838, "grad_norm": 0.08642525970935822, "learning_rate": 4.9191298670465344e-05, "loss": 0.0256, "step": 2680 }, { "epoch": 0.019884095680198693, "grad_norm": 0.10404020547866821, "learning_rate": 4.918758903133903e-05, "loss": 0.0234, "step": 2690 }, { "epoch": 0.019958014251500546, "grad_norm": 0.11959104984998703, "learning_rate": 4.918387939221273e-05, "loss": 0.0254, "step": 2700 }, { "epoch": 0.0200319328228024, "grad_norm": 0.12476497888565063, "learning_rate": 4.9180169753086425e-05, "loss": 0.0248, "step": 2710 }, { "epoch": 0.020105851394104254, "grad_norm": 0.13198316097259521, "learning_rate": 4.9176460113960114e-05, "loss": 0.0241, "step": 2720 }, { "epoch": 0.02017976996540611, "grad_norm": 0.08732737600803375, "learning_rate": 4.917275047483381e-05, "loss": 0.0259, "step": 2730 }, { "epoch": 0.02025368853670796, "grad_norm": 0.11259639263153076, "learning_rate": 4.9169040835707506e-05, "loss": 0.0238, "step": 2740 }, { "epoch": 0.020327607108009817, "grad_norm": 0.09043506532907486, "learning_rate": 4.91653311965812e-05, "loss": 0.0213, "step": 2750 }, { "epoch": 0.02040152567931167, "grad_norm": 0.10611968487501144, "learning_rate": 4.91616215574549e-05, "loss": 0.0249, "step": 2760 }, { "epoch": 0.020475444250613525, "grad_norm": 0.10952197015285492, "learning_rate": 4.915791191832859e-05, "loss": 0.0281, "step": 2770 }, { "epoch": 0.020549362821915378, "grad_norm": 0.1053660586476326, "learning_rate": 4.915420227920228e-05, "loss": 0.027, "step": 2780 }, { "epoch": 0.020623281393217233, "grad_norm": 0.14103762805461884, "learning_rate": 4.915049264007597e-05, "loss": 0.0242, "step": 2790 }, { "epoch": 0.020697199964519086, "grad_norm": 0.1869761049747467, "learning_rate": 4.914678300094967e-05, "loss": 0.0244, "step": 2800 }, { "epoch": 0.02077111853582094, "grad_norm": 0.1109655573964119, "learning_rate": 4.9143073361823364e-05, "loss": 0.0252, "step": 2810 }, { "epoch": 0.020845037107122794, "grad_norm": 0.08841854333877563, "learning_rate": 4.913936372269706e-05, "loss": 0.0249, "step": 2820 }, { "epoch": 0.020918955678424646, "grad_norm": 0.11417660862207413, "learning_rate": 4.9135654083570756e-05, "loss": 0.0263, "step": 2830 }, { "epoch": 0.0209928742497265, "grad_norm": 0.13581736385822296, "learning_rate": 4.9131944444444445e-05, "loss": 0.0245, "step": 2840 }, { "epoch": 0.021066792821028354, "grad_norm": 0.10360286384820938, "learning_rate": 4.912823480531814e-05, "loss": 0.0241, "step": 2850 }, { "epoch": 0.02114071139233021, "grad_norm": 0.11725900322198868, "learning_rate": 4.912452516619184e-05, "loss": 0.0229, "step": 2860 }, { "epoch": 0.021214629963632062, "grad_norm": 0.08907769620418549, "learning_rate": 4.9120815527065526e-05, "loss": 0.0214, "step": 2870 }, { "epoch": 0.021288548534933917, "grad_norm": 0.0941435769200325, "learning_rate": 4.911710588793922e-05, "loss": 0.0261, "step": 2880 }, { "epoch": 0.02136246710623577, "grad_norm": 0.14143934845924377, "learning_rate": 4.911339624881292e-05, "loss": 0.0232, "step": 2890 }, { "epoch": 0.021436385677537625, "grad_norm": 0.1258591115474701, "learning_rate": 4.9109686609686614e-05, "loss": 0.0223, "step": 2900 }, { "epoch": 0.021510304248839478, "grad_norm": 0.12183864414691925, "learning_rate": 4.910597697056031e-05, "loss": 0.0257, "step": 2910 }, { "epoch": 0.021584222820141333, "grad_norm": 0.1272670328617096, "learning_rate": 4.9102267331434e-05, "loss": 0.0212, "step": 2920 }, { "epoch": 0.021658141391443186, "grad_norm": 0.11030561476945877, "learning_rate": 4.9098557692307695e-05, "loss": 0.023, "step": 2930 }, { "epoch": 0.02173205996274504, "grad_norm": 0.09155961871147156, "learning_rate": 4.909484805318139e-05, "loss": 0.0231, "step": 2940 }, { "epoch": 0.021805978534046894, "grad_norm": 0.09755866974592209, "learning_rate": 4.909113841405508e-05, "loss": 0.0237, "step": 2950 }, { "epoch": 0.02187989710534875, "grad_norm": 0.1016668975353241, "learning_rate": 4.9087428774928777e-05, "loss": 0.0262, "step": 2960 }, { "epoch": 0.0219538156766506, "grad_norm": 0.08545517176389694, "learning_rate": 4.908371913580247e-05, "loss": 0.0234, "step": 2970 }, { "epoch": 0.022027734247952454, "grad_norm": 0.11458541452884674, "learning_rate": 4.908000949667617e-05, "loss": 0.0265, "step": 2980 }, { "epoch": 0.02210165281925431, "grad_norm": 0.09427709132432938, "learning_rate": 4.9076299857549864e-05, "loss": 0.0257, "step": 2990 }, { "epoch": 0.022175571390556162, "grad_norm": 0.10649760067462921, "learning_rate": 4.9072590218423554e-05, "loss": 0.0264, "step": 3000 }, { "epoch": 0.022249489961858018, "grad_norm": 0.09446646273136139, "learning_rate": 4.906888057929725e-05, "loss": 0.0227, "step": 3010 }, { "epoch": 0.02232340853315987, "grad_norm": 0.10109565407037735, "learning_rate": 4.906517094017094e-05, "loss": 0.0261, "step": 3020 }, { "epoch": 0.022397327104461726, "grad_norm": 0.1348312348127365, "learning_rate": 4.9061461301044635e-05, "loss": 0.0239, "step": 3030 }, { "epoch": 0.022471245675763578, "grad_norm": 0.08773966878652573, "learning_rate": 4.905775166191833e-05, "loss": 0.0236, "step": 3040 }, { "epoch": 0.022545164247065434, "grad_norm": 0.11004015058279037, "learning_rate": 4.905404202279203e-05, "loss": 0.023, "step": 3050 }, { "epoch": 0.022619082818367286, "grad_norm": 0.10679835081100464, "learning_rate": 4.905033238366572e-05, "loss": 0.0239, "step": 3060 }, { "epoch": 0.02269300138966914, "grad_norm": 0.11770069599151611, "learning_rate": 4.904662274453941e-05, "loss": 0.0259, "step": 3070 }, { "epoch": 0.022766919960970994, "grad_norm": 0.11873900145292282, "learning_rate": 4.904291310541311e-05, "loss": 0.0249, "step": 3080 }, { "epoch": 0.02284083853227285, "grad_norm": 0.11943815648555756, "learning_rate": 4.9039203466286804e-05, "loss": 0.0242, "step": 3090 }, { "epoch": 0.022914757103574702, "grad_norm": 0.1334373950958252, "learning_rate": 4.903549382716049e-05, "loss": 0.0258, "step": 3100 }, { "epoch": 0.022988675674876557, "grad_norm": 0.13952457904815674, "learning_rate": 4.903178418803419e-05, "loss": 0.0257, "step": 3110 }, { "epoch": 0.02306259424617841, "grad_norm": 0.07053710520267487, "learning_rate": 4.9028074548907885e-05, "loss": 0.0226, "step": 3120 }, { "epoch": 0.023136512817480265, "grad_norm": 0.11949121952056885, "learning_rate": 4.902436490978158e-05, "loss": 0.0276, "step": 3130 }, { "epoch": 0.023210431388782118, "grad_norm": 0.11576800048351288, "learning_rate": 4.902065527065528e-05, "loss": 0.0247, "step": 3140 }, { "epoch": 0.02328434996008397, "grad_norm": 0.09908673912286758, "learning_rate": 4.9016945631528966e-05, "loss": 0.0266, "step": 3150 }, { "epoch": 0.023358268531385826, "grad_norm": 0.1006418839097023, "learning_rate": 4.901323599240266e-05, "loss": 0.0245, "step": 3160 }, { "epoch": 0.023432187102687678, "grad_norm": 0.09430541843175888, "learning_rate": 4.900952635327636e-05, "loss": 0.0241, "step": 3170 }, { "epoch": 0.023506105673989534, "grad_norm": 0.08641228079795837, "learning_rate": 4.900581671415005e-05, "loss": 0.0244, "step": 3180 }, { "epoch": 0.023580024245291386, "grad_norm": 0.11231532692909241, "learning_rate": 4.900210707502374e-05, "loss": 0.0263, "step": 3190 }, { "epoch": 0.02365394281659324, "grad_norm": 0.08847200870513916, "learning_rate": 4.899839743589744e-05, "loss": 0.023, "step": 3200 }, { "epoch": 0.023727861387895094, "grad_norm": 0.08235383033752441, "learning_rate": 4.8994687796771135e-05, "loss": 0.0207, "step": 3210 }, { "epoch": 0.02380177995919695, "grad_norm": 0.07484035193920135, "learning_rate": 4.899097815764483e-05, "loss": 0.0202, "step": 3220 }, { "epoch": 0.023875698530498802, "grad_norm": 0.08688723295927048, "learning_rate": 4.898726851851852e-05, "loss": 0.0226, "step": 3230 }, { "epoch": 0.023949617101800658, "grad_norm": 0.11297879368066788, "learning_rate": 4.8983558879392216e-05, "loss": 0.0257, "step": 3240 }, { "epoch": 0.02402353567310251, "grad_norm": 0.08175697922706604, "learning_rate": 4.8979849240265905e-05, "loss": 0.0226, "step": 3250 }, { "epoch": 0.024097454244404366, "grad_norm": 0.10165733844041824, "learning_rate": 4.89761396011396e-05, "loss": 0.023, "step": 3260 }, { "epoch": 0.024171372815706218, "grad_norm": 0.10624788701534271, "learning_rate": 4.89724299620133e-05, "loss": 0.0243, "step": 3270 }, { "epoch": 0.024245291387008074, "grad_norm": 0.11952788382768631, "learning_rate": 4.896872032288699e-05, "loss": 0.0229, "step": 3280 }, { "epoch": 0.024319209958309926, "grad_norm": 0.0901573970913887, "learning_rate": 4.896501068376069e-05, "loss": 0.023, "step": 3290 }, { "epoch": 0.024393128529611778, "grad_norm": 0.13301552832126617, "learning_rate": 4.896130104463438e-05, "loss": 0.0259, "step": 3300 }, { "epoch": 0.024467047100913634, "grad_norm": 0.11203417927026749, "learning_rate": 4.8957591405508074e-05, "loss": 0.0234, "step": 3310 }, { "epoch": 0.024540965672215486, "grad_norm": 0.09428996592760086, "learning_rate": 4.895388176638177e-05, "loss": 0.0236, "step": 3320 }, { "epoch": 0.024614884243517342, "grad_norm": 0.11264640092849731, "learning_rate": 4.895017212725546e-05, "loss": 0.0217, "step": 3330 }, { "epoch": 0.024688802814819194, "grad_norm": 0.14519962668418884, "learning_rate": 4.8946462488129156e-05, "loss": 0.024, "step": 3340 }, { "epoch": 0.02476272138612105, "grad_norm": 0.0921749472618103, "learning_rate": 4.894275284900285e-05, "loss": 0.0253, "step": 3350 }, { "epoch": 0.024836639957422902, "grad_norm": 0.0954316258430481, "learning_rate": 4.893904320987655e-05, "loss": 0.0219, "step": 3360 }, { "epoch": 0.024910558528724758, "grad_norm": 0.10995155572891235, "learning_rate": 4.8935333570750243e-05, "loss": 0.0215, "step": 3370 }, { "epoch": 0.02498447710002661, "grad_norm": 0.10263795405626297, "learning_rate": 4.893162393162393e-05, "loss": 0.021, "step": 3380 }, { "epoch": 0.025058395671328466, "grad_norm": 0.15626676380634308, "learning_rate": 4.892791429249763e-05, "loss": 0.0228, "step": 3390 }, { "epoch": 0.025132314242630318, "grad_norm": 0.10327065736055374, "learning_rate": 4.8924204653371325e-05, "loss": 0.0216, "step": 3400 }, { "epoch": 0.025206232813932174, "grad_norm": 0.10524246096611023, "learning_rate": 4.8920495014245014e-05, "loss": 0.0248, "step": 3410 }, { "epoch": 0.025280151385234026, "grad_norm": 0.0885235071182251, "learning_rate": 4.891678537511871e-05, "loss": 0.0227, "step": 3420 }, { "epoch": 0.02535406995653588, "grad_norm": 0.11894160509109497, "learning_rate": 4.8913075735992406e-05, "loss": 0.0244, "step": 3430 }, { "epoch": 0.025427988527837734, "grad_norm": 0.08932094275951385, "learning_rate": 4.89093660968661e-05, "loss": 0.0224, "step": 3440 }, { "epoch": 0.025501907099139586, "grad_norm": 0.12240859866142273, "learning_rate": 4.89056564577398e-05, "loss": 0.0218, "step": 3450 }, { "epoch": 0.025575825670441442, "grad_norm": 0.10221285372972488, "learning_rate": 4.890194681861349e-05, "loss": 0.021, "step": 3460 }, { "epoch": 0.025649744241743294, "grad_norm": 0.13738150894641876, "learning_rate": 4.889823717948718e-05, "loss": 0.0253, "step": 3470 }, { "epoch": 0.02572366281304515, "grad_norm": 0.10425925999879837, "learning_rate": 4.889452754036087e-05, "loss": 0.0236, "step": 3480 }, { "epoch": 0.025797581384347002, "grad_norm": 0.16000455617904663, "learning_rate": 4.889081790123457e-05, "loss": 0.0259, "step": 3490 }, { "epoch": 0.025871499955648858, "grad_norm": 0.10048896074295044, "learning_rate": 4.8887108262108264e-05, "loss": 0.0227, "step": 3500 }, { "epoch": 0.02594541852695071, "grad_norm": 0.10501318424940109, "learning_rate": 4.888339862298196e-05, "loss": 0.025, "step": 3510 }, { "epoch": 0.026019337098252566, "grad_norm": 0.11101071536540985, "learning_rate": 4.8879688983855656e-05, "loss": 0.0231, "step": 3520 }, { "epoch": 0.026093255669554418, "grad_norm": 0.09944774210453033, "learning_rate": 4.8875979344729345e-05, "loss": 0.0218, "step": 3530 }, { "epoch": 0.026167174240856274, "grad_norm": 0.10541743040084839, "learning_rate": 4.887226970560304e-05, "loss": 0.0235, "step": 3540 }, { "epoch": 0.026241092812158126, "grad_norm": 0.11680968105792999, "learning_rate": 4.886856006647674e-05, "loss": 0.0247, "step": 3550 }, { "epoch": 0.026315011383459982, "grad_norm": 0.08649037778377533, "learning_rate": 4.8864850427350426e-05, "loss": 0.0219, "step": 3560 }, { "epoch": 0.026388929954761834, "grad_norm": 0.09611788392066956, "learning_rate": 4.886114078822412e-05, "loss": 0.0249, "step": 3570 }, { "epoch": 0.02646284852606369, "grad_norm": 0.09479816257953644, "learning_rate": 4.885743114909782e-05, "loss": 0.0223, "step": 3580 }, { "epoch": 0.026536767097365542, "grad_norm": 0.1077667772769928, "learning_rate": 4.8853721509971514e-05, "loss": 0.0229, "step": 3590 }, { "epoch": 0.026610685668667394, "grad_norm": 0.12021303921937943, "learning_rate": 4.885001187084521e-05, "loss": 0.023, "step": 3600 }, { "epoch": 0.02668460423996925, "grad_norm": 0.10672514885663986, "learning_rate": 4.88463022317189e-05, "loss": 0.0204, "step": 3610 }, { "epoch": 0.026758522811271102, "grad_norm": 0.11359500885009766, "learning_rate": 4.8842592592592595e-05, "loss": 0.0256, "step": 3620 }, { "epoch": 0.026832441382572958, "grad_norm": 0.1413438767194748, "learning_rate": 4.883888295346629e-05, "loss": 0.0275, "step": 3630 }, { "epoch": 0.02690635995387481, "grad_norm": 0.10484588891267776, "learning_rate": 4.883517331433998e-05, "loss": 0.0279, "step": 3640 }, { "epoch": 0.026980278525176666, "grad_norm": 0.10619416832923889, "learning_rate": 4.8831463675213676e-05, "loss": 0.0245, "step": 3650 }, { "epoch": 0.027054197096478518, "grad_norm": 0.11082316190004349, "learning_rate": 4.882775403608737e-05, "loss": 0.0226, "step": 3660 }, { "epoch": 0.027128115667780374, "grad_norm": 0.1237013041973114, "learning_rate": 4.882404439696107e-05, "loss": 0.0222, "step": 3670 }, { "epoch": 0.027202034239082226, "grad_norm": 0.16743157804012299, "learning_rate": 4.8820334757834764e-05, "loss": 0.0243, "step": 3680 }, { "epoch": 0.027275952810384082, "grad_norm": 0.10074224323034286, "learning_rate": 4.8816625118708453e-05, "loss": 0.0252, "step": 3690 }, { "epoch": 0.027349871381685934, "grad_norm": 0.10610511898994446, "learning_rate": 4.881291547958215e-05, "loss": 0.0234, "step": 3700 }, { "epoch": 0.02742378995298779, "grad_norm": 0.09411498159170151, "learning_rate": 4.880920584045584e-05, "loss": 0.0253, "step": 3710 }, { "epoch": 0.027497708524289642, "grad_norm": 0.12062369287014008, "learning_rate": 4.8805496201329535e-05, "loss": 0.0268, "step": 3720 }, { "epoch": 0.027571627095591498, "grad_norm": 0.12509259581565857, "learning_rate": 4.880178656220323e-05, "loss": 0.0228, "step": 3730 }, { "epoch": 0.02764554566689335, "grad_norm": 0.11746224761009216, "learning_rate": 4.8798076923076926e-05, "loss": 0.0253, "step": 3740 }, { "epoch": 0.027719464238195206, "grad_norm": 0.08255576342344284, "learning_rate": 4.879436728395062e-05, "loss": 0.0211, "step": 3750 }, { "epoch": 0.027793382809497058, "grad_norm": 0.13874807953834534, "learning_rate": 4.879065764482431e-05, "loss": 0.0258, "step": 3760 }, { "epoch": 0.02786730138079891, "grad_norm": 0.1162390410900116, "learning_rate": 4.878694800569801e-05, "loss": 0.0219, "step": 3770 }, { "epoch": 0.027941219952100766, "grad_norm": 0.08504689484834671, "learning_rate": 4.8783238366571704e-05, "loss": 0.0251, "step": 3780 }, { "epoch": 0.02801513852340262, "grad_norm": 0.12767374515533447, "learning_rate": 4.877952872744539e-05, "loss": 0.0226, "step": 3790 }, { "epoch": 0.028089057094704474, "grad_norm": 0.09197654575109482, "learning_rate": 4.877581908831909e-05, "loss": 0.0213, "step": 3800 }, { "epoch": 0.028162975666006326, "grad_norm": 0.08843453228473663, "learning_rate": 4.8772109449192785e-05, "loss": 0.0225, "step": 3810 }, { "epoch": 0.028236894237308182, "grad_norm": 0.13571232557296753, "learning_rate": 4.876839981006648e-05, "loss": 0.0226, "step": 3820 }, { "epoch": 0.028310812808610034, "grad_norm": 0.09555158764123917, "learning_rate": 4.876469017094018e-05, "loss": 0.0234, "step": 3830 }, { "epoch": 0.02838473137991189, "grad_norm": 0.07628829032182693, "learning_rate": 4.8760980531813866e-05, "loss": 0.0219, "step": 3840 }, { "epoch": 0.028458649951213742, "grad_norm": 0.11912820488214493, "learning_rate": 4.875727089268756e-05, "loss": 0.0214, "step": 3850 }, { "epoch": 0.028532568522515598, "grad_norm": 0.09438012540340424, "learning_rate": 4.875356125356126e-05, "loss": 0.0217, "step": 3860 }, { "epoch": 0.02860648709381745, "grad_norm": 0.09157334268093109, "learning_rate": 4.874985161443495e-05, "loss": 0.026, "step": 3870 }, { "epoch": 0.028680405665119306, "grad_norm": 0.08945705741643906, "learning_rate": 4.874614197530864e-05, "loss": 0.026, "step": 3880 }, { "epoch": 0.028754324236421158, "grad_norm": 0.10933069884777069, "learning_rate": 4.874243233618234e-05, "loss": 0.0242, "step": 3890 }, { "epoch": 0.028828242807723014, "grad_norm": 0.12087175250053406, "learning_rate": 4.8738722697056035e-05, "loss": 0.0259, "step": 3900 }, { "epoch": 0.028902161379024866, "grad_norm": 0.11087092012166977, "learning_rate": 4.873501305792973e-05, "loss": 0.0242, "step": 3910 }, { "epoch": 0.02897607995032672, "grad_norm": 0.08969036489725113, "learning_rate": 4.873130341880342e-05, "loss": 0.024, "step": 3920 }, { "epoch": 0.029049998521628574, "grad_norm": 0.08746183663606644, "learning_rate": 4.8727593779677116e-05, "loss": 0.0238, "step": 3930 }, { "epoch": 0.029123917092930426, "grad_norm": 0.09499067813158035, "learning_rate": 4.8723884140550805e-05, "loss": 0.0256, "step": 3940 }, { "epoch": 0.029197835664232282, "grad_norm": 0.09479714184999466, "learning_rate": 4.87201745014245e-05, "loss": 0.0254, "step": 3950 }, { "epoch": 0.029271754235534134, "grad_norm": 0.07698513567447662, "learning_rate": 4.87164648622982e-05, "loss": 0.0246, "step": 3960 }, { "epoch": 0.02934567280683599, "grad_norm": 0.08836125582456589, "learning_rate": 4.871275522317189e-05, "loss": 0.0229, "step": 3970 }, { "epoch": 0.029419591378137842, "grad_norm": 0.10476455837488174, "learning_rate": 4.870904558404559e-05, "loss": 0.0226, "step": 3980 }, { "epoch": 0.029493509949439698, "grad_norm": 0.12344580888748169, "learning_rate": 4.870533594491928e-05, "loss": 0.0249, "step": 3990 }, { "epoch": 0.02956742852074155, "grad_norm": 0.09737499803304672, "learning_rate": 4.8701626305792974e-05, "loss": 0.0227, "step": 4000 }, { "epoch": 0.029641347092043406, "grad_norm": 0.11230745911598206, "learning_rate": 4.869791666666667e-05, "loss": 0.0248, "step": 4010 }, { "epoch": 0.02971526566334526, "grad_norm": 0.11428452283143997, "learning_rate": 4.869420702754036e-05, "loss": 0.0245, "step": 4020 }, { "epoch": 0.029789184234647114, "grad_norm": 0.09732426702976227, "learning_rate": 4.8690497388414055e-05, "loss": 0.0267, "step": 4030 }, { "epoch": 0.029863102805948966, "grad_norm": 0.08766438066959381, "learning_rate": 4.868678774928775e-05, "loss": 0.0236, "step": 4040 }, { "epoch": 0.029937021377250822, "grad_norm": 0.09755643457174301, "learning_rate": 4.868307811016145e-05, "loss": 0.025, "step": 4050 }, { "epoch": 0.030010939948552674, "grad_norm": 0.11232589185237885, "learning_rate": 4.867936847103514e-05, "loss": 0.0254, "step": 4060 }, { "epoch": 0.030084858519854527, "grad_norm": 0.08921632915735245, "learning_rate": 4.867565883190883e-05, "loss": 0.0228, "step": 4070 }, { "epoch": 0.030158777091156382, "grad_norm": 0.19874423742294312, "learning_rate": 4.867194919278253e-05, "loss": 0.0239, "step": 4080 }, { "epoch": 0.030232695662458235, "grad_norm": 0.09998581558465958, "learning_rate": 4.8668239553656224e-05, "loss": 0.0241, "step": 4090 }, { "epoch": 0.03030661423376009, "grad_norm": 0.09771718829870224, "learning_rate": 4.8664529914529914e-05, "loss": 0.0224, "step": 4100 }, { "epoch": 0.030380532805061942, "grad_norm": 0.11452648788690567, "learning_rate": 4.866082027540361e-05, "loss": 0.0219, "step": 4110 }, { "epoch": 0.030454451376363798, "grad_norm": 0.14325961470603943, "learning_rate": 4.8657110636277305e-05, "loss": 0.0257, "step": 4120 }, { "epoch": 0.03052836994766565, "grad_norm": 0.11013491451740265, "learning_rate": 4.8653400997151e-05, "loss": 0.0249, "step": 4130 }, { "epoch": 0.030602288518967506, "grad_norm": 0.10259222984313965, "learning_rate": 4.86496913580247e-05, "loss": 0.0238, "step": 4140 }, { "epoch": 0.03067620709026936, "grad_norm": 0.08032498508691788, "learning_rate": 4.8645981718898387e-05, "loss": 0.0244, "step": 4150 }, { "epoch": 0.030750125661571214, "grad_norm": 0.09167686849832535, "learning_rate": 4.864227207977208e-05, "loss": 0.0236, "step": 4160 }, { "epoch": 0.030824044232873066, "grad_norm": 0.11132217943668365, "learning_rate": 4.863856244064577e-05, "loss": 0.024, "step": 4170 }, { "epoch": 0.030897962804174922, "grad_norm": 0.09461288899183273, "learning_rate": 4.863485280151947e-05, "loss": 0.0221, "step": 4180 }, { "epoch": 0.030971881375476774, "grad_norm": 0.1043187603354454, "learning_rate": 4.8631143162393164e-05, "loss": 0.0212, "step": 4190 }, { "epoch": 0.03104579994677863, "grad_norm": 0.08882828056812286, "learning_rate": 4.862743352326686e-05, "loss": 0.0239, "step": 4200 }, { "epoch": 0.031119718518080482, "grad_norm": 0.11656889319419861, "learning_rate": 4.8623723884140556e-05, "loss": 0.0212, "step": 4210 }, { "epoch": 0.031193637089382338, "grad_norm": 0.09930509328842163, "learning_rate": 4.8620014245014245e-05, "loss": 0.0243, "step": 4220 }, { "epoch": 0.03126755566068419, "grad_norm": 0.11788803339004517, "learning_rate": 4.861630460588794e-05, "loss": 0.0232, "step": 4230 }, { "epoch": 0.03134147423198604, "grad_norm": 0.13846427202224731, "learning_rate": 4.861259496676164e-05, "loss": 0.0229, "step": 4240 }, { "epoch": 0.031415392803287895, "grad_norm": 0.10129490494728088, "learning_rate": 4.8608885327635326e-05, "loss": 0.023, "step": 4250 }, { "epoch": 0.031489311374589754, "grad_norm": 0.0792374238371849, "learning_rate": 4.860517568850902e-05, "loss": 0.0242, "step": 4260 }, { "epoch": 0.031563229945891606, "grad_norm": 0.1227840855717659, "learning_rate": 4.860146604938272e-05, "loss": 0.0256, "step": 4270 }, { "epoch": 0.03163714851719346, "grad_norm": 0.1305595189332962, "learning_rate": 4.8597756410256414e-05, "loss": 0.0229, "step": 4280 }, { "epoch": 0.03171106708849531, "grad_norm": 0.10320558398962021, "learning_rate": 4.859404677113011e-05, "loss": 0.0239, "step": 4290 }, { "epoch": 0.03178498565979717, "grad_norm": 0.11386742442846298, "learning_rate": 4.85903371320038e-05, "loss": 0.0239, "step": 4300 }, { "epoch": 0.03185890423109902, "grad_norm": 0.09916136413812637, "learning_rate": 4.8586627492877495e-05, "loss": 0.026, "step": 4310 }, { "epoch": 0.031932822802400875, "grad_norm": 0.13368867337703705, "learning_rate": 4.858291785375119e-05, "loss": 0.0265, "step": 4320 }, { "epoch": 0.03200674137370273, "grad_norm": 0.11623481661081314, "learning_rate": 4.857920821462488e-05, "loss": 0.026, "step": 4330 }, { "epoch": 0.032080659945004586, "grad_norm": 0.1140202060341835, "learning_rate": 4.8575498575498576e-05, "loss": 0.0236, "step": 4340 }, { "epoch": 0.03215457851630644, "grad_norm": 0.08688046783208847, "learning_rate": 4.857178893637227e-05, "loss": 0.0234, "step": 4350 }, { "epoch": 0.03222849708760829, "grad_norm": 0.11289218068122864, "learning_rate": 4.856807929724597e-05, "loss": 0.0225, "step": 4360 }, { "epoch": 0.03230241565891014, "grad_norm": 0.1553889960050583, "learning_rate": 4.8564369658119664e-05, "loss": 0.0252, "step": 4370 }, { "epoch": 0.032376334230212, "grad_norm": 0.09674090892076492, "learning_rate": 4.856066001899335e-05, "loss": 0.025, "step": 4380 }, { "epoch": 0.032450252801513854, "grad_norm": 0.10170965641736984, "learning_rate": 4.855695037986705e-05, "loss": 0.0228, "step": 4390 }, { "epoch": 0.032524171372815706, "grad_norm": 0.09621470421552658, "learning_rate": 4.855324074074074e-05, "loss": 0.0209, "step": 4400 }, { "epoch": 0.03259808994411756, "grad_norm": 0.1560935229063034, "learning_rate": 4.8549531101614434e-05, "loss": 0.0233, "step": 4410 }, { "epoch": 0.03267200851541941, "grad_norm": 0.1017451360821724, "learning_rate": 4.854582146248813e-05, "loss": 0.0243, "step": 4420 }, { "epoch": 0.03274592708672127, "grad_norm": 0.10564211755990982, "learning_rate": 4.8542111823361826e-05, "loss": 0.0235, "step": 4430 }, { "epoch": 0.03281984565802312, "grad_norm": 0.08408430963754654, "learning_rate": 4.853840218423552e-05, "loss": 0.0212, "step": 4440 }, { "epoch": 0.032893764229324975, "grad_norm": 0.10621950775384903, "learning_rate": 4.853469254510921e-05, "loss": 0.0203, "step": 4450 }, { "epoch": 0.03296768280062683, "grad_norm": 0.19483087956905365, "learning_rate": 4.853098290598291e-05, "loss": 0.023, "step": 4460 }, { "epoch": 0.033041601371928686, "grad_norm": 0.1312873363494873, "learning_rate": 4.85272732668566e-05, "loss": 0.0232, "step": 4470 }, { "epoch": 0.03311551994323054, "grad_norm": 0.10183980315923691, "learning_rate": 4.852356362773029e-05, "loss": 0.0216, "step": 4480 }, { "epoch": 0.03318943851453239, "grad_norm": 0.10369880497455597, "learning_rate": 4.851985398860399e-05, "loss": 0.0194, "step": 4490 }, { "epoch": 0.03326335708583424, "grad_norm": 0.10564015805721283, "learning_rate": 4.8516144349477684e-05, "loss": 0.0247, "step": 4500 }, { "epoch": 0.0333372756571361, "grad_norm": 0.09465089440345764, "learning_rate": 4.851243471035138e-05, "loss": 0.0253, "step": 4510 }, { "epoch": 0.033411194228437954, "grad_norm": 0.11145325005054474, "learning_rate": 4.8508725071225076e-05, "loss": 0.0231, "step": 4520 }, { "epoch": 0.03348511279973981, "grad_norm": 0.09158811718225479, "learning_rate": 4.8505015432098766e-05, "loss": 0.0232, "step": 4530 }, { "epoch": 0.03355903137104166, "grad_norm": 0.10896454751491547, "learning_rate": 4.850130579297246e-05, "loss": 0.0224, "step": 4540 }, { "epoch": 0.03363294994234351, "grad_norm": 0.09764958918094635, "learning_rate": 4.849759615384616e-05, "loss": 0.0251, "step": 4550 }, { "epoch": 0.03370686851364537, "grad_norm": 0.16335123777389526, "learning_rate": 4.849388651471985e-05, "loss": 0.0241, "step": 4560 }, { "epoch": 0.03378078708494722, "grad_norm": 0.09329704940319061, "learning_rate": 4.849017687559354e-05, "loss": 0.0234, "step": 4570 }, { "epoch": 0.033854705656249075, "grad_norm": 0.11753322929143906, "learning_rate": 4.848646723646724e-05, "loss": 0.0202, "step": 4580 }, { "epoch": 0.03392862422755093, "grad_norm": 0.12258260697126389, "learning_rate": 4.8482757597340935e-05, "loss": 0.0258, "step": 4590 }, { "epoch": 0.034002542798852786, "grad_norm": 0.10052376240491867, "learning_rate": 4.847904795821463e-05, "loss": 0.0219, "step": 4600 }, { "epoch": 0.03407646137015464, "grad_norm": 0.07553847879171371, "learning_rate": 4.847533831908832e-05, "loss": 0.0243, "step": 4610 }, { "epoch": 0.03415037994145649, "grad_norm": 0.09385402500629425, "learning_rate": 4.8471628679962016e-05, "loss": 0.023, "step": 4620 }, { "epoch": 0.03422429851275834, "grad_norm": 0.15772198140621185, "learning_rate": 4.8467919040835705e-05, "loss": 0.0214, "step": 4630 }, { "epoch": 0.0342982170840602, "grad_norm": 0.10387273132801056, "learning_rate": 4.84642094017094e-05, "loss": 0.0256, "step": 4640 }, { "epoch": 0.034372135655362054, "grad_norm": 0.13165810704231262, "learning_rate": 4.8460499762583104e-05, "loss": 0.028, "step": 4650 }, { "epoch": 0.03444605422666391, "grad_norm": 0.09483859688043594, "learning_rate": 4.845679012345679e-05, "loss": 0.0256, "step": 4660 }, { "epoch": 0.03451997279796576, "grad_norm": 0.11623439192771912, "learning_rate": 4.845308048433049e-05, "loss": 0.0261, "step": 4670 }, { "epoch": 0.03459389136926762, "grad_norm": 0.139932781457901, "learning_rate": 4.844937084520418e-05, "loss": 0.0229, "step": 4680 }, { "epoch": 0.03466780994056947, "grad_norm": 0.12717574834823608, "learning_rate": 4.8445661206077874e-05, "loss": 0.0239, "step": 4690 }, { "epoch": 0.03474172851187132, "grad_norm": 0.1345638930797577, "learning_rate": 4.844195156695157e-05, "loss": 0.0224, "step": 4700 }, { "epoch": 0.034815647083173175, "grad_norm": 0.10481065511703491, "learning_rate": 4.843824192782526e-05, "loss": 0.0262, "step": 4710 }, { "epoch": 0.03488956565447503, "grad_norm": 0.09500519186258316, "learning_rate": 4.8434532288698955e-05, "loss": 0.0211, "step": 4720 }, { "epoch": 0.034963484225776886, "grad_norm": 0.13622979819774628, "learning_rate": 4.843082264957265e-05, "loss": 0.025, "step": 4730 }, { "epoch": 0.03503740279707874, "grad_norm": 0.10376254469156265, "learning_rate": 4.842711301044635e-05, "loss": 0.021, "step": 4740 }, { "epoch": 0.03511132136838059, "grad_norm": 0.095113605260849, "learning_rate": 4.842340337132004e-05, "loss": 0.0229, "step": 4750 }, { "epoch": 0.03518523993968244, "grad_norm": 0.10841836780309677, "learning_rate": 4.841969373219373e-05, "loss": 0.0235, "step": 4760 }, { "epoch": 0.0352591585109843, "grad_norm": 0.10691962391138077, "learning_rate": 4.841598409306743e-05, "loss": 0.0234, "step": 4770 }, { "epoch": 0.035333077082286155, "grad_norm": 0.10558052361011505, "learning_rate": 4.8412274453941124e-05, "loss": 0.0201, "step": 4780 }, { "epoch": 0.03540699565358801, "grad_norm": 0.10873495787382126, "learning_rate": 4.840856481481481e-05, "loss": 0.0231, "step": 4790 }, { "epoch": 0.03548091422488986, "grad_norm": 0.11643857508897781, "learning_rate": 4.8404855175688516e-05, "loss": 0.0222, "step": 4800 }, { "epoch": 0.03555483279619172, "grad_norm": 0.10683627426624298, "learning_rate": 4.8401145536562205e-05, "loss": 0.0224, "step": 4810 }, { "epoch": 0.03562875136749357, "grad_norm": 0.11844533681869507, "learning_rate": 4.83974358974359e-05, "loss": 0.0197, "step": 4820 }, { "epoch": 0.03570266993879542, "grad_norm": 0.09665479511022568, "learning_rate": 4.83937262583096e-05, "loss": 0.0222, "step": 4830 }, { "epoch": 0.035776588510097275, "grad_norm": 0.1258080154657364, "learning_rate": 4.8390016619183286e-05, "loss": 0.022, "step": 4840 }, { "epoch": 0.035850507081399134, "grad_norm": 0.06919804215431213, "learning_rate": 4.838630698005698e-05, "loss": 0.0218, "step": 4850 }, { "epoch": 0.035924425652700986, "grad_norm": 0.09917058795690536, "learning_rate": 4.838259734093067e-05, "loss": 0.0227, "step": 4860 }, { "epoch": 0.03599834422400284, "grad_norm": 0.1694340854883194, "learning_rate": 4.837888770180437e-05, "loss": 0.0235, "step": 4870 }, { "epoch": 0.03607226279530469, "grad_norm": 0.10197817534208298, "learning_rate": 4.837517806267807e-05, "loss": 0.0262, "step": 4880 }, { "epoch": 0.03614618136660654, "grad_norm": 0.10047579556703568, "learning_rate": 4.837146842355176e-05, "loss": 0.0255, "step": 4890 }, { "epoch": 0.0362200999379084, "grad_norm": 0.07849813252687454, "learning_rate": 4.8367758784425455e-05, "loss": 0.0209, "step": 4900 }, { "epoch": 0.036294018509210255, "grad_norm": 0.10035062581300735, "learning_rate": 4.8364049145299145e-05, "loss": 0.0248, "step": 4910 }, { "epoch": 0.03636793708051211, "grad_norm": 0.12051332741975784, "learning_rate": 4.836033950617284e-05, "loss": 0.0236, "step": 4920 }, { "epoch": 0.03644185565181396, "grad_norm": 0.09612835943698883, "learning_rate": 4.8356629867046536e-05, "loss": 0.0218, "step": 4930 }, { "epoch": 0.03651577422311582, "grad_norm": 0.11352284252643585, "learning_rate": 4.8352920227920226e-05, "loss": 0.0251, "step": 4940 }, { "epoch": 0.03658969279441767, "grad_norm": 0.11685504764318466, "learning_rate": 4.834921058879393e-05, "loss": 0.0246, "step": 4950 }, { "epoch": 0.03666361136571952, "grad_norm": 0.09964120388031006, "learning_rate": 4.834550094966762e-05, "loss": 0.023, "step": 4960 }, { "epoch": 0.036737529937021375, "grad_norm": 0.09999851137399673, "learning_rate": 4.8341791310541314e-05, "loss": 0.0224, "step": 4970 }, { "epoch": 0.036811448508323234, "grad_norm": 0.10812164843082428, "learning_rate": 4.833808167141501e-05, "loss": 0.0246, "step": 4980 }, { "epoch": 0.03688536707962509, "grad_norm": 0.10093782842159271, "learning_rate": 4.83343720322887e-05, "loss": 0.0238, "step": 4990 }, { "epoch": 0.03695928565092694, "grad_norm": 0.08897837996482849, "learning_rate": 4.8330662393162395e-05, "loss": 0.0212, "step": 5000 }, { "epoch": 0.03703320422222879, "grad_norm": 0.08802120387554169, "learning_rate": 4.832695275403609e-05, "loss": 0.0261, "step": 5010 }, { "epoch": 0.03710712279353064, "grad_norm": 0.1296195685863495, "learning_rate": 4.832324311490978e-05, "loss": 0.0243, "step": 5020 }, { "epoch": 0.0371810413648325, "grad_norm": 0.09052955359220505, "learning_rate": 4.831953347578348e-05, "loss": 0.0217, "step": 5030 }, { "epoch": 0.037254959936134355, "grad_norm": 0.11013671010732651, "learning_rate": 4.831582383665717e-05, "loss": 0.0262, "step": 5040 }, { "epoch": 0.03732887850743621, "grad_norm": 0.08840049803256989, "learning_rate": 4.831211419753087e-05, "loss": 0.0245, "step": 5050 }, { "epoch": 0.03740279707873806, "grad_norm": 0.14922712743282318, "learning_rate": 4.8308404558404564e-05, "loss": 0.023, "step": 5060 }, { "epoch": 0.03747671565003992, "grad_norm": 0.13218800723552704, "learning_rate": 4.830469491927825e-05, "loss": 0.0223, "step": 5070 }, { "epoch": 0.03755063422134177, "grad_norm": 0.15943408012390137, "learning_rate": 4.830098528015195e-05, "loss": 0.0229, "step": 5080 }, { "epoch": 0.03762455279264362, "grad_norm": 0.10490970313549042, "learning_rate": 4.829727564102564e-05, "loss": 0.0244, "step": 5090 }, { "epoch": 0.037698471363945475, "grad_norm": 0.11059322208166122, "learning_rate": 4.8293566001899334e-05, "loss": 0.0231, "step": 5100 }, { "epoch": 0.037772389935247334, "grad_norm": 0.13888812065124512, "learning_rate": 4.828985636277304e-05, "loss": 0.0216, "step": 5110 }, { "epoch": 0.03784630850654919, "grad_norm": 0.10815978795289993, "learning_rate": 4.8286146723646726e-05, "loss": 0.0236, "step": 5120 }, { "epoch": 0.03792022707785104, "grad_norm": 0.11108426004648209, "learning_rate": 4.828243708452042e-05, "loss": 0.0229, "step": 5130 }, { "epoch": 0.03799414564915289, "grad_norm": 0.11670207977294922, "learning_rate": 4.827872744539411e-05, "loss": 0.0227, "step": 5140 }, { "epoch": 0.03806806422045475, "grad_norm": 0.08577834069728851, "learning_rate": 4.827501780626781e-05, "loss": 0.0228, "step": 5150 }, { "epoch": 0.0381419827917566, "grad_norm": 0.11961571127176285, "learning_rate": 4.82713081671415e-05, "loss": 0.0244, "step": 5160 }, { "epoch": 0.038215901363058455, "grad_norm": 0.11697886884212494, "learning_rate": 4.826759852801519e-05, "loss": 0.021, "step": 5170 }, { "epoch": 0.03828981993436031, "grad_norm": 0.08601241558790207, "learning_rate": 4.8263888888888895e-05, "loss": 0.0263, "step": 5180 }, { "epoch": 0.03836373850566216, "grad_norm": 0.1259474903345108, "learning_rate": 4.8260179249762584e-05, "loss": 0.0215, "step": 5190 }, { "epoch": 0.03843765707696402, "grad_norm": 0.11750676482915878, "learning_rate": 4.825646961063628e-05, "loss": 0.0219, "step": 5200 }, { "epoch": 0.03851157564826587, "grad_norm": 0.10238835960626602, "learning_rate": 4.8252759971509976e-05, "loss": 0.0214, "step": 5210 }, { "epoch": 0.03858549421956772, "grad_norm": 0.2656376361846924, "learning_rate": 4.8249050332383665e-05, "loss": 0.0244, "step": 5220 }, { "epoch": 0.038659412790869575, "grad_norm": 0.10988765954971313, "learning_rate": 4.824534069325736e-05, "loss": 0.023, "step": 5230 }, { "epoch": 0.038733331362171435, "grad_norm": 0.13231900334358215, "learning_rate": 4.824163105413106e-05, "loss": 0.0249, "step": 5240 }, { "epoch": 0.03880724993347329, "grad_norm": 0.10795532912015915, "learning_rate": 4.8237921415004746e-05, "loss": 0.022, "step": 5250 }, { "epoch": 0.03888116850477514, "grad_norm": 0.12678179144859314, "learning_rate": 4.823421177587845e-05, "loss": 0.0275, "step": 5260 }, { "epoch": 0.03895508707607699, "grad_norm": 0.10373057425022125, "learning_rate": 4.823050213675214e-05, "loss": 0.0235, "step": 5270 }, { "epoch": 0.03902900564737885, "grad_norm": 0.13203772902488708, "learning_rate": 4.8226792497625834e-05, "loss": 0.0254, "step": 5280 }, { "epoch": 0.0391029242186807, "grad_norm": 0.09419368207454681, "learning_rate": 4.822308285849953e-05, "loss": 0.0209, "step": 5290 }, { "epoch": 0.039176842789982555, "grad_norm": 0.12208737432956696, "learning_rate": 4.821937321937322e-05, "loss": 0.0225, "step": 5300 }, { "epoch": 0.03925076136128441, "grad_norm": 0.11247088760137558, "learning_rate": 4.8215663580246915e-05, "loss": 0.0255, "step": 5310 }, { "epoch": 0.039324679932586266, "grad_norm": 0.09036470949649811, "learning_rate": 4.8211953941120605e-05, "loss": 0.0246, "step": 5320 }, { "epoch": 0.03939859850388812, "grad_norm": 0.08673543483018875, "learning_rate": 4.820824430199431e-05, "loss": 0.02, "step": 5330 }, { "epoch": 0.03947251707518997, "grad_norm": 0.10785869508981705, "learning_rate": 4.8204534662868e-05, "loss": 0.0232, "step": 5340 }, { "epoch": 0.03954643564649182, "grad_norm": 0.15774936974048615, "learning_rate": 4.820082502374169e-05, "loss": 0.02, "step": 5350 }, { "epoch": 0.039620354217793675, "grad_norm": 0.09657442569732666, "learning_rate": 4.819711538461539e-05, "loss": 0.0222, "step": 5360 }, { "epoch": 0.039694272789095535, "grad_norm": 0.08638004213571548, "learning_rate": 4.819340574548908e-05, "loss": 0.0202, "step": 5370 }, { "epoch": 0.03976819136039739, "grad_norm": 0.07568100094795227, "learning_rate": 4.8189696106362774e-05, "loss": 0.0221, "step": 5380 }, { "epoch": 0.03984210993169924, "grad_norm": 0.10558179020881653, "learning_rate": 4.818598646723647e-05, "loss": 0.0239, "step": 5390 }, { "epoch": 0.03991602850300109, "grad_norm": 0.15386056900024414, "learning_rate": 4.818227682811016e-05, "loss": 0.0214, "step": 5400 }, { "epoch": 0.03998994707430295, "grad_norm": 0.1022673174738884, "learning_rate": 4.817856718898386e-05, "loss": 0.0231, "step": 5410 }, { "epoch": 0.0400638656456048, "grad_norm": 0.10176906734704971, "learning_rate": 4.817485754985755e-05, "loss": 0.023, "step": 5420 }, { "epoch": 0.040137784216906655, "grad_norm": 0.11575393378734589, "learning_rate": 4.817114791073125e-05, "loss": 0.023, "step": 5430 }, { "epoch": 0.04021170278820851, "grad_norm": 0.11693524569272995, "learning_rate": 4.816743827160494e-05, "loss": 0.024, "step": 5440 }, { "epoch": 0.04028562135951037, "grad_norm": 0.10131990909576416, "learning_rate": 4.816372863247863e-05, "loss": 0.0211, "step": 5450 }, { "epoch": 0.04035953993081222, "grad_norm": 0.1304934024810791, "learning_rate": 4.816001899335233e-05, "loss": 0.0225, "step": 5460 }, { "epoch": 0.04043345850211407, "grad_norm": 0.12911392748355865, "learning_rate": 4.8156309354226024e-05, "loss": 0.0198, "step": 5470 }, { "epoch": 0.04050737707341592, "grad_norm": 0.07756907492876053, "learning_rate": 4.815259971509972e-05, "loss": 0.0186, "step": 5480 }, { "epoch": 0.040581295644717776, "grad_norm": 0.09241756796836853, "learning_rate": 4.8148890075973416e-05, "loss": 0.0208, "step": 5490 }, { "epoch": 0.040655214216019635, "grad_norm": 0.09917902946472168, "learning_rate": 4.8145180436847105e-05, "loss": 0.024, "step": 5500 }, { "epoch": 0.04072913278732149, "grad_norm": 0.11154290288686752, "learning_rate": 4.81414707977208e-05, "loss": 0.0222, "step": 5510 }, { "epoch": 0.04080305135862334, "grad_norm": 0.08803219348192215, "learning_rate": 4.81377611585945e-05, "loss": 0.0219, "step": 5520 }, { "epoch": 0.04087696992992519, "grad_norm": 0.112600177526474, "learning_rate": 4.8134051519468186e-05, "loss": 0.0228, "step": 5530 }, { "epoch": 0.04095088850122705, "grad_norm": 0.08610142767429352, "learning_rate": 4.813034188034188e-05, "loss": 0.0211, "step": 5540 }, { "epoch": 0.0410248070725289, "grad_norm": 0.10519230365753174, "learning_rate": 4.812663224121557e-05, "loss": 0.0226, "step": 5550 }, { "epoch": 0.041098725643830755, "grad_norm": 0.10147269815206528, "learning_rate": 4.8122922602089274e-05, "loss": 0.0201, "step": 5560 }, { "epoch": 0.04117264421513261, "grad_norm": 0.115921251475811, "learning_rate": 4.811921296296297e-05, "loss": 0.0251, "step": 5570 }, { "epoch": 0.04124656278643447, "grad_norm": 0.09626685827970505, "learning_rate": 4.811550332383666e-05, "loss": 0.0206, "step": 5580 }, { "epoch": 0.04132048135773632, "grad_norm": 0.08677060902118683, "learning_rate": 4.8111793684710355e-05, "loss": 0.0212, "step": 5590 }, { "epoch": 0.04139439992903817, "grad_norm": 0.0958794429898262, "learning_rate": 4.8108084045584044e-05, "loss": 0.0224, "step": 5600 }, { "epoch": 0.04146831850034002, "grad_norm": 0.11729035526514053, "learning_rate": 4.810437440645774e-05, "loss": 0.0211, "step": 5610 }, { "epoch": 0.04154223707164188, "grad_norm": 0.10277558118104935, "learning_rate": 4.8100664767331436e-05, "loss": 0.0232, "step": 5620 }, { "epoch": 0.041616155642943735, "grad_norm": 0.13820619881153107, "learning_rate": 4.809695512820513e-05, "loss": 0.0231, "step": 5630 }, { "epoch": 0.04169007421424559, "grad_norm": 0.11183051764965057, "learning_rate": 4.809324548907883e-05, "loss": 0.0243, "step": 5640 }, { "epoch": 0.04176399278554744, "grad_norm": 0.10406447947025299, "learning_rate": 4.808953584995252e-05, "loss": 0.0211, "step": 5650 }, { "epoch": 0.04183791135684929, "grad_norm": 0.08350703120231628, "learning_rate": 4.808582621082621e-05, "loss": 0.0203, "step": 5660 }, { "epoch": 0.04191182992815115, "grad_norm": 0.11309721320867538, "learning_rate": 4.808211657169991e-05, "loss": 0.0222, "step": 5670 }, { "epoch": 0.041985748499453, "grad_norm": 0.08373697847127914, "learning_rate": 4.80784069325736e-05, "loss": 0.0199, "step": 5680 }, { "epoch": 0.042059667070754855, "grad_norm": 0.09311481565237045, "learning_rate": 4.8074697293447294e-05, "loss": 0.0247, "step": 5690 }, { "epoch": 0.04213358564205671, "grad_norm": 0.08883152902126312, "learning_rate": 4.807098765432099e-05, "loss": 0.025, "step": 5700 }, { "epoch": 0.04220750421335857, "grad_norm": 0.14571034908294678, "learning_rate": 4.8067278015194686e-05, "loss": 0.0229, "step": 5710 }, { "epoch": 0.04228142278466042, "grad_norm": 0.09702489525079727, "learning_rate": 4.806356837606838e-05, "loss": 0.0221, "step": 5720 }, { "epoch": 0.04235534135596227, "grad_norm": 0.13664387166500092, "learning_rate": 4.805985873694207e-05, "loss": 0.0237, "step": 5730 }, { "epoch": 0.042429259927264124, "grad_norm": 0.09496665000915527, "learning_rate": 4.805614909781577e-05, "loss": 0.0206, "step": 5740 }, { "epoch": 0.04250317849856598, "grad_norm": 0.1166987493634224, "learning_rate": 4.8052439458689463e-05, "loss": 0.021, "step": 5750 }, { "epoch": 0.042577097069867835, "grad_norm": 0.11197572201490402, "learning_rate": 4.804872981956315e-05, "loss": 0.0223, "step": 5760 }, { "epoch": 0.04265101564116969, "grad_norm": 0.10108604282140732, "learning_rate": 4.804502018043685e-05, "loss": 0.0228, "step": 5770 }, { "epoch": 0.04272493421247154, "grad_norm": 0.09897824376821518, "learning_rate": 4.8041310541310545e-05, "loss": 0.0219, "step": 5780 }, { "epoch": 0.0427988527837734, "grad_norm": 0.10928849875926971, "learning_rate": 4.803760090218424e-05, "loss": 0.0227, "step": 5790 }, { "epoch": 0.04287277135507525, "grad_norm": 0.09280960261821747, "learning_rate": 4.8033891263057937e-05, "loss": 0.0214, "step": 5800 }, { "epoch": 0.0429466899263771, "grad_norm": 0.11366759985685349, "learning_rate": 4.8030181623931626e-05, "loss": 0.0215, "step": 5810 }, { "epoch": 0.043020608497678955, "grad_norm": 0.10362562537193298, "learning_rate": 4.802647198480532e-05, "loss": 0.0227, "step": 5820 }, { "epoch": 0.04309452706898081, "grad_norm": 0.08938121795654297, "learning_rate": 4.802276234567901e-05, "loss": 0.0222, "step": 5830 }, { "epoch": 0.04316844564028267, "grad_norm": 0.08877433091402054, "learning_rate": 4.801905270655271e-05, "loss": 0.0228, "step": 5840 }, { "epoch": 0.04324236421158452, "grad_norm": 0.12970751523971558, "learning_rate": 4.80153430674264e-05, "loss": 0.0204, "step": 5850 }, { "epoch": 0.04331628278288637, "grad_norm": 0.0647781565785408, "learning_rate": 4.80116334283001e-05, "loss": 0.0205, "step": 5860 }, { "epoch": 0.043390201354188224, "grad_norm": 0.09601494669914246, "learning_rate": 4.8007923789173795e-05, "loss": 0.0234, "step": 5870 }, { "epoch": 0.04346411992549008, "grad_norm": 0.11197741329669952, "learning_rate": 4.800421415004749e-05, "loss": 0.0228, "step": 5880 }, { "epoch": 0.043538038496791935, "grad_norm": 0.12032473832368851, "learning_rate": 4.800050451092118e-05, "loss": 0.0208, "step": 5890 }, { "epoch": 0.04361195706809379, "grad_norm": 0.09236224740743637, "learning_rate": 4.7996794871794876e-05, "loss": 0.0232, "step": 5900 }, { "epoch": 0.04368587563939564, "grad_norm": 0.12311521917581558, "learning_rate": 4.7993085232668565e-05, "loss": 0.0208, "step": 5910 }, { "epoch": 0.0437597942106975, "grad_norm": 0.09777011722326279, "learning_rate": 4.798937559354226e-05, "loss": 0.0233, "step": 5920 }, { "epoch": 0.04383371278199935, "grad_norm": 0.09315415471792221, "learning_rate": 4.798566595441596e-05, "loss": 0.0215, "step": 5930 }, { "epoch": 0.0439076313533012, "grad_norm": 0.11994203925132751, "learning_rate": 4.798195631528965e-05, "loss": 0.0238, "step": 5940 }, { "epoch": 0.043981549924603056, "grad_norm": 0.10405700653791428, "learning_rate": 4.797824667616335e-05, "loss": 0.0251, "step": 5950 }, { "epoch": 0.04405546849590491, "grad_norm": 0.10718826204538345, "learning_rate": 4.797453703703704e-05, "loss": 0.0228, "step": 5960 }, { "epoch": 0.04412938706720677, "grad_norm": 0.15280954539775848, "learning_rate": 4.7970827397910734e-05, "loss": 0.0245, "step": 5970 }, { "epoch": 0.04420330563850862, "grad_norm": 0.10383470356464386, "learning_rate": 4.796711775878443e-05, "loss": 0.026, "step": 5980 }, { "epoch": 0.04427722420981047, "grad_norm": 0.0948052927851677, "learning_rate": 4.796340811965812e-05, "loss": 0.0219, "step": 5990 }, { "epoch": 0.044351142781112324, "grad_norm": 0.10084927082061768, "learning_rate": 4.7959698480531815e-05, "loss": 0.0241, "step": 6000 }, { "epoch": 0.04442506135241418, "grad_norm": 0.10056540369987488, "learning_rate": 4.795598884140551e-05, "loss": 0.0234, "step": 6010 }, { "epoch": 0.044498979923716035, "grad_norm": 0.0987926572561264, "learning_rate": 4.795227920227921e-05, "loss": 0.0214, "step": 6020 }, { "epoch": 0.04457289849501789, "grad_norm": 0.10318058729171753, "learning_rate": 4.79485695631529e-05, "loss": 0.024, "step": 6030 }, { "epoch": 0.04464681706631974, "grad_norm": 0.08674127608537674, "learning_rate": 4.794485992402659e-05, "loss": 0.0257, "step": 6040 }, { "epoch": 0.0447207356376216, "grad_norm": 0.09985113888978958, "learning_rate": 4.794115028490029e-05, "loss": 0.0252, "step": 6050 }, { "epoch": 0.04479465420892345, "grad_norm": 0.08422184735536575, "learning_rate": 4.793744064577398e-05, "loss": 0.0224, "step": 6060 }, { "epoch": 0.0448685727802253, "grad_norm": 0.10255678743124008, "learning_rate": 4.7933731006647673e-05, "loss": 0.0211, "step": 6070 }, { "epoch": 0.044942491351527156, "grad_norm": 0.0996970385313034, "learning_rate": 4.793002136752137e-05, "loss": 0.0222, "step": 6080 }, { "epoch": 0.045016409922829015, "grad_norm": 0.1050659790635109, "learning_rate": 4.7926311728395065e-05, "loss": 0.0232, "step": 6090 }, { "epoch": 0.04509032849413087, "grad_norm": 0.11204075068235397, "learning_rate": 4.792260208926876e-05, "loss": 0.0254, "step": 6100 }, { "epoch": 0.04516424706543272, "grad_norm": 0.09773686528205872, "learning_rate": 4.791889245014246e-05, "loss": 0.0235, "step": 6110 }, { "epoch": 0.04523816563673457, "grad_norm": 0.09989985823631287, "learning_rate": 4.7915182811016147e-05, "loss": 0.0216, "step": 6120 }, { "epoch": 0.045312084208036424, "grad_norm": 0.10974659770727158, "learning_rate": 4.791147317188984e-05, "loss": 0.0216, "step": 6130 }, { "epoch": 0.04538600277933828, "grad_norm": 0.11773477494716644, "learning_rate": 4.790776353276353e-05, "loss": 0.0232, "step": 6140 }, { "epoch": 0.045459921350640135, "grad_norm": 0.09761492162942886, "learning_rate": 4.790405389363723e-05, "loss": 0.0232, "step": 6150 }, { "epoch": 0.04553383992194199, "grad_norm": 0.10829565674066544, "learning_rate": 4.7900344254510924e-05, "loss": 0.0234, "step": 6160 }, { "epoch": 0.04560775849324384, "grad_norm": 0.12356780469417572, "learning_rate": 4.789663461538462e-05, "loss": 0.0199, "step": 6170 }, { "epoch": 0.0456816770645457, "grad_norm": 0.09067923575639725, "learning_rate": 4.7892924976258316e-05, "loss": 0.0237, "step": 6180 }, { "epoch": 0.04575559563584755, "grad_norm": 0.09042474627494812, "learning_rate": 4.7889215337132005e-05, "loss": 0.0228, "step": 6190 }, { "epoch": 0.045829514207149404, "grad_norm": 0.08567062765359879, "learning_rate": 4.78855056980057e-05, "loss": 0.0229, "step": 6200 }, { "epoch": 0.045903432778451256, "grad_norm": 0.10306154936552048, "learning_rate": 4.78817960588794e-05, "loss": 0.0201, "step": 6210 }, { "epoch": 0.045977351349753115, "grad_norm": 0.1060139611363411, "learning_rate": 4.7878086419753086e-05, "loss": 0.0216, "step": 6220 }, { "epoch": 0.04605126992105497, "grad_norm": 0.09518091380596161, "learning_rate": 4.787437678062678e-05, "loss": 0.0243, "step": 6230 }, { "epoch": 0.04612518849235682, "grad_norm": 0.09898778051137924, "learning_rate": 4.787066714150048e-05, "loss": 0.0225, "step": 6240 }, { "epoch": 0.04619910706365867, "grad_norm": 0.08496725559234619, "learning_rate": 4.7866957502374174e-05, "loss": 0.0225, "step": 6250 }, { "epoch": 0.04627302563496053, "grad_norm": 0.0848846286535263, "learning_rate": 4.786324786324787e-05, "loss": 0.0241, "step": 6260 }, { "epoch": 0.04634694420626238, "grad_norm": 0.1118076741695404, "learning_rate": 4.785953822412156e-05, "loss": 0.0214, "step": 6270 }, { "epoch": 0.046420862777564235, "grad_norm": 0.08831203728914261, "learning_rate": 4.7855828584995255e-05, "loss": 0.0201, "step": 6280 }, { "epoch": 0.04649478134886609, "grad_norm": 0.1217513382434845, "learning_rate": 4.7852118945868944e-05, "loss": 0.025, "step": 6290 }, { "epoch": 0.04656869992016794, "grad_norm": 0.13399529457092285, "learning_rate": 4.784840930674264e-05, "loss": 0.0249, "step": 6300 }, { "epoch": 0.0466426184914698, "grad_norm": 0.10110674798488617, "learning_rate": 4.7844699667616336e-05, "loss": 0.0214, "step": 6310 }, { "epoch": 0.04671653706277165, "grad_norm": 0.12537550926208496, "learning_rate": 4.784099002849003e-05, "loss": 0.0233, "step": 6320 }, { "epoch": 0.046790455634073504, "grad_norm": 0.08597607165575027, "learning_rate": 4.783728038936373e-05, "loss": 0.0225, "step": 6330 }, { "epoch": 0.046864374205375356, "grad_norm": 0.12496454268693924, "learning_rate": 4.7833570750237424e-05, "loss": 0.0233, "step": 6340 }, { "epoch": 0.046938292776677215, "grad_norm": 0.11686607450246811, "learning_rate": 4.782986111111111e-05, "loss": 0.0212, "step": 6350 }, { "epoch": 0.04701221134797907, "grad_norm": 0.09545844793319702, "learning_rate": 4.782615147198481e-05, "loss": 0.0223, "step": 6360 }, { "epoch": 0.04708612991928092, "grad_norm": 0.12743836641311646, "learning_rate": 4.78224418328585e-05, "loss": 0.0212, "step": 6370 }, { "epoch": 0.04716004849058277, "grad_norm": 0.10326215624809265, "learning_rate": 4.7818732193732194e-05, "loss": 0.0216, "step": 6380 }, { "epoch": 0.04723396706188463, "grad_norm": 0.1223498284816742, "learning_rate": 4.781502255460589e-05, "loss": 0.0279, "step": 6390 }, { "epoch": 0.04730788563318648, "grad_norm": 0.08332131803035736, "learning_rate": 4.7811312915479586e-05, "loss": 0.0213, "step": 6400 }, { "epoch": 0.047381804204488336, "grad_norm": 0.10657671838998795, "learning_rate": 4.780760327635328e-05, "loss": 0.0222, "step": 6410 }, { "epoch": 0.04745572277579019, "grad_norm": 0.0815543606877327, "learning_rate": 4.780389363722697e-05, "loss": 0.025, "step": 6420 }, { "epoch": 0.04752964134709204, "grad_norm": 0.1204519271850586, "learning_rate": 4.780018399810067e-05, "loss": 0.0204, "step": 6430 }, { "epoch": 0.0476035599183939, "grad_norm": 0.12509311735630035, "learning_rate": 4.779647435897436e-05, "loss": 0.02, "step": 6440 }, { "epoch": 0.04767747848969575, "grad_norm": 0.1401158720254898, "learning_rate": 4.779276471984805e-05, "loss": 0.0234, "step": 6450 }, { "epoch": 0.047751397060997604, "grad_norm": 0.08719848096370697, "learning_rate": 4.778905508072175e-05, "loss": 0.0211, "step": 6460 }, { "epoch": 0.047825315632299456, "grad_norm": 0.08922212570905685, "learning_rate": 4.7785345441595444e-05, "loss": 0.0246, "step": 6470 }, { "epoch": 0.047899234203601315, "grad_norm": 0.14943371713161469, "learning_rate": 4.778163580246914e-05, "loss": 0.0233, "step": 6480 }, { "epoch": 0.04797315277490317, "grad_norm": 0.09004784375429153, "learning_rate": 4.7777926163342836e-05, "loss": 0.0209, "step": 6490 }, { "epoch": 0.04804707134620502, "grad_norm": 0.10191723704338074, "learning_rate": 4.7774216524216525e-05, "loss": 0.0241, "step": 6500 }, { "epoch": 0.04812098991750687, "grad_norm": 0.08505424857139587, "learning_rate": 4.777050688509022e-05, "loss": 0.0216, "step": 6510 }, { "epoch": 0.04819490848880873, "grad_norm": 0.09687618911266327, "learning_rate": 4.776679724596391e-05, "loss": 0.024, "step": 6520 }, { "epoch": 0.04826882706011058, "grad_norm": 0.067629374563694, "learning_rate": 4.7763087606837607e-05, "loss": 0.0227, "step": 6530 }, { "epoch": 0.048342745631412436, "grad_norm": 0.09508171677589417, "learning_rate": 4.77593779677113e-05, "loss": 0.0223, "step": 6540 }, { "epoch": 0.04841666420271429, "grad_norm": 0.09708387404680252, "learning_rate": 4.7755668328585e-05, "loss": 0.0205, "step": 6550 }, { "epoch": 0.04849058277401615, "grad_norm": 0.11333795636892319, "learning_rate": 4.7751958689458695e-05, "loss": 0.0216, "step": 6560 }, { "epoch": 0.048564501345318, "grad_norm": 0.0891813114285469, "learning_rate": 4.774824905033239e-05, "loss": 0.0213, "step": 6570 }, { "epoch": 0.04863841991661985, "grad_norm": 0.09201674908399582, "learning_rate": 4.774453941120608e-05, "loss": 0.0238, "step": 6580 }, { "epoch": 0.048712338487921704, "grad_norm": 0.10166573524475098, "learning_rate": 4.7740829772079776e-05, "loss": 0.0241, "step": 6590 }, { "epoch": 0.048786257059223556, "grad_norm": 0.08590281754732132, "learning_rate": 4.7737120132953465e-05, "loss": 0.0232, "step": 6600 }, { "epoch": 0.048860175630525415, "grad_norm": 0.0673174038529396, "learning_rate": 4.773341049382716e-05, "loss": 0.021, "step": 6610 }, { "epoch": 0.04893409420182727, "grad_norm": 0.09542142599821091, "learning_rate": 4.772970085470086e-05, "loss": 0.0212, "step": 6620 }, { "epoch": 0.04900801277312912, "grad_norm": 0.12368637323379517, "learning_rate": 4.772599121557455e-05, "loss": 0.0235, "step": 6630 }, { "epoch": 0.04908193134443097, "grad_norm": 0.10154004395008087, "learning_rate": 4.772228157644825e-05, "loss": 0.0215, "step": 6640 }, { "epoch": 0.04915584991573283, "grad_norm": 0.09259312599897385, "learning_rate": 4.771857193732194e-05, "loss": 0.0206, "step": 6650 }, { "epoch": 0.049229768487034684, "grad_norm": 0.07898467779159546, "learning_rate": 4.7714862298195634e-05, "loss": 0.0188, "step": 6660 }, { "epoch": 0.049303687058336536, "grad_norm": 0.12356526404619217, "learning_rate": 4.771115265906933e-05, "loss": 0.0217, "step": 6670 }, { "epoch": 0.04937760562963839, "grad_norm": 0.14405713975429535, "learning_rate": 4.770744301994302e-05, "loss": 0.0208, "step": 6680 }, { "epoch": 0.04945152420094025, "grad_norm": 0.0964358001947403, "learning_rate": 4.7703733380816715e-05, "loss": 0.0222, "step": 6690 }, { "epoch": 0.0495254427722421, "grad_norm": 0.07914058864116669, "learning_rate": 4.770002374169041e-05, "loss": 0.0222, "step": 6700 }, { "epoch": 0.04959936134354395, "grad_norm": 0.09293976426124573, "learning_rate": 4.769631410256411e-05, "loss": 0.0231, "step": 6710 }, { "epoch": 0.049673279914845804, "grad_norm": 0.09488365799188614, "learning_rate": 4.76926044634378e-05, "loss": 0.0205, "step": 6720 }, { "epoch": 0.049747198486147656, "grad_norm": 0.10206745564937592, "learning_rate": 4.768889482431149e-05, "loss": 0.0231, "step": 6730 }, { "epoch": 0.049821117057449515, "grad_norm": 0.08070364594459534, "learning_rate": 4.768518518518519e-05, "loss": 0.0233, "step": 6740 }, { "epoch": 0.04989503562875137, "grad_norm": 0.10973179340362549, "learning_rate": 4.768147554605888e-05, "loss": 0.0223, "step": 6750 }, { "epoch": 0.04996895420005322, "grad_norm": 0.12484482675790787, "learning_rate": 4.767776590693257e-05, "loss": 0.0233, "step": 6760 }, { "epoch": 0.05004287277135507, "grad_norm": 0.11287377029657364, "learning_rate": 4.767405626780627e-05, "loss": 0.0241, "step": 6770 }, { "epoch": 0.05011679134265693, "grad_norm": 0.12103903293609619, "learning_rate": 4.7670346628679965e-05, "loss": 0.0215, "step": 6780 }, { "epoch": 0.050190709913958784, "grad_norm": 0.1062537357211113, "learning_rate": 4.766663698955366e-05, "loss": 0.0205, "step": 6790 }, { "epoch": 0.050264628485260636, "grad_norm": 0.09399276226758957, "learning_rate": 4.766292735042736e-05, "loss": 0.0242, "step": 6800 }, { "epoch": 0.05033854705656249, "grad_norm": 0.09174839407205582, "learning_rate": 4.7659217711301046e-05, "loss": 0.0209, "step": 6810 }, { "epoch": 0.05041246562786435, "grad_norm": 0.11377735435962677, "learning_rate": 4.765550807217474e-05, "loss": 0.0205, "step": 6820 }, { "epoch": 0.0504863841991662, "grad_norm": 0.11271318048238754, "learning_rate": 4.765179843304843e-05, "loss": 0.0235, "step": 6830 }, { "epoch": 0.05056030277046805, "grad_norm": 0.10871057957410812, "learning_rate": 4.764808879392213e-05, "loss": 0.0214, "step": 6840 }, { "epoch": 0.050634221341769904, "grad_norm": 0.13600623607635498, "learning_rate": 4.764437915479582e-05, "loss": 0.0239, "step": 6850 }, { "epoch": 0.05070813991307176, "grad_norm": 0.11014101654291153, "learning_rate": 4.764066951566952e-05, "loss": 0.0239, "step": 6860 }, { "epoch": 0.050782058484373616, "grad_norm": 0.10503526031970978, "learning_rate": 4.7636959876543215e-05, "loss": 0.0217, "step": 6870 }, { "epoch": 0.05085597705567547, "grad_norm": 0.12979675829410553, "learning_rate": 4.7633250237416904e-05, "loss": 0.0212, "step": 6880 }, { "epoch": 0.05092989562697732, "grad_norm": 0.1068769097328186, "learning_rate": 4.76295405982906e-05, "loss": 0.0209, "step": 6890 }, { "epoch": 0.05100381419827917, "grad_norm": 0.11308766901493073, "learning_rate": 4.7625830959164296e-05, "loss": 0.0216, "step": 6900 }, { "epoch": 0.05107773276958103, "grad_norm": 0.1139088124036789, "learning_rate": 4.7622121320037986e-05, "loss": 0.0228, "step": 6910 }, { "epoch": 0.051151651340882884, "grad_norm": 0.10564363747835159, "learning_rate": 4.761841168091168e-05, "loss": 0.0218, "step": 6920 }, { "epoch": 0.051225569912184736, "grad_norm": 0.11074735224246979, "learning_rate": 4.761470204178538e-05, "loss": 0.023, "step": 6930 }, { "epoch": 0.05129948848348659, "grad_norm": 0.0938468873500824, "learning_rate": 4.7610992402659073e-05, "loss": 0.023, "step": 6940 }, { "epoch": 0.05137340705478845, "grad_norm": 0.08696165680885315, "learning_rate": 4.760728276353277e-05, "loss": 0.0242, "step": 6950 }, { "epoch": 0.0514473256260903, "grad_norm": 0.06806029379367828, "learning_rate": 4.760357312440646e-05, "loss": 0.0238, "step": 6960 }, { "epoch": 0.05152124419739215, "grad_norm": 0.165776327252388, "learning_rate": 4.7599863485280155e-05, "loss": 0.0169, "step": 6970 }, { "epoch": 0.051595162768694004, "grad_norm": 0.09899604320526123, "learning_rate": 4.7596153846153844e-05, "loss": 0.0217, "step": 6980 }, { "epoch": 0.05166908133999586, "grad_norm": 0.09343422204256058, "learning_rate": 4.759244420702754e-05, "loss": 0.0226, "step": 6990 }, { "epoch": 0.051742999911297716, "grad_norm": 0.09994067251682281, "learning_rate": 4.7588734567901236e-05, "loss": 0.0236, "step": 7000 }, { "epoch": 0.05181691848259957, "grad_norm": 0.08840420097112656, "learning_rate": 4.758502492877493e-05, "loss": 0.0205, "step": 7010 }, { "epoch": 0.05189083705390142, "grad_norm": 0.11010074615478516, "learning_rate": 4.758131528964863e-05, "loss": 0.0248, "step": 7020 }, { "epoch": 0.05196475562520328, "grad_norm": 0.1090909019112587, "learning_rate": 4.7577605650522324e-05, "loss": 0.0219, "step": 7030 }, { "epoch": 0.05203867419650513, "grad_norm": 0.09599191695451736, "learning_rate": 4.757389601139601e-05, "loss": 0.0203, "step": 7040 }, { "epoch": 0.052112592767806984, "grad_norm": 0.10519011318683624, "learning_rate": 4.757018637226971e-05, "loss": 0.0225, "step": 7050 }, { "epoch": 0.052186511339108836, "grad_norm": 0.1019931212067604, "learning_rate": 4.75664767331434e-05, "loss": 0.0219, "step": 7060 }, { "epoch": 0.05226042991041069, "grad_norm": 0.07580450177192688, "learning_rate": 4.7562767094017094e-05, "loss": 0.0244, "step": 7070 }, { "epoch": 0.05233434848171255, "grad_norm": 0.10171625763177872, "learning_rate": 4.755905745489079e-05, "loss": 0.0236, "step": 7080 }, { "epoch": 0.0524082670530144, "grad_norm": 0.10591152310371399, "learning_rate": 4.7555347815764486e-05, "loss": 0.0229, "step": 7090 }, { "epoch": 0.05248218562431625, "grad_norm": 0.07710936665534973, "learning_rate": 4.755163817663818e-05, "loss": 0.0209, "step": 7100 }, { "epoch": 0.052556104195618104, "grad_norm": 0.08563518524169922, "learning_rate": 4.754792853751187e-05, "loss": 0.0212, "step": 7110 }, { "epoch": 0.052630022766919964, "grad_norm": 0.09113353490829468, "learning_rate": 4.754421889838557e-05, "loss": 0.0207, "step": 7120 }, { "epoch": 0.052703941338221816, "grad_norm": 0.12325076758861542, "learning_rate": 4.754050925925926e-05, "loss": 0.0201, "step": 7130 }, { "epoch": 0.05277785990952367, "grad_norm": 0.11564420163631439, "learning_rate": 4.753679962013295e-05, "loss": 0.0195, "step": 7140 }, { "epoch": 0.05285177848082552, "grad_norm": 0.11357983201742172, "learning_rate": 4.753308998100665e-05, "loss": 0.0201, "step": 7150 }, { "epoch": 0.05292569705212738, "grad_norm": 0.10943233221769333, "learning_rate": 4.7529380341880344e-05, "loss": 0.0238, "step": 7160 }, { "epoch": 0.05299961562342923, "grad_norm": 0.12073173373937607, "learning_rate": 4.752567070275404e-05, "loss": 0.0259, "step": 7170 }, { "epoch": 0.053073534194731084, "grad_norm": 0.08202937990427017, "learning_rate": 4.7521961063627736e-05, "loss": 0.0198, "step": 7180 }, { "epoch": 0.053147452766032936, "grad_norm": 0.10718532651662827, "learning_rate": 4.7518251424501425e-05, "loss": 0.0206, "step": 7190 }, { "epoch": 0.05322137133733479, "grad_norm": 0.10505930334329605, "learning_rate": 4.751454178537512e-05, "loss": 0.0238, "step": 7200 }, { "epoch": 0.05329528990863665, "grad_norm": 0.11115860939025879, "learning_rate": 4.751083214624881e-05, "loss": 0.0204, "step": 7210 }, { "epoch": 0.0533692084799385, "grad_norm": 0.10364003479480743, "learning_rate": 4.7507122507122506e-05, "loss": 0.0226, "step": 7220 }, { "epoch": 0.05344312705124035, "grad_norm": 0.10836455971002579, "learning_rate": 4.75034128679962e-05, "loss": 0.0228, "step": 7230 }, { "epoch": 0.053517045622542204, "grad_norm": 0.11580884456634521, "learning_rate": 4.74997032288699e-05, "loss": 0.0223, "step": 7240 }, { "epoch": 0.053590964193844064, "grad_norm": 0.10361438989639282, "learning_rate": 4.7495993589743594e-05, "loss": 0.0216, "step": 7250 }, { "epoch": 0.053664882765145916, "grad_norm": 0.10192573815584183, "learning_rate": 4.749228395061729e-05, "loss": 0.0246, "step": 7260 }, { "epoch": 0.05373880133644777, "grad_norm": 0.11494297534227371, "learning_rate": 4.748857431149098e-05, "loss": 0.0222, "step": 7270 }, { "epoch": 0.05381271990774962, "grad_norm": 0.1062757596373558, "learning_rate": 4.7484864672364675e-05, "loss": 0.018, "step": 7280 }, { "epoch": 0.05388663847905148, "grad_norm": 0.1719273179769516, "learning_rate": 4.7481155033238365e-05, "loss": 0.0239, "step": 7290 }, { "epoch": 0.05396055705035333, "grad_norm": 0.0962737575173378, "learning_rate": 4.747744539411206e-05, "loss": 0.0218, "step": 7300 }, { "epoch": 0.054034475621655184, "grad_norm": 0.12584984302520752, "learning_rate": 4.7473735754985757e-05, "loss": 0.0238, "step": 7310 }, { "epoch": 0.054108394192957036, "grad_norm": 0.11260943859815598, "learning_rate": 4.747002611585945e-05, "loss": 0.0199, "step": 7320 }, { "epoch": 0.054182312764258896, "grad_norm": 0.08724574744701385, "learning_rate": 4.746631647673315e-05, "loss": 0.0227, "step": 7330 }, { "epoch": 0.05425623133556075, "grad_norm": 0.09556634724140167, "learning_rate": 4.746260683760684e-05, "loss": 0.0207, "step": 7340 }, { "epoch": 0.0543301499068626, "grad_norm": 0.09617023169994354, "learning_rate": 4.7458897198480534e-05, "loss": 0.02, "step": 7350 }, { "epoch": 0.05440406847816445, "grad_norm": 0.11631660163402557, "learning_rate": 4.745518755935423e-05, "loss": 0.0209, "step": 7360 }, { "epoch": 0.054477987049466305, "grad_norm": 0.08489320427179337, "learning_rate": 4.745147792022792e-05, "loss": 0.0201, "step": 7370 }, { "epoch": 0.054551905620768164, "grad_norm": 0.09915885329246521, "learning_rate": 4.7447768281101615e-05, "loss": 0.0211, "step": 7380 }, { "epoch": 0.054625824192070016, "grad_norm": 0.09194263815879822, "learning_rate": 4.744405864197531e-05, "loss": 0.0213, "step": 7390 }, { "epoch": 0.05469974276337187, "grad_norm": 0.09207558631896973, "learning_rate": 4.744034900284901e-05, "loss": 0.0204, "step": 7400 }, { "epoch": 0.05477366133467372, "grad_norm": 0.09770120680332184, "learning_rate": 4.74366393637227e-05, "loss": 0.0233, "step": 7410 }, { "epoch": 0.05484757990597558, "grad_norm": 0.09625663608312607, "learning_rate": 4.743292972459639e-05, "loss": 0.0217, "step": 7420 }, { "epoch": 0.05492149847727743, "grad_norm": 0.07443071156740189, "learning_rate": 4.742922008547009e-05, "loss": 0.0234, "step": 7430 }, { "epoch": 0.054995417048579284, "grad_norm": 0.10070905834436417, "learning_rate": 4.742551044634378e-05, "loss": 0.0209, "step": 7440 }, { "epoch": 0.055069335619881137, "grad_norm": 0.12188205868005753, "learning_rate": 4.742180080721747e-05, "loss": 0.0233, "step": 7450 }, { "epoch": 0.055143254191182996, "grad_norm": 0.09652313590049744, "learning_rate": 4.741809116809117e-05, "loss": 0.0199, "step": 7460 }, { "epoch": 0.05521717276248485, "grad_norm": 0.10234474390745163, "learning_rate": 4.7414381528964865e-05, "loss": 0.0221, "step": 7470 }, { "epoch": 0.0552910913337867, "grad_norm": 0.10418083518743515, "learning_rate": 4.741067188983856e-05, "loss": 0.0204, "step": 7480 }, { "epoch": 0.05536500990508855, "grad_norm": 0.0978635922074318, "learning_rate": 4.740696225071226e-05, "loss": 0.0199, "step": 7490 }, { "epoch": 0.05543892847639041, "grad_norm": 0.11908965557813644, "learning_rate": 4.7403252611585946e-05, "loss": 0.0239, "step": 7500 }, { "epoch": 0.055512847047692264, "grad_norm": 0.09557194262742996, "learning_rate": 4.739954297245964e-05, "loss": 0.0216, "step": 7510 }, { "epoch": 0.055586765618994116, "grad_norm": 0.08496109396219254, "learning_rate": 4.739583333333333e-05, "loss": 0.0217, "step": 7520 }, { "epoch": 0.05566068419029597, "grad_norm": 0.08821641653776169, "learning_rate": 4.739212369420703e-05, "loss": 0.0193, "step": 7530 }, { "epoch": 0.05573460276159782, "grad_norm": 0.11779448390007019, "learning_rate": 4.738841405508073e-05, "loss": 0.0218, "step": 7540 }, { "epoch": 0.05580852133289968, "grad_norm": 0.12221374362707138, "learning_rate": 4.738470441595442e-05, "loss": 0.0238, "step": 7550 }, { "epoch": 0.05588243990420153, "grad_norm": 0.15340213477611542, "learning_rate": 4.7380994776828115e-05, "loss": 0.0249, "step": 7560 }, { "epoch": 0.055956358475503384, "grad_norm": 0.15344905853271484, "learning_rate": 4.7377285137701804e-05, "loss": 0.0253, "step": 7570 }, { "epoch": 0.05603027704680524, "grad_norm": 0.10215380042791367, "learning_rate": 4.73735754985755e-05, "loss": 0.0195, "step": 7580 }, { "epoch": 0.056104195618107096, "grad_norm": 0.07139965891838074, "learning_rate": 4.7369865859449196e-05, "loss": 0.018, "step": 7590 }, { "epoch": 0.05617811418940895, "grad_norm": 0.121430903673172, "learning_rate": 4.7366156220322885e-05, "loss": 0.0224, "step": 7600 }, { "epoch": 0.0562520327607108, "grad_norm": 0.11266914755105972, "learning_rate": 4.736244658119658e-05, "loss": 0.0233, "step": 7610 }, { "epoch": 0.05632595133201265, "grad_norm": 0.09315747767686844, "learning_rate": 4.735873694207028e-05, "loss": 0.0214, "step": 7620 }, { "epoch": 0.05639986990331451, "grad_norm": 0.07198236882686615, "learning_rate": 4.735502730294397e-05, "loss": 0.0216, "step": 7630 }, { "epoch": 0.056473788474616364, "grad_norm": 0.12135116010904312, "learning_rate": 4.735131766381767e-05, "loss": 0.0209, "step": 7640 }, { "epoch": 0.056547707045918216, "grad_norm": 0.08780569583177567, "learning_rate": 4.734760802469136e-05, "loss": 0.0218, "step": 7650 }, { "epoch": 0.05662162561722007, "grad_norm": 0.10140490531921387, "learning_rate": 4.7343898385565054e-05, "loss": 0.0235, "step": 7660 }, { "epoch": 0.05669554418852192, "grad_norm": 0.08562912791967392, "learning_rate": 4.7340188746438744e-05, "loss": 0.0181, "step": 7670 }, { "epoch": 0.05676946275982378, "grad_norm": 0.13408763706684113, "learning_rate": 4.733647910731244e-05, "loss": 0.0256, "step": 7680 }, { "epoch": 0.05684338133112563, "grad_norm": 0.11752607673406601, "learning_rate": 4.733276946818614e-05, "loss": 0.022, "step": 7690 }, { "epoch": 0.056917299902427484, "grad_norm": 0.08813085407018661, "learning_rate": 4.732905982905983e-05, "loss": 0.0202, "step": 7700 }, { "epoch": 0.05699121847372934, "grad_norm": 0.09921573847532272, "learning_rate": 4.732535018993353e-05, "loss": 0.0201, "step": 7710 }, { "epoch": 0.057065137045031196, "grad_norm": 0.09355275332927704, "learning_rate": 4.7321640550807223e-05, "loss": 0.0194, "step": 7720 }, { "epoch": 0.05713905561633305, "grad_norm": 0.08472117781639099, "learning_rate": 4.731793091168091e-05, "loss": 0.0228, "step": 7730 }, { "epoch": 0.0572129741876349, "grad_norm": 0.11851277202367783, "learning_rate": 4.731422127255461e-05, "loss": 0.0235, "step": 7740 }, { "epoch": 0.05728689275893675, "grad_norm": 0.0786430612206459, "learning_rate": 4.73105116334283e-05, "loss": 0.0197, "step": 7750 }, { "epoch": 0.05736081133023861, "grad_norm": 0.12320347875356674, "learning_rate": 4.7306801994301994e-05, "loss": 0.0283, "step": 7760 }, { "epoch": 0.057434729901540464, "grad_norm": 0.0892769992351532, "learning_rate": 4.7303092355175696e-05, "loss": 0.0203, "step": 7770 }, { "epoch": 0.057508648472842316, "grad_norm": 0.09888837486505508, "learning_rate": 4.7299382716049386e-05, "loss": 0.0221, "step": 7780 }, { "epoch": 0.05758256704414417, "grad_norm": 0.11733058840036392, "learning_rate": 4.729567307692308e-05, "loss": 0.0223, "step": 7790 }, { "epoch": 0.05765648561544603, "grad_norm": 0.16852419078350067, "learning_rate": 4.729196343779677e-05, "loss": 0.0223, "step": 7800 }, { "epoch": 0.05773040418674788, "grad_norm": 0.11733348667621613, "learning_rate": 4.728825379867047e-05, "loss": 0.0226, "step": 7810 }, { "epoch": 0.05780432275804973, "grad_norm": 0.13076969981193542, "learning_rate": 4.728454415954416e-05, "loss": 0.0223, "step": 7820 }, { "epoch": 0.057878241329351585, "grad_norm": 0.07921652495861053, "learning_rate": 4.728083452041785e-05, "loss": 0.0219, "step": 7830 }, { "epoch": 0.05795215990065344, "grad_norm": 0.10504285991191864, "learning_rate": 4.727712488129155e-05, "loss": 0.0224, "step": 7840 }, { "epoch": 0.058026078471955296, "grad_norm": 0.11202266067266464, "learning_rate": 4.7273415242165244e-05, "loss": 0.0245, "step": 7850 }, { "epoch": 0.05809999704325715, "grad_norm": 0.12498188763856888, "learning_rate": 4.726970560303894e-05, "loss": 0.0231, "step": 7860 }, { "epoch": 0.058173915614559, "grad_norm": 0.0961272194981575, "learning_rate": 4.7265995963912636e-05, "loss": 0.0238, "step": 7870 }, { "epoch": 0.05824783418586085, "grad_norm": 0.09642474353313446, "learning_rate": 4.7262286324786325e-05, "loss": 0.0208, "step": 7880 }, { "epoch": 0.05832175275716271, "grad_norm": 0.07648549973964691, "learning_rate": 4.725857668566002e-05, "loss": 0.0213, "step": 7890 }, { "epoch": 0.058395671328464564, "grad_norm": 0.12183746695518494, "learning_rate": 4.725486704653371e-05, "loss": 0.0225, "step": 7900 }, { "epoch": 0.058469589899766417, "grad_norm": 0.10755477100610733, "learning_rate": 4.7251157407407406e-05, "loss": 0.0191, "step": 7910 }, { "epoch": 0.05854350847106827, "grad_norm": 0.12599851191043854, "learning_rate": 4.724744776828111e-05, "loss": 0.0236, "step": 7920 }, { "epoch": 0.05861742704237013, "grad_norm": 0.11307878792285919, "learning_rate": 4.72437381291548e-05, "loss": 0.0208, "step": 7930 }, { "epoch": 0.05869134561367198, "grad_norm": 0.107178695499897, "learning_rate": 4.7240028490028494e-05, "loss": 0.0211, "step": 7940 }, { "epoch": 0.05876526418497383, "grad_norm": 0.16315461695194244, "learning_rate": 4.723631885090219e-05, "loss": 0.0217, "step": 7950 }, { "epoch": 0.058839182756275685, "grad_norm": 0.09971021860837936, "learning_rate": 4.723260921177588e-05, "loss": 0.0187, "step": 7960 }, { "epoch": 0.058913101327577544, "grad_norm": 0.1025579646229744, "learning_rate": 4.7228899572649575e-05, "loss": 0.0224, "step": 7970 }, { "epoch": 0.058987019898879396, "grad_norm": 0.08908526599407196, "learning_rate": 4.7225189933523264e-05, "loss": 0.0235, "step": 7980 }, { "epoch": 0.05906093847018125, "grad_norm": 0.18570634722709656, "learning_rate": 4.722148029439696e-05, "loss": 0.0232, "step": 7990 }, { "epoch": 0.0591348570414831, "grad_norm": 0.08980167657136917, "learning_rate": 4.721777065527066e-05, "loss": 0.0235, "step": 8000 }, { "epoch": 0.05920877561278495, "grad_norm": 0.1067327931523323, "learning_rate": 4.721406101614435e-05, "loss": 0.024, "step": 8010 }, { "epoch": 0.05928269418408681, "grad_norm": 0.10643284767866135, "learning_rate": 4.721035137701805e-05, "loss": 0.0235, "step": 8020 }, { "epoch": 0.059356612755388664, "grad_norm": 0.11560133099555969, "learning_rate": 4.720664173789174e-05, "loss": 0.0243, "step": 8030 }, { "epoch": 0.05943053132669052, "grad_norm": 0.1009771004319191, "learning_rate": 4.720293209876543e-05, "loss": 0.0243, "step": 8040 }, { "epoch": 0.05950444989799237, "grad_norm": 0.1109670102596283, "learning_rate": 4.719922245963913e-05, "loss": 0.0213, "step": 8050 }, { "epoch": 0.05957836846929423, "grad_norm": 0.09228888154029846, "learning_rate": 4.719551282051282e-05, "loss": 0.0211, "step": 8060 }, { "epoch": 0.05965228704059608, "grad_norm": 0.10006465017795563, "learning_rate": 4.719180318138652e-05, "loss": 0.0209, "step": 8070 }, { "epoch": 0.05972620561189793, "grad_norm": 0.10764101892709732, "learning_rate": 4.718809354226021e-05, "loss": 0.0213, "step": 8080 }, { "epoch": 0.059800124183199785, "grad_norm": 0.08787568658590317, "learning_rate": 4.7184383903133906e-05, "loss": 0.0199, "step": 8090 }, { "epoch": 0.059874042754501644, "grad_norm": 0.1373230218887329, "learning_rate": 4.71806742640076e-05, "loss": 0.0252, "step": 8100 }, { "epoch": 0.059947961325803496, "grad_norm": 0.08644258230924606, "learning_rate": 4.717696462488129e-05, "loss": 0.0198, "step": 8110 }, { "epoch": 0.06002187989710535, "grad_norm": 0.08341766148805618, "learning_rate": 4.717325498575499e-05, "loss": 0.0241, "step": 8120 }, { "epoch": 0.0600957984684072, "grad_norm": 0.09608685970306396, "learning_rate": 4.716954534662868e-05, "loss": 0.0215, "step": 8130 }, { "epoch": 0.06016971703970905, "grad_norm": 0.1065250039100647, "learning_rate": 4.716583570750237e-05, "loss": 0.0225, "step": 8140 }, { "epoch": 0.06024363561101091, "grad_norm": 0.12095583975315094, "learning_rate": 4.7162126068376075e-05, "loss": 0.0212, "step": 8150 }, { "epoch": 0.060317554182312764, "grad_norm": 0.09507690370082855, "learning_rate": 4.7158416429249765e-05, "loss": 0.0219, "step": 8160 }, { "epoch": 0.06039147275361462, "grad_norm": 0.09556754678487778, "learning_rate": 4.715470679012346e-05, "loss": 0.023, "step": 8170 }, { "epoch": 0.06046539132491647, "grad_norm": 0.08235856890678406, "learning_rate": 4.7150997150997157e-05, "loss": 0.0218, "step": 8180 }, { "epoch": 0.06053930989621833, "grad_norm": 0.1018480509519577, "learning_rate": 4.7147287511870846e-05, "loss": 0.022, "step": 8190 }, { "epoch": 0.06061322846752018, "grad_norm": 0.08940430730581284, "learning_rate": 4.714357787274454e-05, "loss": 0.0218, "step": 8200 }, { "epoch": 0.06068714703882203, "grad_norm": 0.11216285824775696, "learning_rate": 4.713986823361823e-05, "loss": 0.025, "step": 8210 }, { "epoch": 0.060761065610123885, "grad_norm": 0.09381680935621262, "learning_rate": 4.7136158594491934e-05, "loss": 0.0229, "step": 8220 }, { "epoch": 0.060834984181425744, "grad_norm": 0.1107122078537941, "learning_rate": 4.713244895536563e-05, "loss": 0.02, "step": 8230 }, { "epoch": 0.060908902752727596, "grad_norm": 0.10714753717184067, "learning_rate": 4.712873931623932e-05, "loss": 0.0224, "step": 8240 }, { "epoch": 0.06098282132402945, "grad_norm": 0.09350134432315826, "learning_rate": 4.7125029677113015e-05, "loss": 0.0223, "step": 8250 }, { "epoch": 0.0610567398953313, "grad_norm": 0.10498078912496567, "learning_rate": 4.7121320037986704e-05, "loss": 0.0218, "step": 8260 }, { "epoch": 0.06113065846663316, "grad_norm": 0.09392526000738144, "learning_rate": 4.71176103988604e-05, "loss": 0.022, "step": 8270 }, { "epoch": 0.06120457703793501, "grad_norm": 0.08764494955539703, "learning_rate": 4.7113900759734096e-05, "loss": 0.0202, "step": 8280 }, { "epoch": 0.061278495609236865, "grad_norm": 0.14300397038459778, "learning_rate": 4.7110191120607785e-05, "loss": 0.0213, "step": 8290 }, { "epoch": 0.06135241418053872, "grad_norm": 0.09841963648796082, "learning_rate": 4.710648148148149e-05, "loss": 0.0219, "step": 8300 }, { "epoch": 0.06142633275184057, "grad_norm": 0.10955634713172913, "learning_rate": 4.710277184235518e-05, "loss": 0.0194, "step": 8310 }, { "epoch": 0.06150025132314243, "grad_norm": 0.0871150940656662, "learning_rate": 4.709906220322887e-05, "loss": 0.0239, "step": 8320 }, { "epoch": 0.06157416989444428, "grad_norm": 0.08169959485530853, "learning_rate": 4.709535256410257e-05, "loss": 0.0213, "step": 8330 }, { "epoch": 0.06164808846574613, "grad_norm": 0.08927330374717712, "learning_rate": 4.709164292497626e-05, "loss": 0.0194, "step": 8340 }, { "epoch": 0.061722007037047985, "grad_norm": 0.09410972148180008, "learning_rate": 4.7087933285849954e-05, "loss": 0.0223, "step": 8350 }, { "epoch": 0.061795925608349844, "grad_norm": 0.09375404566526413, "learning_rate": 4.708422364672364e-05, "loss": 0.0211, "step": 8360 }, { "epoch": 0.061869844179651697, "grad_norm": 0.08001571893692017, "learning_rate": 4.7080514007597346e-05, "loss": 0.0206, "step": 8370 }, { "epoch": 0.06194376275095355, "grad_norm": 0.10927123576402664, "learning_rate": 4.707680436847104e-05, "loss": 0.024, "step": 8380 }, { "epoch": 0.0620176813222554, "grad_norm": 0.13424262404441833, "learning_rate": 4.707309472934473e-05, "loss": 0.0227, "step": 8390 }, { "epoch": 0.06209159989355726, "grad_norm": 0.13544967770576477, "learning_rate": 4.706938509021843e-05, "loss": 0.0217, "step": 8400 }, { "epoch": 0.06216551846485911, "grad_norm": 0.09935387969017029, "learning_rate": 4.706567545109212e-05, "loss": 0.0231, "step": 8410 }, { "epoch": 0.062239437036160965, "grad_norm": 0.08632265031337738, "learning_rate": 4.706196581196581e-05, "loss": 0.0239, "step": 8420 }, { "epoch": 0.06231335560746282, "grad_norm": 0.09724076092243195, "learning_rate": 4.705825617283951e-05, "loss": 0.0197, "step": 8430 }, { "epoch": 0.062387274178764676, "grad_norm": 0.11309103667736053, "learning_rate": 4.70545465337132e-05, "loss": 0.0227, "step": 8440 }, { "epoch": 0.06246119275006653, "grad_norm": 0.12005878984928131, "learning_rate": 4.70508368945869e-05, "loss": 0.0228, "step": 8450 }, { "epoch": 0.06253511132136838, "grad_norm": 0.09755506366491318, "learning_rate": 4.7047127255460596e-05, "loss": 0.0218, "step": 8460 }, { "epoch": 0.06260902989267024, "grad_norm": 0.10736546665430069, "learning_rate": 4.7043417616334285e-05, "loss": 0.0236, "step": 8470 }, { "epoch": 0.06268294846397209, "grad_norm": 0.13473279774188995, "learning_rate": 4.703970797720798e-05, "loss": 0.0206, "step": 8480 }, { "epoch": 0.06275686703527394, "grad_norm": 0.08754564076662064, "learning_rate": 4.703599833808167e-05, "loss": 0.0215, "step": 8490 }, { "epoch": 0.06283078560657579, "grad_norm": 0.11559465527534485, "learning_rate": 4.7032288698955367e-05, "loss": 0.023, "step": 8500 }, { "epoch": 0.06290470417787765, "grad_norm": 0.11303414404392242, "learning_rate": 4.702857905982906e-05, "loss": 0.0204, "step": 8510 }, { "epoch": 0.06297862274917951, "grad_norm": 0.1281813234090805, "learning_rate": 4.702486942070276e-05, "loss": 0.0227, "step": 8520 }, { "epoch": 0.06305254132048135, "grad_norm": 0.1113453060388565, "learning_rate": 4.7021159781576454e-05, "loss": 0.0195, "step": 8530 }, { "epoch": 0.06312645989178321, "grad_norm": 0.09026116132736206, "learning_rate": 4.7017450142450144e-05, "loss": 0.023, "step": 8540 }, { "epoch": 0.06320037846308507, "grad_norm": 0.10665830224752426, "learning_rate": 4.701374050332384e-05, "loss": 0.0237, "step": 8550 }, { "epoch": 0.06327429703438692, "grad_norm": 0.08242546766996384, "learning_rate": 4.7010030864197536e-05, "loss": 0.0274, "step": 8560 }, { "epoch": 0.06334821560568878, "grad_norm": 0.09004712104797363, "learning_rate": 4.7006321225071225e-05, "loss": 0.0198, "step": 8570 }, { "epoch": 0.06342213417699062, "grad_norm": 0.11416617780923843, "learning_rate": 4.700261158594492e-05, "loss": 0.0252, "step": 8580 }, { "epoch": 0.06349605274829248, "grad_norm": 0.1045583114027977, "learning_rate": 4.699890194681861e-05, "loss": 0.0228, "step": 8590 }, { "epoch": 0.06356997131959434, "grad_norm": 0.09816955775022507, "learning_rate": 4.699519230769231e-05, "loss": 0.0198, "step": 8600 }, { "epoch": 0.06364388989089619, "grad_norm": 0.10833249986171722, "learning_rate": 4.699148266856601e-05, "loss": 0.021, "step": 8610 }, { "epoch": 0.06371780846219804, "grad_norm": 0.11117073148488998, "learning_rate": 4.69877730294397e-05, "loss": 0.023, "step": 8620 }, { "epoch": 0.06379172703349989, "grad_norm": 0.10439736396074295, "learning_rate": 4.6984063390313394e-05, "loss": 0.0235, "step": 8630 }, { "epoch": 0.06386564560480175, "grad_norm": 0.1064261794090271, "learning_rate": 4.698035375118709e-05, "loss": 0.0235, "step": 8640 }, { "epoch": 0.06393956417610361, "grad_norm": 0.08481346070766449, "learning_rate": 4.697664411206078e-05, "loss": 0.0199, "step": 8650 }, { "epoch": 0.06401348274740545, "grad_norm": 0.07966621220111847, "learning_rate": 4.6972934472934475e-05, "loss": 0.0214, "step": 8660 }, { "epoch": 0.06408740131870731, "grad_norm": 0.09658713638782501, "learning_rate": 4.696922483380817e-05, "loss": 0.0205, "step": 8670 }, { "epoch": 0.06416131989000917, "grad_norm": 0.09488363564014435, "learning_rate": 4.696551519468187e-05, "loss": 0.024, "step": 8680 }, { "epoch": 0.06423523846131102, "grad_norm": 0.10669881850481033, "learning_rate": 4.696180555555556e-05, "loss": 0.0207, "step": 8690 }, { "epoch": 0.06430915703261288, "grad_norm": 0.12813617289066315, "learning_rate": 4.695809591642925e-05, "loss": 0.0218, "step": 8700 }, { "epoch": 0.06438307560391472, "grad_norm": 0.12795375287532806, "learning_rate": 4.695438627730295e-05, "loss": 0.0223, "step": 8710 }, { "epoch": 0.06445699417521658, "grad_norm": 0.08577212691307068, "learning_rate": 4.695067663817664e-05, "loss": 0.0201, "step": 8720 }, { "epoch": 0.06453091274651844, "grad_norm": 0.14490552246570587, "learning_rate": 4.694696699905033e-05, "loss": 0.0211, "step": 8730 }, { "epoch": 0.06460483131782029, "grad_norm": 0.07944640517234802, "learning_rate": 4.694325735992403e-05, "loss": 0.0211, "step": 8740 }, { "epoch": 0.06467874988912214, "grad_norm": 0.08489855378866196, "learning_rate": 4.6939547720797725e-05, "loss": 0.0215, "step": 8750 }, { "epoch": 0.064752668460424, "grad_norm": 0.11695750057697296, "learning_rate": 4.693583808167142e-05, "loss": 0.0238, "step": 8760 }, { "epoch": 0.06482658703172585, "grad_norm": 0.09435489028692245, "learning_rate": 4.693212844254511e-05, "loss": 0.0193, "step": 8770 }, { "epoch": 0.06490050560302771, "grad_norm": 0.0975150316953659, "learning_rate": 4.6928418803418806e-05, "loss": 0.0213, "step": 8780 }, { "epoch": 0.06497442417432955, "grad_norm": 0.07952667772769928, "learning_rate": 4.69247091642925e-05, "loss": 0.0216, "step": 8790 }, { "epoch": 0.06504834274563141, "grad_norm": 0.09845630079507828, "learning_rate": 4.692099952516619e-05, "loss": 0.022, "step": 8800 }, { "epoch": 0.06512226131693327, "grad_norm": 0.09602982550859451, "learning_rate": 4.691728988603989e-05, "loss": 0.0198, "step": 8810 }, { "epoch": 0.06519617988823512, "grad_norm": 0.09934545308351517, "learning_rate": 4.691358024691358e-05, "loss": 0.0205, "step": 8820 }, { "epoch": 0.06527009845953698, "grad_norm": 0.10673316568136215, "learning_rate": 4.690987060778728e-05, "loss": 0.0227, "step": 8830 }, { "epoch": 0.06534401703083882, "grad_norm": 0.0985034927725792, "learning_rate": 4.6906160968660975e-05, "loss": 0.0226, "step": 8840 }, { "epoch": 0.06541793560214068, "grad_norm": 0.09951265901327133, "learning_rate": 4.6902451329534664e-05, "loss": 0.0235, "step": 8850 }, { "epoch": 0.06549185417344254, "grad_norm": 0.11149190366268158, "learning_rate": 4.689874169040836e-05, "loss": 0.0248, "step": 8860 }, { "epoch": 0.06556577274474439, "grad_norm": 0.11519861966371536, "learning_rate": 4.6895032051282056e-05, "loss": 0.0228, "step": 8870 }, { "epoch": 0.06563969131604624, "grad_norm": 0.12151552736759186, "learning_rate": 4.6891322412155745e-05, "loss": 0.0183, "step": 8880 }, { "epoch": 0.0657136098873481, "grad_norm": 0.09308876097202301, "learning_rate": 4.688761277302944e-05, "loss": 0.0225, "step": 8890 }, { "epoch": 0.06578752845864995, "grad_norm": 0.0984383374452591, "learning_rate": 4.688390313390314e-05, "loss": 0.02, "step": 8900 }, { "epoch": 0.06586144702995181, "grad_norm": 0.10223396122455597, "learning_rate": 4.6880193494776833e-05, "loss": 0.0201, "step": 8910 }, { "epoch": 0.06593536560125365, "grad_norm": 0.12238939851522446, "learning_rate": 4.687648385565053e-05, "loss": 0.0211, "step": 8920 }, { "epoch": 0.06600928417255551, "grad_norm": 0.0670313909649849, "learning_rate": 4.687277421652422e-05, "loss": 0.0225, "step": 8930 }, { "epoch": 0.06608320274385737, "grad_norm": 0.09160995483398438, "learning_rate": 4.6869064577397915e-05, "loss": 0.0235, "step": 8940 }, { "epoch": 0.06615712131515922, "grad_norm": 0.09256432950496674, "learning_rate": 4.6865354938271604e-05, "loss": 0.0236, "step": 8950 }, { "epoch": 0.06623103988646108, "grad_norm": 0.1058010682463646, "learning_rate": 4.68616452991453e-05, "loss": 0.0224, "step": 8960 }, { "epoch": 0.06630495845776292, "grad_norm": 0.12149609625339508, "learning_rate": 4.6857935660018996e-05, "loss": 0.0223, "step": 8970 }, { "epoch": 0.06637887702906478, "grad_norm": 0.09200827777385712, "learning_rate": 4.685422602089269e-05, "loss": 0.0238, "step": 8980 }, { "epoch": 0.06645279560036664, "grad_norm": 0.0811193585395813, "learning_rate": 4.685051638176639e-05, "loss": 0.0211, "step": 8990 }, { "epoch": 0.06652671417166849, "grad_norm": 0.10229609906673431, "learning_rate": 4.684680674264008e-05, "loss": 0.0235, "step": 9000 }, { "epoch": 0.06660063274297034, "grad_norm": 0.08031179010868073, "learning_rate": 4.684309710351377e-05, "loss": 0.0217, "step": 9010 }, { "epoch": 0.0666745513142722, "grad_norm": 0.08803895115852356, "learning_rate": 4.683938746438747e-05, "loss": 0.0213, "step": 9020 }, { "epoch": 0.06674846988557405, "grad_norm": 0.07700366526842117, "learning_rate": 4.683567782526116e-05, "loss": 0.0209, "step": 9030 }, { "epoch": 0.06682238845687591, "grad_norm": 0.11948195844888687, "learning_rate": 4.6831968186134854e-05, "loss": 0.0234, "step": 9040 }, { "epoch": 0.06689630702817775, "grad_norm": 0.10591305792331696, "learning_rate": 4.682825854700855e-05, "loss": 0.022, "step": 9050 }, { "epoch": 0.06697022559947961, "grad_norm": 0.09493808448314667, "learning_rate": 4.6824548907882246e-05, "loss": 0.0215, "step": 9060 }, { "epoch": 0.06704414417078147, "grad_norm": 0.09348826110363007, "learning_rate": 4.682083926875594e-05, "loss": 0.0242, "step": 9070 }, { "epoch": 0.06711806274208332, "grad_norm": 0.10891830921173096, "learning_rate": 4.681712962962963e-05, "loss": 0.0202, "step": 9080 }, { "epoch": 0.06719198131338518, "grad_norm": 0.08920338749885559, "learning_rate": 4.681341999050333e-05, "loss": 0.0207, "step": 9090 }, { "epoch": 0.06726589988468702, "grad_norm": 0.08524997532367706, "learning_rate": 4.680971035137702e-05, "loss": 0.0213, "step": 9100 }, { "epoch": 0.06733981845598888, "grad_norm": 0.10065700858831406, "learning_rate": 4.680600071225071e-05, "loss": 0.0206, "step": 9110 }, { "epoch": 0.06741373702729074, "grad_norm": 0.09767220169305801, "learning_rate": 4.680229107312441e-05, "loss": 0.0212, "step": 9120 }, { "epoch": 0.06748765559859259, "grad_norm": 0.08452901989221573, "learning_rate": 4.6798581433998104e-05, "loss": 0.0207, "step": 9130 }, { "epoch": 0.06756157416989444, "grad_norm": 0.12105948477983475, "learning_rate": 4.67948717948718e-05, "loss": 0.0194, "step": 9140 }, { "epoch": 0.0676354927411963, "grad_norm": 0.08149902522563934, "learning_rate": 4.6791162155745496e-05, "loss": 0.0207, "step": 9150 }, { "epoch": 0.06770941131249815, "grad_norm": 0.11312330514192581, "learning_rate": 4.6787452516619185e-05, "loss": 0.0229, "step": 9160 }, { "epoch": 0.06778332988380001, "grad_norm": 0.0945291742682457, "learning_rate": 4.678374287749288e-05, "loss": 0.0209, "step": 9170 }, { "epoch": 0.06785724845510185, "grad_norm": 0.10265430808067322, "learning_rate": 4.678003323836657e-05, "loss": 0.0203, "step": 9180 }, { "epoch": 0.06793116702640371, "grad_norm": 0.11135527491569519, "learning_rate": 4.6776323599240266e-05, "loss": 0.0218, "step": 9190 }, { "epoch": 0.06800508559770557, "grad_norm": 0.14490050077438354, "learning_rate": 4.677261396011396e-05, "loss": 0.0238, "step": 9200 }, { "epoch": 0.06807900416900742, "grad_norm": 0.08578338474035263, "learning_rate": 4.676890432098766e-05, "loss": 0.023, "step": 9210 }, { "epoch": 0.06815292274030928, "grad_norm": 0.10262192040681839, "learning_rate": 4.6765194681861354e-05, "loss": 0.0224, "step": 9220 }, { "epoch": 0.06822684131161114, "grad_norm": 0.09532645344734192, "learning_rate": 4.676148504273504e-05, "loss": 0.0227, "step": 9230 }, { "epoch": 0.06830075988291298, "grad_norm": 0.09172376990318298, "learning_rate": 4.675777540360874e-05, "loss": 0.0219, "step": 9240 }, { "epoch": 0.06837467845421484, "grad_norm": 0.08387801051139832, "learning_rate": 4.6754065764482435e-05, "loss": 0.0205, "step": 9250 }, { "epoch": 0.06844859702551669, "grad_norm": 0.09637405723333359, "learning_rate": 4.6750356125356124e-05, "loss": 0.0243, "step": 9260 }, { "epoch": 0.06852251559681855, "grad_norm": 0.08822491765022278, "learning_rate": 4.674664648622982e-05, "loss": 0.0211, "step": 9270 }, { "epoch": 0.0685964341681204, "grad_norm": 0.19279421865940094, "learning_rate": 4.6742936847103516e-05, "loss": 0.0235, "step": 9280 }, { "epoch": 0.06867035273942225, "grad_norm": 0.09226642549037933, "learning_rate": 4.673922720797721e-05, "loss": 0.0212, "step": 9290 }, { "epoch": 0.06874427131072411, "grad_norm": 0.1656857132911682, "learning_rate": 4.673551756885091e-05, "loss": 0.0228, "step": 9300 }, { "epoch": 0.06881818988202595, "grad_norm": 0.0956842228770256, "learning_rate": 4.67318079297246e-05, "loss": 0.0219, "step": 9310 }, { "epoch": 0.06889210845332781, "grad_norm": 0.09942670166492462, "learning_rate": 4.6728098290598294e-05, "loss": 0.0212, "step": 9320 }, { "epoch": 0.06896602702462967, "grad_norm": 0.09194504469633102, "learning_rate": 4.672438865147199e-05, "loss": 0.0231, "step": 9330 }, { "epoch": 0.06903994559593152, "grad_norm": 0.09336016327142715, "learning_rate": 4.672067901234568e-05, "loss": 0.0218, "step": 9340 }, { "epoch": 0.06911386416723338, "grad_norm": 0.10817690193653107, "learning_rate": 4.6716969373219375e-05, "loss": 0.0238, "step": 9350 }, { "epoch": 0.06918778273853524, "grad_norm": 0.10755597054958344, "learning_rate": 4.671325973409307e-05, "loss": 0.0211, "step": 9360 }, { "epoch": 0.06926170130983708, "grad_norm": 0.11105576902627945, "learning_rate": 4.6709550094966767e-05, "loss": 0.0198, "step": 9370 }, { "epoch": 0.06933561988113894, "grad_norm": 0.09294114261865616, "learning_rate": 4.670584045584046e-05, "loss": 0.0195, "step": 9380 }, { "epoch": 0.06940953845244079, "grad_norm": 0.10066384822130203, "learning_rate": 4.670213081671415e-05, "loss": 0.0223, "step": 9390 }, { "epoch": 0.06948345702374265, "grad_norm": 0.11441809684038162, "learning_rate": 4.669842117758785e-05, "loss": 0.0206, "step": 9400 }, { "epoch": 0.0695573755950445, "grad_norm": 0.11113165318965912, "learning_rate": 4.669471153846154e-05, "loss": 0.0236, "step": 9410 }, { "epoch": 0.06963129416634635, "grad_norm": 0.081535205245018, "learning_rate": 4.669100189933523e-05, "loss": 0.0206, "step": 9420 }, { "epoch": 0.06970521273764821, "grad_norm": 0.08822637796401978, "learning_rate": 4.668729226020893e-05, "loss": 0.0222, "step": 9430 }, { "epoch": 0.06977913130895005, "grad_norm": 0.10207725316286087, "learning_rate": 4.6683582621082625e-05, "loss": 0.0188, "step": 9440 }, { "epoch": 0.06985304988025191, "grad_norm": 0.10252611339092255, "learning_rate": 4.667987298195632e-05, "loss": 0.0216, "step": 9450 }, { "epoch": 0.06992696845155377, "grad_norm": 0.07835535705089569, "learning_rate": 4.667616334283001e-05, "loss": 0.0207, "step": 9460 }, { "epoch": 0.07000088702285562, "grad_norm": 0.08460769802331924, "learning_rate": 4.6672453703703706e-05, "loss": 0.0211, "step": 9470 }, { "epoch": 0.07007480559415748, "grad_norm": 0.15236924588680267, "learning_rate": 4.66687440645774e-05, "loss": 0.0247, "step": 9480 }, { "epoch": 0.07014872416545934, "grad_norm": 0.1699046641588211, "learning_rate": 4.666503442545109e-05, "loss": 0.0223, "step": 9490 }, { "epoch": 0.07022264273676118, "grad_norm": 0.10630764812231064, "learning_rate": 4.666132478632479e-05, "loss": 0.0248, "step": 9500 }, { "epoch": 0.07029656130806304, "grad_norm": 0.08956080675125122, "learning_rate": 4.665761514719848e-05, "loss": 0.0213, "step": 9510 }, { "epoch": 0.07037047987936489, "grad_norm": 0.1831154227256775, "learning_rate": 4.665390550807218e-05, "loss": 0.0259, "step": 9520 }, { "epoch": 0.07044439845066675, "grad_norm": 0.08845095336437225, "learning_rate": 4.6650195868945875e-05, "loss": 0.0215, "step": 9530 }, { "epoch": 0.0705183170219686, "grad_norm": 0.0894920602440834, "learning_rate": 4.6646486229819564e-05, "loss": 0.0228, "step": 9540 }, { "epoch": 0.07059223559327045, "grad_norm": 0.10273686796426773, "learning_rate": 4.664277659069326e-05, "loss": 0.0224, "step": 9550 }, { "epoch": 0.07066615416457231, "grad_norm": 0.09976007044315338, "learning_rate": 4.6639066951566956e-05, "loss": 0.0225, "step": 9560 }, { "epoch": 0.07074007273587415, "grad_norm": 0.11248171329498291, "learning_rate": 4.6635357312440645e-05, "loss": 0.0223, "step": 9570 }, { "epoch": 0.07081399130717601, "grad_norm": 0.11681903153657913, "learning_rate": 4.663164767331434e-05, "loss": 0.0234, "step": 9580 }, { "epoch": 0.07088790987847787, "grad_norm": 0.10257956385612488, "learning_rate": 4.662793803418804e-05, "loss": 0.0218, "step": 9590 }, { "epoch": 0.07096182844977972, "grad_norm": 0.0785127729177475, "learning_rate": 4.662422839506173e-05, "loss": 0.0184, "step": 9600 }, { "epoch": 0.07103574702108158, "grad_norm": 0.10620249807834625, "learning_rate": 4.662051875593543e-05, "loss": 0.0229, "step": 9610 }, { "epoch": 0.07110966559238344, "grad_norm": 0.04874080419540405, "learning_rate": 4.661680911680912e-05, "loss": 0.0207, "step": 9620 }, { "epoch": 0.07118358416368528, "grad_norm": 0.1125664934515953, "learning_rate": 4.6613099477682814e-05, "loss": 0.0236, "step": 9630 }, { "epoch": 0.07125750273498714, "grad_norm": 0.08896657824516296, "learning_rate": 4.6609389838556503e-05, "loss": 0.0207, "step": 9640 }, { "epoch": 0.07133142130628899, "grad_norm": 0.10772913694381714, "learning_rate": 4.66056801994302e-05, "loss": 0.0206, "step": 9650 }, { "epoch": 0.07140533987759085, "grad_norm": 0.10582844167947769, "learning_rate": 4.6601970560303895e-05, "loss": 0.0219, "step": 9660 }, { "epoch": 0.0714792584488927, "grad_norm": 0.0744025856256485, "learning_rate": 4.659826092117759e-05, "loss": 0.0216, "step": 9670 }, { "epoch": 0.07155317702019455, "grad_norm": 0.08924957364797592, "learning_rate": 4.659455128205129e-05, "loss": 0.024, "step": 9680 }, { "epoch": 0.07162709559149641, "grad_norm": 0.08392763137817383, "learning_rate": 4.6590841642924977e-05, "loss": 0.0185, "step": 9690 }, { "epoch": 0.07170101416279827, "grad_norm": 0.06921983510255814, "learning_rate": 4.658713200379867e-05, "loss": 0.0204, "step": 9700 }, { "epoch": 0.07177493273410011, "grad_norm": 0.07526102662086487, "learning_rate": 4.658342236467237e-05, "loss": 0.0199, "step": 9710 }, { "epoch": 0.07184885130540197, "grad_norm": 0.07144385576248169, "learning_rate": 4.657971272554606e-05, "loss": 0.0219, "step": 9720 }, { "epoch": 0.07192276987670382, "grad_norm": 0.11594574898481369, "learning_rate": 4.6576003086419754e-05, "loss": 0.0207, "step": 9730 }, { "epoch": 0.07199668844800568, "grad_norm": 0.10086581110954285, "learning_rate": 4.657229344729345e-05, "loss": 0.0214, "step": 9740 }, { "epoch": 0.07207060701930754, "grad_norm": 0.11353831738233566, "learning_rate": 4.6568583808167146e-05, "loss": 0.0219, "step": 9750 }, { "epoch": 0.07214452559060938, "grad_norm": 0.07933341711759567, "learning_rate": 4.656487416904084e-05, "loss": 0.0205, "step": 9760 }, { "epoch": 0.07221844416191124, "grad_norm": 0.11647021025419235, "learning_rate": 4.656116452991453e-05, "loss": 0.019, "step": 9770 }, { "epoch": 0.07229236273321309, "grad_norm": 0.13896368443965912, "learning_rate": 4.655745489078823e-05, "loss": 0.0212, "step": 9780 }, { "epoch": 0.07236628130451495, "grad_norm": 0.09607965499162674, "learning_rate": 4.655374525166192e-05, "loss": 0.0203, "step": 9790 }, { "epoch": 0.0724401998758168, "grad_norm": 0.11384850740432739, "learning_rate": 4.655003561253561e-05, "loss": 0.0212, "step": 9800 }, { "epoch": 0.07251411844711865, "grad_norm": 0.10243972390890121, "learning_rate": 4.654632597340931e-05, "loss": 0.0208, "step": 9810 }, { "epoch": 0.07258803701842051, "grad_norm": 0.1119912713766098, "learning_rate": 4.6542616334283004e-05, "loss": 0.0235, "step": 9820 }, { "epoch": 0.07266195558972237, "grad_norm": 0.09179473668336868, "learning_rate": 4.65389066951567e-05, "loss": 0.0226, "step": 9830 }, { "epoch": 0.07273587416102421, "grad_norm": 0.13562758266925812, "learning_rate": 4.6535197056030396e-05, "loss": 0.0226, "step": 9840 }, { "epoch": 0.07280979273232607, "grad_norm": 0.12494170665740967, "learning_rate": 4.6531487416904085e-05, "loss": 0.0196, "step": 9850 }, { "epoch": 0.07288371130362792, "grad_norm": 0.07629244774580002, "learning_rate": 4.652777777777778e-05, "loss": 0.0198, "step": 9860 }, { "epoch": 0.07295762987492978, "grad_norm": 0.0884569063782692, "learning_rate": 4.652406813865147e-05, "loss": 0.0204, "step": 9870 }, { "epoch": 0.07303154844623164, "grad_norm": 0.11918651312589645, "learning_rate": 4.6520358499525166e-05, "loss": 0.0233, "step": 9880 }, { "epoch": 0.07310546701753348, "grad_norm": 0.10792674869298935, "learning_rate": 4.651664886039886e-05, "loss": 0.0227, "step": 9890 }, { "epoch": 0.07317938558883534, "grad_norm": 0.08517907559871674, "learning_rate": 4.651293922127256e-05, "loss": 0.0213, "step": 9900 }, { "epoch": 0.07325330416013719, "grad_norm": 0.11669266223907471, "learning_rate": 4.6509229582146254e-05, "loss": 0.0212, "step": 9910 }, { "epoch": 0.07332722273143905, "grad_norm": 0.08712171763181686, "learning_rate": 4.650551994301994e-05, "loss": 0.0226, "step": 9920 }, { "epoch": 0.0734011413027409, "grad_norm": 0.11379371583461761, "learning_rate": 4.650181030389364e-05, "loss": 0.0223, "step": 9930 }, { "epoch": 0.07347505987404275, "grad_norm": 0.08450641483068466, "learning_rate": 4.6498100664767335e-05, "loss": 0.0214, "step": 9940 }, { "epoch": 0.07354897844534461, "grad_norm": 0.09043429046869278, "learning_rate": 4.6494391025641024e-05, "loss": 0.0206, "step": 9950 }, { "epoch": 0.07362289701664647, "grad_norm": 0.08543136715888977, "learning_rate": 4.649068138651472e-05, "loss": 0.0211, "step": 9960 }, { "epoch": 0.07369681558794831, "grad_norm": 0.11070428788661957, "learning_rate": 4.6486971747388416e-05, "loss": 0.0199, "step": 9970 }, { "epoch": 0.07377073415925017, "grad_norm": 0.08564955741167068, "learning_rate": 4.648326210826211e-05, "loss": 0.02, "step": 9980 }, { "epoch": 0.07384465273055202, "grad_norm": 0.09740497916936874, "learning_rate": 4.647955246913581e-05, "loss": 0.0219, "step": 9990 }, { "epoch": 0.07391857130185388, "grad_norm": 0.13537032902240753, "learning_rate": 4.64758428300095e-05, "loss": 0.0231, "step": 10000 }, { "epoch": 0.07391857130185388, "eval_f1": 0.5774625204378211, "eval_loss": 0.0209824051707983, "eval_precision": 0.4580666850684049, "eval_recall": 0.7810425453601965, "eval_runtime": 2765.0265, "eval_samples_per_second": 195.707, "eval_steps_per_second": 3.058, "step": 10000 }, { "epoch": 0.07399248987315574, "grad_norm": 0.08094476908445358, "learning_rate": 4.647213319088319e-05, "loss": 0.0204, "step": 10010 }, { "epoch": 0.07406640844445758, "grad_norm": 0.11555436998605728, "learning_rate": 4.646842355175689e-05, "loss": 0.0186, "step": 10020 }, { "epoch": 0.07414032701575944, "grad_norm": 0.09903301298618317, "learning_rate": 4.646471391263058e-05, "loss": 0.0238, "step": 10030 }, { "epoch": 0.07421424558706129, "grad_norm": 0.09888233989477158, "learning_rate": 4.6461004273504274e-05, "loss": 0.0206, "step": 10040 }, { "epoch": 0.07428816415836315, "grad_norm": 0.08759963512420654, "learning_rate": 4.645729463437797e-05, "loss": 0.0233, "step": 10050 }, { "epoch": 0.074362082729665, "grad_norm": 0.11545658856630325, "learning_rate": 4.6453584995251666e-05, "loss": 0.0216, "step": 10060 }, { "epoch": 0.07443600130096685, "grad_norm": 0.08216286450624466, "learning_rate": 4.644987535612536e-05, "loss": 0.0226, "step": 10070 }, { "epoch": 0.07450991987226871, "grad_norm": 0.0777827575802803, "learning_rate": 4.644616571699905e-05, "loss": 0.0213, "step": 10080 }, { "epoch": 0.07458383844357057, "grad_norm": 0.11039154976606369, "learning_rate": 4.644245607787275e-05, "loss": 0.0241, "step": 10090 }, { "epoch": 0.07465775701487241, "grad_norm": 0.08568236231803894, "learning_rate": 4.643874643874644e-05, "loss": 0.0226, "step": 10100 }, { "epoch": 0.07473167558617427, "grad_norm": 0.11165298521518707, "learning_rate": 4.643503679962013e-05, "loss": 0.0207, "step": 10110 }, { "epoch": 0.07480559415747612, "grad_norm": 0.0949447751045227, "learning_rate": 4.643132716049383e-05, "loss": 0.0213, "step": 10120 }, { "epoch": 0.07487951272877798, "grad_norm": 0.10933278501033783, "learning_rate": 4.6427617521367525e-05, "loss": 0.0212, "step": 10130 }, { "epoch": 0.07495343130007984, "grad_norm": 0.07423562556505203, "learning_rate": 4.642390788224122e-05, "loss": 0.0197, "step": 10140 }, { "epoch": 0.07502734987138168, "grad_norm": 0.12832492589950562, "learning_rate": 4.642019824311491e-05, "loss": 0.021, "step": 10150 }, { "epoch": 0.07510126844268354, "grad_norm": 0.10669612884521484, "learning_rate": 4.6416488603988606e-05, "loss": 0.0228, "step": 10160 }, { "epoch": 0.0751751870139854, "grad_norm": 0.11487097293138504, "learning_rate": 4.64127789648623e-05, "loss": 0.0219, "step": 10170 }, { "epoch": 0.07524910558528725, "grad_norm": 0.11099672317504883, "learning_rate": 4.640906932573599e-05, "loss": 0.0202, "step": 10180 }, { "epoch": 0.0753230241565891, "grad_norm": 0.11408338695764542, "learning_rate": 4.640535968660969e-05, "loss": 0.021, "step": 10190 }, { "epoch": 0.07539694272789095, "grad_norm": 0.0799785628914833, "learning_rate": 4.640165004748338e-05, "loss": 0.0196, "step": 10200 }, { "epoch": 0.07547086129919281, "grad_norm": 0.11926797777414322, "learning_rate": 4.639794040835708e-05, "loss": 0.0232, "step": 10210 }, { "epoch": 0.07554477987049467, "grad_norm": 0.1310286521911621, "learning_rate": 4.6394230769230775e-05, "loss": 0.0202, "step": 10220 }, { "epoch": 0.07561869844179651, "grad_norm": 0.08759672194719315, "learning_rate": 4.6390521130104464e-05, "loss": 0.0221, "step": 10230 }, { "epoch": 0.07569261701309837, "grad_norm": 0.10249481350183487, "learning_rate": 4.638681149097816e-05, "loss": 0.0203, "step": 10240 }, { "epoch": 0.07576653558440022, "grad_norm": 0.07235664129257202, "learning_rate": 4.6383101851851856e-05, "loss": 0.0218, "step": 10250 }, { "epoch": 0.07584045415570208, "grad_norm": 0.08320758491754532, "learning_rate": 4.6379392212725545e-05, "loss": 0.0209, "step": 10260 }, { "epoch": 0.07591437272700394, "grad_norm": 0.09801856428384781, "learning_rate": 4.637568257359924e-05, "loss": 0.0226, "step": 10270 }, { "epoch": 0.07598829129830578, "grad_norm": 0.09113458544015884, "learning_rate": 4.637197293447294e-05, "loss": 0.0251, "step": 10280 }, { "epoch": 0.07606220986960764, "grad_norm": 0.09591145813465118, "learning_rate": 4.636826329534663e-05, "loss": 0.0211, "step": 10290 }, { "epoch": 0.0761361284409095, "grad_norm": 0.10051578283309937, "learning_rate": 4.636455365622033e-05, "loss": 0.0219, "step": 10300 }, { "epoch": 0.07621004701221135, "grad_norm": 0.09922511130571365, "learning_rate": 4.636084401709402e-05, "loss": 0.0191, "step": 10310 }, { "epoch": 0.0762839655835132, "grad_norm": 0.17670218646526337, "learning_rate": 4.6357134377967714e-05, "loss": 0.0214, "step": 10320 }, { "epoch": 0.07635788415481505, "grad_norm": 0.07537711411714554, "learning_rate": 4.63534247388414e-05, "loss": 0.0205, "step": 10330 }, { "epoch": 0.07643180272611691, "grad_norm": 0.10695944726467133, "learning_rate": 4.63497150997151e-05, "loss": 0.022, "step": 10340 }, { "epoch": 0.07650572129741877, "grad_norm": 0.10096986591815948, "learning_rate": 4.6346005460588795e-05, "loss": 0.0218, "step": 10350 }, { "epoch": 0.07657963986872061, "grad_norm": 0.1410195380449295, "learning_rate": 4.634229582146249e-05, "loss": 0.0214, "step": 10360 }, { "epoch": 0.07665355844002247, "grad_norm": 0.07780885696411133, "learning_rate": 4.633858618233619e-05, "loss": 0.0208, "step": 10370 }, { "epoch": 0.07672747701132432, "grad_norm": 0.08565017580986023, "learning_rate": 4.6334876543209876e-05, "loss": 0.0203, "step": 10380 }, { "epoch": 0.07680139558262618, "grad_norm": 0.10375560820102692, "learning_rate": 4.633116690408357e-05, "loss": 0.0224, "step": 10390 }, { "epoch": 0.07687531415392804, "grad_norm": 0.10677853971719742, "learning_rate": 4.632745726495727e-05, "loss": 0.0224, "step": 10400 }, { "epoch": 0.07694923272522988, "grad_norm": 0.08839945495128632, "learning_rate": 4.632374762583096e-05, "loss": 0.0245, "step": 10410 }, { "epoch": 0.07702315129653174, "grad_norm": 0.11980149149894714, "learning_rate": 4.632003798670465e-05, "loss": 0.0212, "step": 10420 }, { "epoch": 0.0770970698678336, "grad_norm": 0.08483819663524628, "learning_rate": 4.631632834757835e-05, "loss": 0.0219, "step": 10430 }, { "epoch": 0.07717098843913545, "grad_norm": 0.15062575042247772, "learning_rate": 4.6312618708452045e-05, "loss": 0.0217, "step": 10440 }, { "epoch": 0.0772449070104373, "grad_norm": 0.08806386590003967, "learning_rate": 4.630890906932574e-05, "loss": 0.0225, "step": 10450 }, { "epoch": 0.07731882558173915, "grad_norm": 0.11287733167409897, "learning_rate": 4.630519943019943e-05, "loss": 0.0263, "step": 10460 }, { "epoch": 0.07739274415304101, "grad_norm": 0.12708717584609985, "learning_rate": 4.6301489791073126e-05, "loss": 0.0217, "step": 10470 }, { "epoch": 0.07746666272434287, "grad_norm": 0.1146998181939125, "learning_rate": 4.629778015194682e-05, "loss": 0.0203, "step": 10480 }, { "epoch": 0.07754058129564471, "grad_norm": 0.11010906845331192, "learning_rate": 4.629407051282051e-05, "loss": 0.021, "step": 10490 }, { "epoch": 0.07761449986694657, "grad_norm": 0.07615244388580322, "learning_rate": 4.629036087369421e-05, "loss": 0.0221, "step": 10500 }, { "epoch": 0.07768841843824842, "grad_norm": 0.09600045531988144, "learning_rate": 4.6286651234567904e-05, "loss": 0.0231, "step": 10510 }, { "epoch": 0.07776233700955028, "grad_norm": 0.11465899646282196, "learning_rate": 4.62829415954416e-05, "loss": 0.0218, "step": 10520 }, { "epoch": 0.07783625558085214, "grad_norm": 0.08350540697574615, "learning_rate": 4.6279231956315295e-05, "loss": 0.0199, "step": 10530 }, { "epoch": 0.07791017415215398, "grad_norm": 0.1006699725985527, "learning_rate": 4.6275522317188985e-05, "loss": 0.022, "step": 10540 }, { "epoch": 0.07798409272345584, "grad_norm": 0.08721012622117996, "learning_rate": 4.627181267806268e-05, "loss": 0.0268, "step": 10550 }, { "epoch": 0.0780580112947577, "grad_norm": 0.07063861191272736, "learning_rate": 4.626810303893637e-05, "loss": 0.0176, "step": 10560 }, { "epoch": 0.07813192986605955, "grad_norm": 0.12120065093040466, "learning_rate": 4.6264393399810066e-05, "loss": 0.0205, "step": 10570 }, { "epoch": 0.0782058484373614, "grad_norm": 0.1365332305431366, "learning_rate": 4.626068376068376e-05, "loss": 0.0235, "step": 10580 }, { "epoch": 0.07827976700866325, "grad_norm": 0.09110606461763382, "learning_rate": 4.625697412155746e-05, "loss": 0.0181, "step": 10590 }, { "epoch": 0.07835368557996511, "grad_norm": 0.09668658673763275, "learning_rate": 4.6253264482431154e-05, "loss": 0.0226, "step": 10600 }, { "epoch": 0.07842760415126697, "grad_norm": 0.13093529641628265, "learning_rate": 4.624955484330484e-05, "loss": 0.0232, "step": 10610 }, { "epoch": 0.07850152272256881, "grad_norm": 0.11461758613586426, "learning_rate": 4.624584520417854e-05, "loss": 0.0215, "step": 10620 }, { "epoch": 0.07857544129387067, "grad_norm": 0.08175163716077805, "learning_rate": 4.6242135565052235e-05, "loss": 0.021, "step": 10630 }, { "epoch": 0.07864935986517253, "grad_norm": 0.09790777415037155, "learning_rate": 4.6238425925925924e-05, "loss": 0.0215, "step": 10640 }, { "epoch": 0.07872327843647438, "grad_norm": 0.09221351891756058, "learning_rate": 4.623471628679962e-05, "loss": 0.0246, "step": 10650 }, { "epoch": 0.07879719700777624, "grad_norm": 0.09441790729761124, "learning_rate": 4.6231006647673316e-05, "loss": 0.0197, "step": 10660 }, { "epoch": 0.07887111557907808, "grad_norm": 0.07809023559093475, "learning_rate": 4.622729700854701e-05, "loss": 0.0175, "step": 10670 }, { "epoch": 0.07894503415037994, "grad_norm": 0.12023591250181198, "learning_rate": 4.622358736942071e-05, "loss": 0.0206, "step": 10680 }, { "epoch": 0.0790189527216818, "grad_norm": 0.10869825631380081, "learning_rate": 4.62198777302944e-05, "loss": 0.0229, "step": 10690 }, { "epoch": 0.07909287129298365, "grad_norm": 0.09325161576271057, "learning_rate": 4.621616809116809e-05, "loss": 0.0259, "step": 10700 }, { "epoch": 0.0791667898642855, "grad_norm": 0.10033397376537323, "learning_rate": 4.621245845204179e-05, "loss": 0.0227, "step": 10710 }, { "epoch": 0.07924070843558735, "grad_norm": 0.11315719038248062, "learning_rate": 4.620874881291548e-05, "loss": 0.0225, "step": 10720 }, { "epoch": 0.07931462700688921, "grad_norm": 0.08948411047458649, "learning_rate": 4.6205039173789174e-05, "loss": 0.023, "step": 10730 }, { "epoch": 0.07938854557819107, "grad_norm": 0.11530829966068268, "learning_rate": 4.620132953466287e-05, "loss": 0.0232, "step": 10740 }, { "epoch": 0.07946246414949291, "grad_norm": 0.08206850290298462, "learning_rate": 4.6197619895536566e-05, "loss": 0.0198, "step": 10750 }, { "epoch": 0.07953638272079477, "grad_norm": 0.07246618717908859, "learning_rate": 4.619391025641026e-05, "loss": 0.0208, "step": 10760 }, { "epoch": 0.07961030129209663, "grad_norm": 0.10258765518665314, "learning_rate": 4.619020061728395e-05, "loss": 0.0219, "step": 10770 }, { "epoch": 0.07968421986339848, "grad_norm": 0.09250310063362122, "learning_rate": 4.618649097815765e-05, "loss": 0.0221, "step": 10780 }, { "epoch": 0.07975813843470034, "grad_norm": 0.09063389152288437, "learning_rate": 4.6182781339031336e-05, "loss": 0.0223, "step": 10790 }, { "epoch": 0.07983205700600218, "grad_norm": 0.08710388094186783, "learning_rate": 4.617907169990503e-05, "loss": 0.02, "step": 10800 }, { "epoch": 0.07990597557730404, "grad_norm": 0.11745099723339081, "learning_rate": 4.6175362060778735e-05, "loss": 0.0221, "step": 10810 }, { "epoch": 0.0799798941486059, "grad_norm": 0.0828913003206253, "learning_rate": 4.6171652421652424e-05, "loss": 0.0215, "step": 10820 }, { "epoch": 0.08005381271990775, "grad_norm": 0.10530825704336166, "learning_rate": 4.616794278252612e-05, "loss": 0.0226, "step": 10830 }, { "epoch": 0.0801277312912096, "grad_norm": 0.08071672916412354, "learning_rate": 4.616423314339981e-05, "loss": 0.0189, "step": 10840 }, { "epoch": 0.08020164986251145, "grad_norm": 0.085964135825634, "learning_rate": 4.6160523504273505e-05, "loss": 0.0226, "step": 10850 }, { "epoch": 0.08027556843381331, "grad_norm": 0.08544149994850159, "learning_rate": 4.61568138651472e-05, "loss": 0.0238, "step": 10860 }, { "epoch": 0.08034948700511517, "grad_norm": 0.09496266394853592, "learning_rate": 4.615310422602089e-05, "loss": 0.0212, "step": 10870 }, { "epoch": 0.08042340557641701, "grad_norm": 0.08944917470216751, "learning_rate": 4.6149394586894587e-05, "loss": 0.0194, "step": 10880 }, { "epoch": 0.08049732414771887, "grad_norm": 0.1165291890501976, "learning_rate": 4.614568494776828e-05, "loss": 0.0226, "step": 10890 }, { "epoch": 0.08057124271902073, "grad_norm": 0.1558302789926529, "learning_rate": 4.614197530864198e-05, "loss": 0.0221, "step": 10900 }, { "epoch": 0.08064516129032258, "grad_norm": 0.1054091528058052, "learning_rate": 4.6138265669515674e-05, "loss": 0.0173, "step": 10910 }, { "epoch": 0.08071907986162444, "grad_norm": 0.0848877876996994, "learning_rate": 4.6134556030389364e-05, "loss": 0.0188, "step": 10920 }, { "epoch": 0.08079299843292628, "grad_norm": 0.10108659416437149, "learning_rate": 4.613084639126306e-05, "loss": 0.0232, "step": 10930 }, { "epoch": 0.08086691700422814, "grad_norm": 0.11190935969352722, "learning_rate": 4.6127136752136756e-05, "loss": 0.0226, "step": 10940 }, { "epoch": 0.08094083557553, "grad_norm": 0.09286840260028839, "learning_rate": 4.6123427113010445e-05, "loss": 0.0237, "step": 10950 }, { "epoch": 0.08101475414683185, "grad_norm": 0.08794547617435455, "learning_rate": 4.611971747388415e-05, "loss": 0.0219, "step": 10960 }, { "epoch": 0.0810886727181337, "grad_norm": 0.08183534443378448, "learning_rate": 4.611600783475784e-05, "loss": 0.0212, "step": 10970 }, { "epoch": 0.08116259128943555, "grad_norm": 0.0907917469739914, "learning_rate": 4.611229819563153e-05, "loss": 0.0209, "step": 10980 }, { "epoch": 0.08123650986073741, "grad_norm": 0.09019339084625244, "learning_rate": 4.610858855650523e-05, "loss": 0.0222, "step": 10990 }, { "epoch": 0.08131042843203927, "grad_norm": 0.09627287089824677, "learning_rate": 4.610487891737892e-05, "loss": 0.0217, "step": 11000 }, { "epoch": 0.08138434700334111, "grad_norm": 0.09994477778673172, "learning_rate": 4.6101169278252614e-05, "loss": 0.0219, "step": 11010 }, { "epoch": 0.08145826557464297, "grad_norm": 0.07893578708171844, "learning_rate": 4.60974596391263e-05, "loss": 0.0243, "step": 11020 }, { "epoch": 0.08153218414594483, "grad_norm": 0.10810483247041702, "learning_rate": 4.609375e-05, "loss": 0.0217, "step": 11030 }, { "epoch": 0.08160610271724668, "grad_norm": 0.09902021288871765, "learning_rate": 4.60900403608737e-05, "loss": 0.0216, "step": 11040 }, { "epoch": 0.08168002128854854, "grad_norm": 0.12042216211557388, "learning_rate": 4.608633072174739e-05, "loss": 0.0214, "step": 11050 }, { "epoch": 0.08175393985985038, "grad_norm": 0.10445085912942886, "learning_rate": 4.608262108262109e-05, "loss": 0.0229, "step": 11060 }, { "epoch": 0.08182785843115224, "grad_norm": 0.11058395355939865, "learning_rate": 4.6078911443494776e-05, "loss": 0.0224, "step": 11070 }, { "epoch": 0.0819017770024541, "grad_norm": 0.08662155270576477, "learning_rate": 4.607520180436847e-05, "loss": 0.0219, "step": 11080 }, { "epoch": 0.08197569557375595, "grad_norm": 0.12212938070297241, "learning_rate": 4.607149216524217e-05, "loss": 0.0243, "step": 11090 }, { "epoch": 0.0820496141450578, "grad_norm": 0.09374962747097015, "learning_rate": 4.606778252611586e-05, "loss": 0.0215, "step": 11100 }, { "epoch": 0.08212353271635967, "grad_norm": 0.07948266714811325, "learning_rate": 4.606407288698956e-05, "loss": 0.0221, "step": 11110 }, { "epoch": 0.08219745128766151, "grad_norm": 0.09368613362312317, "learning_rate": 4.606036324786325e-05, "loss": 0.0187, "step": 11120 }, { "epoch": 0.08227136985896337, "grad_norm": 0.10957033187150955, "learning_rate": 4.6056653608736945e-05, "loss": 0.0229, "step": 11130 }, { "epoch": 0.08234528843026521, "grad_norm": 0.11067720502614975, "learning_rate": 4.605294396961064e-05, "loss": 0.0219, "step": 11140 }, { "epoch": 0.08241920700156707, "grad_norm": 0.10309172421693802, "learning_rate": 4.604923433048433e-05, "loss": 0.0221, "step": 11150 }, { "epoch": 0.08249312557286893, "grad_norm": 0.10994286090135574, "learning_rate": 4.6045524691358026e-05, "loss": 0.0198, "step": 11160 }, { "epoch": 0.08256704414417078, "grad_norm": 0.11497566848993301, "learning_rate": 4.604181505223172e-05, "loss": 0.0214, "step": 11170 }, { "epoch": 0.08264096271547264, "grad_norm": 0.10038833320140839, "learning_rate": 4.603810541310541e-05, "loss": 0.0188, "step": 11180 }, { "epoch": 0.08271488128677448, "grad_norm": 0.08967998623847961, "learning_rate": 4.6034395773979114e-05, "loss": 0.021, "step": 11190 }, { "epoch": 0.08278879985807634, "grad_norm": 0.06262663006782532, "learning_rate": 4.60306861348528e-05, "loss": 0.0182, "step": 11200 }, { "epoch": 0.0828627184293782, "grad_norm": 0.15144652128219604, "learning_rate": 4.60269764957265e-05, "loss": 0.024, "step": 11210 }, { "epoch": 0.08293663700068005, "grad_norm": 0.07688304036855698, "learning_rate": 4.6023266856600195e-05, "loss": 0.0206, "step": 11220 }, { "epoch": 0.0830105555719819, "grad_norm": 0.07474672049283981, "learning_rate": 4.6019557217473884e-05, "loss": 0.0197, "step": 11230 }, { "epoch": 0.08308447414328377, "grad_norm": 0.07008994370698929, "learning_rate": 4.601584757834758e-05, "loss": 0.0191, "step": 11240 }, { "epoch": 0.08315839271458561, "grad_norm": 0.10242119431495667, "learning_rate": 4.601213793922127e-05, "loss": 0.0239, "step": 11250 }, { "epoch": 0.08323231128588747, "grad_norm": 0.10365156084299088, "learning_rate": 4.600842830009497e-05, "loss": 0.0237, "step": 11260 }, { "epoch": 0.08330622985718932, "grad_norm": 0.09733454138040543, "learning_rate": 4.600471866096867e-05, "loss": 0.0216, "step": 11270 }, { "epoch": 0.08338014842849117, "grad_norm": 0.11051540821790695, "learning_rate": 4.600100902184236e-05, "loss": 0.0217, "step": 11280 }, { "epoch": 0.08345406699979303, "grad_norm": 0.07793629914522171, "learning_rate": 4.5997299382716053e-05, "loss": 0.0206, "step": 11290 }, { "epoch": 0.08352798557109488, "grad_norm": 0.08378177881240845, "learning_rate": 4.599358974358974e-05, "loss": 0.0229, "step": 11300 }, { "epoch": 0.08360190414239674, "grad_norm": 0.12937644124031067, "learning_rate": 4.598988010446344e-05, "loss": 0.0212, "step": 11310 }, { "epoch": 0.08367582271369858, "grad_norm": 0.07999670505523682, "learning_rate": 4.5986170465337135e-05, "loss": 0.0211, "step": 11320 }, { "epoch": 0.08374974128500044, "grad_norm": 0.09235605597496033, "learning_rate": 4.5982460826210824e-05, "loss": 0.0186, "step": 11330 }, { "epoch": 0.0838236598563023, "grad_norm": 0.09930342435836792, "learning_rate": 4.5978751187084526e-05, "loss": 0.0206, "step": 11340 }, { "epoch": 0.08389757842760415, "grad_norm": 0.10215435177087784, "learning_rate": 4.5975041547958216e-05, "loss": 0.023, "step": 11350 }, { "epoch": 0.083971496998906, "grad_norm": 0.09137150645256042, "learning_rate": 4.597133190883191e-05, "loss": 0.0223, "step": 11360 }, { "epoch": 0.08404541557020787, "grad_norm": 0.10764443874359131, "learning_rate": 4.596762226970561e-05, "loss": 0.0199, "step": 11370 }, { "epoch": 0.08411933414150971, "grad_norm": 0.09711901098489761, "learning_rate": 4.59639126305793e-05, "loss": 0.0218, "step": 11380 }, { "epoch": 0.08419325271281157, "grad_norm": 0.10127461701631546, "learning_rate": 4.596020299145299e-05, "loss": 0.0196, "step": 11390 }, { "epoch": 0.08426717128411342, "grad_norm": 0.08950147032737732, "learning_rate": 4.595649335232669e-05, "loss": 0.0189, "step": 11400 }, { "epoch": 0.08434108985541527, "grad_norm": 0.11337869614362717, "learning_rate": 4.5952783713200385e-05, "loss": 0.0214, "step": 11410 }, { "epoch": 0.08441500842671713, "grad_norm": 0.10098463296890259, "learning_rate": 4.594907407407408e-05, "loss": 0.0179, "step": 11420 }, { "epoch": 0.08448892699801898, "grad_norm": 0.12542690336704254, "learning_rate": 4.594536443494777e-05, "loss": 0.0216, "step": 11430 }, { "epoch": 0.08456284556932084, "grad_norm": 0.08482198417186737, "learning_rate": 4.5941654795821466e-05, "loss": 0.0199, "step": 11440 }, { "epoch": 0.08463676414062268, "grad_norm": 0.09225975722074509, "learning_rate": 4.593794515669516e-05, "loss": 0.0212, "step": 11450 }, { "epoch": 0.08471068271192454, "grad_norm": 0.09084470570087433, "learning_rate": 4.593423551756885e-05, "loss": 0.0185, "step": 11460 }, { "epoch": 0.0847846012832264, "grad_norm": 0.087517149746418, "learning_rate": 4.593052587844255e-05, "loss": 0.0192, "step": 11470 }, { "epoch": 0.08485851985452825, "grad_norm": 0.10069217532873154, "learning_rate": 4.5926816239316236e-05, "loss": 0.0216, "step": 11480 }, { "epoch": 0.0849324384258301, "grad_norm": 0.1067439615726471, "learning_rate": 4.592310660018994e-05, "loss": 0.0213, "step": 11490 }, { "epoch": 0.08500635699713197, "grad_norm": 0.11482270807027817, "learning_rate": 4.5919396961063635e-05, "loss": 0.0234, "step": 11500 }, { "epoch": 0.08508027556843381, "grad_norm": 0.13125424087047577, "learning_rate": 4.5915687321937324e-05, "loss": 0.0221, "step": 11510 }, { "epoch": 0.08515419413973567, "grad_norm": 0.139346182346344, "learning_rate": 4.591197768281102e-05, "loss": 0.0228, "step": 11520 }, { "epoch": 0.08522811271103752, "grad_norm": 0.09836461395025253, "learning_rate": 4.590826804368471e-05, "loss": 0.0222, "step": 11530 }, { "epoch": 0.08530203128233937, "grad_norm": 0.09203539043664932, "learning_rate": 4.5904558404558405e-05, "loss": 0.0214, "step": 11540 }, { "epoch": 0.08537594985364123, "grad_norm": 0.07745225727558136, "learning_rate": 4.59008487654321e-05, "loss": 0.0188, "step": 11550 }, { "epoch": 0.08544986842494308, "grad_norm": 0.08701054751873016, "learning_rate": 4.58971391263058e-05, "loss": 0.0223, "step": 11560 }, { "epoch": 0.08552378699624494, "grad_norm": 0.1272624433040619, "learning_rate": 4.589342948717949e-05, "loss": 0.0235, "step": 11570 }, { "epoch": 0.0855977055675468, "grad_norm": 0.11356864869594574, "learning_rate": 4.588971984805318e-05, "loss": 0.0203, "step": 11580 }, { "epoch": 0.08567162413884864, "grad_norm": 0.12194553017616272, "learning_rate": 4.588601020892688e-05, "loss": 0.0225, "step": 11590 }, { "epoch": 0.0857455427101505, "grad_norm": 0.10308793932199478, "learning_rate": 4.5882300569800574e-05, "loss": 0.0212, "step": 11600 }, { "epoch": 0.08581946128145235, "grad_norm": 0.11726551502943039, "learning_rate": 4.587859093067426e-05, "loss": 0.0211, "step": 11610 }, { "epoch": 0.0858933798527542, "grad_norm": 0.0865316092967987, "learning_rate": 4.587488129154796e-05, "loss": 0.0211, "step": 11620 }, { "epoch": 0.08596729842405607, "grad_norm": 0.08077540993690491, "learning_rate": 4.5871171652421655e-05, "loss": 0.0207, "step": 11630 }, { "epoch": 0.08604121699535791, "grad_norm": 0.08920275419950485, "learning_rate": 4.586746201329535e-05, "loss": 0.024, "step": 11640 }, { "epoch": 0.08611513556665977, "grad_norm": 0.1083950474858284, "learning_rate": 4.586375237416905e-05, "loss": 0.0232, "step": 11650 }, { "epoch": 0.08618905413796162, "grad_norm": 0.070146843791008, "learning_rate": 4.5860042735042736e-05, "loss": 0.0212, "step": 11660 }, { "epoch": 0.08626297270926347, "grad_norm": 0.07127376645803452, "learning_rate": 4.585633309591643e-05, "loss": 0.0201, "step": 11670 }, { "epoch": 0.08633689128056533, "grad_norm": 0.11417879164218903, "learning_rate": 4.585262345679013e-05, "loss": 0.0221, "step": 11680 }, { "epoch": 0.08641080985186718, "grad_norm": 0.11518987268209457, "learning_rate": 4.584891381766382e-05, "loss": 0.0202, "step": 11690 }, { "epoch": 0.08648472842316904, "grad_norm": 0.0677439421415329, "learning_rate": 4.5845204178537514e-05, "loss": 0.0202, "step": 11700 }, { "epoch": 0.0865586469944709, "grad_norm": 0.09899143874645233, "learning_rate": 4.58414945394112e-05, "loss": 0.0197, "step": 11710 }, { "epoch": 0.08663256556577274, "grad_norm": 0.1325441151857376, "learning_rate": 4.5837784900284905e-05, "loss": 0.0222, "step": 11720 }, { "epoch": 0.0867064841370746, "grad_norm": 0.1297304332256317, "learning_rate": 4.58340752611586e-05, "loss": 0.0238, "step": 11730 }, { "epoch": 0.08678040270837645, "grad_norm": 0.08309206366539001, "learning_rate": 4.583036562203229e-05, "loss": 0.0206, "step": 11740 }, { "epoch": 0.0868543212796783, "grad_norm": 0.08319962024688721, "learning_rate": 4.5826655982905987e-05, "loss": 0.0201, "step": 11750 }, { "epoch": 0.08692823985098017, "grad_norm": 0.08149457722902298, "learning_rate": 4.5822946343779676e-05, "loss": 0.0208, "step": 11760 }, { "epoch": 0.08700215842228201, "grad_norm": 0.12498198449611664, "learning_rate": 4.581923670465337e-05, "loss": 0.0205, "step": 11770 }, { "epoch": 0.08707607699358387, "grad_norm": 0.06954797357320786, "learning_rate": 4.581552706552707e-05, "loss": 0.0204, "step": 11780 }, { "epoch": 0.08714999556488572, "grad_norm": 0.08185707032680511, "learning_rate": 4.5811817426400764e-05, "loss": 0.0211, "step": 11790 }, { "epoch": 0.08722391413618757, "grad_norm": 0.09459664672613144, "learning_rate": 4.580810778727446e-05, "loss": 0.0203, "step": 11800 }, { "epoch": 0.08729783270748943, "grad_norm": 0.08659505099058151, "learning_rate": 4.580439814814815e-05, "loss": 0.0218, "step": 11810 }, { "epoch": 0.08737175127879128, "grad_norm": 0.07980205118656158, "learning_rate": 4.5800688509021845e-05, "loss": 0.0245, "step": 11820 }, { "epoch": 0.08744566985009314, "grad_norm": 0.09071630239486694, "learning_rate": 4.579697886989554e-05, "loss": 0.0209, "step": 11830 }, { "epoch": 0.087519588421395, "grad_norm": 0.09811149537563324, "learning_rate": 4.579326923076923e-05, "loss": 0.0205, "step": 11840 }, { "epoch": 0.08759350699269684, "grad_norm": 0.09655947238206863, "learning_rate": 4.5789559591642926e-05, "loss": 0.0194, "step": 11850 }, { "epoch": 0.0876674255639987, "grad_norm": 0.10314806550741196, "learning_rate": 4.578584995251662e-05, "loss": 0.0188, "step": 11860 }, { "epoch": 0.08774134413530055, "grad_norm": 0.07170028239488602, "learning_rate": 4.578214031339032e-05, "loss": 0.0236, "step": 11870 }, { "epoch": 0.0878152627066024, "grad_norm": 0.07748035341501236, "learning_rate": 4.5778430674264014e-05, "loss": 0.0221, "step": 11880 }, { "epoch": 0.08788918127790427, "grad_norm": 0.12346388399600983, "learning_rate": 4.57747210351377e-05, "loss": 0.0234, "step": 11890 }, { "epoch": 0.08796309984920611, "grad_norm": 0.09907881170511246, "learning_rate": 4.57710113960114e-05, "loss": 0.0209, "step": 11900 }, { "epoch": 0.08803701842050797, "grad_norm": 0.12885276973247528, "learning_rate": 4.5767301756885095e-05, "loss": 0.0229, "step": 11910 }, { "epoch": 0.08811093699180982, "grad_norm": 0.12708497047424316, "learning_rate": 4.5763592117758784e-05, "loss": 0.0189, "step": 11920 }, { "epoch": 0.08818485556311167, "grad_norm": 0.10651792585849762, "learning_rate": 4.575988247863248e-05, "loss": 0.0206, "step": 11930 }, { "epoch": 0.08825877413441353, "grad_norm": 0.0823550820350647, "learning_rate": 4.5756172839506176e-05, "loss": 0.0188, "step": 11940 }, { "epoch": 0.08833269270571538, "grad_norm": 0.10734937340021133, "learning_rate": 4.575246320037987e-05, "loss": 0.0221, "step": 11950 }, { "epoch": 0.08840661127701724, "grad_norm": 0.10963788628578186, "learning_rate": 4.574875356125357e-05, "loss": 0.0204, "step": 11960 }, { "epoch": 0.0884805298483191, "grad_norm": 0.0906628742814064, "learning_rate": 4.574504392212726e-05, "loss": 0.0182, "step": 11970 }, { "epoch": 0.08855444841962094, "grad_norm": 0.08999045193195343, "learning_rate": 4.574133428300095e-05, "loss": 0.0203, "step": 11980 }, { "epoch": 0.0886283669909228, "grad_norm": 0.09220533818006516, "learning_rate": 4.573762464387464e-05, "loss": 0.0211, "step": 11990 }, { "epoch": 0.08870228556222465, "grad_norm": 0.09482621401548386, "learning_rate": 4.573391500474834e-05, "loss": 0.0248, "step": 12000 }, { "epoch": 0.0887762041335265, "grad_norm": 0.07867447286844254, "learning_rate": 4.5730205365622034e-05, "loss": 0.0213, "step": 12010 }, { "epoch": 0.08885012270482837, "grad_norm": 0.09121580421924591, "learning_rate": 4.572649572649573e-05, "loss": 0.023, "step": 12020 }, { "epoch": 0.08892404127613021, "grad_norm": 0.07908417284488678, "learning_rate": 4.5722786087369426e-05, "loss": 0.0208, "step": 12030 }, { "epoch": 0.08899795984743207, "grad_norm": 0.12759286165237427, "learning_rate": 4.5719076448243115e-05, "loss": 0.0237, "step": 12040 }, { "epoch": 0.08907187841873393, "grad_norm": 0.08981367945671082, "learning_rate": 4.571536680911681e-05, "loss": 0.0188, "step": 12050 }, { "epoch": 0.08914579699003577, "grad_norm": 0.16583828628063202, "learning_rate": 4.571165716999051e-05, "loss": 0.0234, "step": 12060 }, { "epoch": 0.08921971556133763, "grad_norm": 0.09226063638925552, "learning_rate": 4.5707947530864197e-05, "loss": 0.0205, "step": 12070 }, { "epoch": 0.08929363413263948, "grad_norm": 0.11874354630708694, "learning_rate": 4.570423789173789e-05, "loss": 0.0211, "step": 12080 }, { "epoch": 0.08936755270394134, "grad_norm": 0.1163206398487091, "learning_rate": 4.570052825261159e-05, "loss": 0.0202, "step": 12090 }, { "epoch": 0.0894414712752432, "grad_norm": 0.0943320095539093, "learning_rate": 4.5696818613485284e-05, "loss": 0.0198, "step": 12100 }, { "epoch": 0.08951538984654504, "grad_norm": 0.09926102310419083, "learning_rate": 4.569310897435898e-05, "loss": 0.0192, "step": 12110 }, { "epoch": 0.0895893084178469, "grad_norm": 0.08138690888881683, "learning_rate": 4.568939933523267e-05, "loss": 0.0208, "step": 12120 }, { "epoch": 0.08966322698914875, "grad_norm": 0.10757561773061752, "learning_rate": 4.5685689696106366e-05, "loss": 0.0224, "step": 12130 }, { "epoch": 0.0897371455604506, "grad_norm": 0.09196264296770096, "learning_rate": 4.568198005698006e-05, "loss": 0.0196, "step": 12140 }, { "epoch": 0.08981106413175247, "grad_norm": 0.10394033789634705, "learning_rate": 4.567827041785375e-05, "loss": 0.0191, "step": 12150 }, { "epoch": 0.08988498270305431, "grad_norm": 0.1103530302643776, "learning_rate": 4.567456077872745e-05, "loss": 0.0243, "step": 12160 }, { "epoch": 0.08995890127435617, "grad_norm": 0.08777318894863129, "learning_rate": 4.567085113960114e-05, "loss": 0.0216, "step": 12170 }, { "epoch": 0.09003281984565803, "grad_norm": 0.09444407373666763, "learning_rate": 4.566714150047484e-05, "loss": 0.0243, "step": 12180 }, { "epoch": 0.09010673841695988, "grad_norm": 0.13325783610343933, "learning_rate": 4.5663431861348535e-05, "loss": 0.0223, "step": 12190 }, { "epoch": 0.09018065698826173, "grad_norm": 0.11760847270488739, "learning_rate": 4.5659722222222224e-05, "loss": 0.0232, "step": 12200 }, { "epoch": 0.09025457555956358, "grad_norm": 0.09399545192718506, "learning_rate": 4.565601258309592e-05, "loss": 0.0221, "step": 12210 }, { "epoch": 0.09032849413086544, "grad_norm": 0.09595145285129547, "learning_rate": 4.565230294396961e-05, "loss": 0.0184, "step": 12220 }, { "epoch": 0.0904024127021673, "grad_norm": 0.09397619217634201, "learning_rate": 4.5648593304843305e-05, "loss": 0.0213, "step": 12230 }, { "epoch": 0.09047633127346914, "grad_norm": 0.13015051186084747, "learning_rate": 4.5644883665717e-05, "loss": 0.0213, "step": 12240 }, { "epoch": 0.090550249844771, "grad_norm": 0.09179303795099258, "learning_rate": 4.56411740265907e-05, "loss": 0.0224, "step": 12250 }, { "epoch": 0.09062416841607285, "grad_norm": 0.12288478761911392, "learning_rate": 4.563746438746439e-05, "loss": 0.0231, "step": 12260 }, { "epoch": 0.09069808698737471, "grad_norm": 0.08742814511060715, "learning_rate": 4.563375474833808e-05, "loss": 0.0206, "step": 12270 }, { "epoch": 0.09077200555867657, "grad_norm": 0.17362412810325623, "learning_rate": 4.563004510921178e-05, "loss": 0.0202, "step": 12280 }, { "epoch": 0.09084592412997841, "grad_norm": 0.06865504384040833, "learning_rate": 4.5626335470085474e-05, "loss": 0.0202, "step": 12290 }, { "epoch": 0.09091984270128027, "grad_norm": 0.0832584798336029, "learning_rate": 4.562262583095916e-05, "loss": 0.0235, "step": 12300 }, { "epoch": 0.09099376127258213, "grad_norm": 0.10800887644290924, "learning_rate": 4.561891619183286e-05, "loss": 0.0199, "step": 12310 }, { "epoch": 0.09106767984388398, "grad_norm": 0.07915148884057999, "learning_rate": 4.5615206552706555e-05, "loss": 0.0203, "step": 12320 }, { "epoch": 0.09114159841518583, "grad_norm": 0.09770840406417847, "learning_rate": 4.561149691358025e-05, "loss": 0.0217, "step": 12330 }, { "epoch": 0.09121551698648768, "grad_norm": 0.13962043821811676, "learning_rate": 4.560778727445395e-05, "loss": 0.0235, "step": 12340 }, { "epoch": 0.09128943555778954, "grad_norm": 0.08580047637224197, "learning_rate": 4.5604077635327636e-05, "loss": 0.0204, "step": 12350 }, { "epoch": 0.0913633541290914, "grad_norm": 0.07657952606678009, "learning_rate": 4.560036799620133e-05, "loss": 0.0214, "step": 12360 }, { "epoch": 0.09143727270039324, "grad_norm": 0.09985584765672684, "learning_rate": 4.559665835707503e-05, "loss": 0.0203, "step": 12370 }, { "epoch": 0.0915111912716951, "grad_norm": 0.14025211334228516, "learning_rate": 4.559294871794872e-05, "loss": 0.0224, "step": 12380 }, { "epoch": 0.09158510984299695, "grad_norm": 0.10704502463340759, "learning_rate": 4.558923907882241e-05, "loss": 0.0181, "step": 12390 }, { "epoch": 0.09165902841429881, "grad_norm": 0.12064116448163986, "learning_rate": 4.558552943969611e-05, "loss": 0.0217, "step": 12400 }, { "epoch": 0.09173294698560067, "grad_norm": 0.09326235204935074, "learning_rate": 4.5581819800569805e-05, "loss": 0.0192, "step": 12410 }, { "epoch": 0.09180686555690251, "grad_norm": 0.1293174922466278, "learning_rate": 4.55781101614435e-05, "loss": 0.0208, "step": 12420 }, { "epoch": 0.09188078412820437, "grad_norm": 0.09565316885709763, "learning_rate": 4.557440052231719e-05, "loss": 0.0204, "step": 12430 }, { "epoch": 0.09195470269950623, "grad_norm": 0.09142905473709106, "learning_rate": 4.5570690883190886e-05, "loss": 0.0224, "step": 12440 }, { "epoch": 0.09202862127080808, "grad_norm": 0.08464720845222473, "learning_rate": 4.5566981244064576e-05, "loss": 0.0208, "step": 12450 }, { "epoch": 0.09210253984210993, "grad_norm": 0.10270611196756363, "learning_rate": 4.556327160493827e-05, "loss": 0.0245, "step": 12460 }, { "epoch": 0.09217645841341178, "grad_norm": 0.0861668661236763, "learning_rate": 4.555956196581197e-05, "loss": 0.0204, "step": 12470 }, { "epoch": 0.09225037698471364, "grad_norm": 0.1521192193031311, "learning_rate": 4.5555852326685663e-05, "loss": 0.0211, "step": 12480 }, { "epoch": 0.0923242955560155, "grad_norm": 0.08717560023069382, "learning_rate": 4.555214268755936e-05, "loss": 0.0187, "step": 12490 }, { "epoch": 0.09239821412731734, "grad_norm": 0.0908551886677742, "learning_rate": 4.554843304843305e-05, "loss": 0.0195, "step": 12500 }, { "epoch": 0.0924721326986192, "grad_norm": 0.09777882695198059, "learning_rate": 4.5544723409306745e-05, "loss": 0.0211, "step": 12510 }, { "epoch": 0.09254605126992106, "grad_norm": 0.06726015359163284, "learning_rate": 4.554101377018044e-05, "loss": 0.0187, "step": 12520 }, { "epoch": 0.09261996984122291, "grad_norm": 0.08230742812156677, "learning_rate": 4.553730413105413e-05, "loss": 0.0213, "step": 12530 }, { "epoch": 0.09269388841252477, "grad_norm": 0.0912739634513855, "learning_rate": 4.5533594491927826e-05, "loss": 0.0224, "step": 12540 }, { "epoch": 0.09276780698382661, "grad_norm": 0.10400039702653885, "learning_rate": 4.552988485280152e-05, "loss": 0.0198, "step": 12550 }, { "epoch": 0.09284172555512847, "grad_norm": 0.07109177112579346, "learning_rate": 4.552617521367522e-05, "loss": 0.0198, "step": 12560 }, { "epoch": 0.09291564412643033, "grad_norm": 0.11673447489738464, "learning_rate": 4.5522465574548914e-05, "loss": 0.0216, "step": 12570 }, { "epoch": 0.09298956269773218, "grad_norm": 0.10010415315628052, "learning_rate": 4.55187559354226e-05, "loss": 0.0192, "step": 12580 }, { "epoch": 0.09306348126903403, "grad_norm": 0.12168619781732559, "learning_rate": 4.55150462962963e-05, "loss": 0.0249, "step": 12590 }, { "epoch": 0.09313739984033588, "grad_norm": 0.10904297232627869, "learning_rate": 4.5511336657169995e-05, "loss": 0.0221, "step": 12600 }, { "epoch": 0.09321131841163774, "grad_norm": 0.07244222611188889, "learning_rate": 4.5507627018043684e-05, "loss": 0.0208, "step": 12610 }, { "epoch": 0.0932852369829396, "grad_norm": 0.12043386697769165, "learning_rate": 4.550391737891738e-05, "loss": 0.0208, "step": 12620 }, { "epoch": 0.09335915555424144, "grad_norm": 0.08687592297792435, "learning_rate": 4.5500207739791076e-05, "loss": 0.0215, "step": 12630 }, { "epoch": 0.0934330741255433, "grad_norm": 0.11865407228469849, "learning_rate": 4.549649810066477e-05, "loss": 0.0208, "step": 12640 }, { "epoch": 0.09350699269684516, "grad_norm": 0.10706440359354019, "learning_rate": 4.549278846153847e-05, "loss": 0.0201, "step": 12650 }, { "epoch": 0.09358091126814701, "grad_norm": 0.08515679836273193, "learning_rate": 4.548907882241216e-05, "loss": 0.02, "step": 12660 }, { "epoch": 0.09365482983944887, "grad_norm": 0.1031784862279892, "learning_rate": 4.548536918328585e-05, "loss": 0.0208, "step": 12670 }, { "epoch": 0.09372874841075071, "grad_norm": 0.09280925989151001, "learning_rate": 4.548165954415954e-05, "loss": 0.0187, "step": 12680 }, { "epoch": 0.09380266698205257, "grad_norm": 0.09347113966941833, "learning_rate": 4.547794990503324e-05, "loss": 0.0197, "step": 12690 }, { "epoch": 0.09387658555335443, "grad_norm": 0.0832105502486229, "learning_rate": 4.5474240265906934e-05, "loss": 0.021, "step": 12700 }, { "epoch": 0.09395050412465628, "grad_norm": 0.11361734569072723, "learning_rate": 4.547053062678063e-05, "loss": 0.0206, "step": 12710 }, { "epoch": 0.09402442269595813, "grad_norm": 0.06806913763284683, "learning_rate": 4.5466820987654326e-05, "loss": 0.0199, "step": 12720 }, { "epoch": 0.09409834126725998, "grad_norm": 0.07145105302333832, "learning_rate": 4.5463111348528015e-05, "loss": 0.0216, "step": 12730 }, { "epoch": 0.09417225983856184, "grad_norm": 0.10840686410665512, "learning_rate": 4.545940170940171e-05, "loss": 0.0216, "step": 12740 }, { "epoch": 0.0942461784098637, "grad_norm": 0.06309173256158829, "learning_rate": 4.545569207027541e-05, "loss": 0.0222, "step": 12750 }, { "epoch": 0.09432009698116554, "grad_norm": 0.08596502989530563, "learning_rate": 4.5451982431149096e-05, "loss": 0.0194, "step": 12760 }, { "epoch": 0.0943940155524674, "grad_norm": 0.10800420492887497, "learning_rate": 4.544827279202279e-05, "loss": 0.0205, "step": 12770 }, { "epoch": 0.09446793412376926, "grad_norm": 0.11876120418310165, "learning_rate": 4.544456315289649e-05, "loss": 0.0199, "step": 12780 }, { "epoch": 0.09454185269507111, "grad_norm": 0.10091854631900787, "learning_rate": 4.5440853513770184e-05, "loss": 0.0213, "step": 12790 }, { "epoch": 0.09461577126637297, "grad_norm": 0.09491269290447235, "learning_rate": 4.543714387464388e-05, "loss": 0.0181, "step": 12800 }, { "epoch": 0.09468968983767481, "grad_norm": 0.09774453192949295, "learning_rate": 4.543343423551757e-05, "loss": 0.0212, "step": 12810 }, { "epoch": 0.09476360840897667, "grad_norm": 0.0799957811832428, "learning_rate": 4.5429724596391265e-05, "loss": 0.021, "step": 12820 }, { "epoch": 0.09483752698027853, "grad_norm": 0.10638774931430817, "learning_rate": 4.542601495726496e-05, "loss": 0.0224, "step": 12830 }, { "epoch": 0.09491144555158038, "grad_norm": 0.06579312682151794, "learning_rate": 4.542230531813865e-05, "loss": 0.0212, "step": 12840 }, { "epoch": 0.09498536412288223, "grad_norm": 0.0943514034152031, "learning_rate": 4.5418595679012346e-05, "loss": 0.0222, "step": 12850 }, { "epoch": 0.09505928269418408, "grad_norm": 0.11735722422599792, "learning_rate": 4.541488603988604e-05, "loss": 0.0194, "step": 12860 }, { "epoch": 0.09513320126548594, "grad_norm": 0.09771939367055893, "learning_rate": 4.541117640075974e-05, "loss": 0.0213, "step": 12870 }, { "epoch": 0.0952071198367878, "grad_norm": 0.09291230142116547, "learning_rate": 4.5407466761633434e-05, "loss": 0.0211, "step": 12880 }, { "epoch": 0.09528103840808964, "grad_norm": 0.123878613114357, "learning_rate": 4.5403757122507124e-05, "loss": 0.0222, "step": 12890 }, { "epoch": 0.0953549569793915, "grad_norm": 0.11175885796546936, "learning_rate": 4.540004748338082e-05, "loss": 0.0214, "step": 12900 }, { "epoch": 0.09542887555069336, "grad_norm": 0.09758037328720093, "learning_rate": 4.539633784425451e-05, "loss": 0.0203, "step": 12910 }, { "epoch": 0.09550279412199521, "grad_norm": 0.1057739108800888, "learning_rate": 4.5392628205128205e-05, "loss": 0.0226, "step": 12920 }, { "epoch": 0.09557671269329707, "grad_norm": 0.08232392370700836, "learning_rate": 4.53889185660019e-05, "loss": 0.0229, "step": 12930 }, { "epoch": 0.09565063126459891, "grad_norm": 0.11738672107458115, "learning_rate": 4.5385208926875597e-05, "loss": 0.0218, "step": 12940 }, { "epoch": 0.09572454983590077, "grad_norm": 0.10136391222476959, "learning_rate": 4.538149928774929e-05, "loss": 0.0223, "step": 12950 }, { "epoch": 0.09579846840720263, "grad_norm": 0.07713291794061661, "learning_rate": 4.537778964862298e-05, "loss": 0.022, "step": 12960 }, { "epoch": 0.09587238697850448, "grad_norm": 0.1363089680671692, "learning_rate": 4.537408000949668e-05, "loss": 0.0238, "step": 12970 }, { "epoch": 0.09594630554980633, "grad_norm": 0.08520155400037766, "learning_rate": 4.5370370370370374e-05, "loss": 0.0217, "step": 12980 }, { "epoch": 0.0960202241211082, "grad_norm": 0.09474644809961319, "learning_rate": 4.536666073124406e-05, "loss": 0.0175, "step": 12990 }, { "epoch": 0.09609414269241004, "grad_norm": 0.10518652945756912, "learning_rate": 4.536295109211776e-05, "loss": 0.0225, "step": 13000 }, { "epoch": 0.0961680612637119, "grad_norm": 0.1090826466679573, "learning_rate": 4.5359241452991455e-05, "loss": 0.0208, "step": 13010 }, { "epoch": 0.09624197983501374, "grad_norm": 0.09654388576745987, "learning_rate": 4.535553181386515e-05, "loss": 0.0193, "step": 13020 }, { "epoch": 0.0963158984063156, "grad_norm": 0.06633268296718597, "learning_rate": 4.535182217473885e-05, "loss": 0.0177, "step": 13030 }, { "epoch": 0.09638981697761746, "grad_norm": 0.1015574038028717, "learning_rate": 4.5348112535612536e-05, "loss": 0.0204, "step": 13040 }, { "epoch": 0.09646373554891931, "grad_norm": 0.11916191875934601, "learning_rate": 4.534440289648623e-05, "loss": 0.0221, "step": 13050 }, { "epoch": 0.09653765412022117, "grad_norm": 0.09542952477931976, "learning_rate": 4.534069325735993e-05, "loss": 0.0208, "step": 13060 }, { "epoch": 0.09661157269152301, "grad_norm": 0.12832722067832947, "learning_rate": 4.533698361823362e-05, "loss": 0.0213, "step": 13070 }, { "epoch": 0.09668549126282487, "grad_norm": 0.10564541071653366, "learning_rate": 4.533327397910731e-05, "loss": 0.0217, "step": 13080 }, { "epoch": 0.09675940983412673, "grad_norm": 0.08311822265386581, "learning_rate": 4.532956433998101e-05, "loss": 0.0206, "step": 13090 }, { "epoch": 0.09683332840542858, "grad_norm": 0.10254728049039841, "learning_rate": 4.5325854700854705e-05, "loss": 0.0237, "step": 13100 }, { "epoch": 0.09690724697673044, "grad_norm": 0.07333148270845413, "learning_rate": 4.53221450617284e-05, "loss": 0.0186, "step": 13110 }, { "epoch": 0.0969811655480323, "grad_norm": 0.1466401070356369, "learning_rate": 4.531843542260209e-05, "loss": 0.0189, "step": 13120 }, { "epoch": 0.09705508411933414, "grad_norm": 0.09987608343362808, "learning_rate": 4.5314725783475786e-05, "loss": 0.0216, "step": 13130 }, { "epoch": 0.097129002690636, "grad_norm": 0.09517742693424225, "learning_rate": 4.5311016144349475e-05, "loss": 0.0212, "step": 13140 }, { "epoch": 0.09720292126193784, "grad_norm": 0.09622327238321304, "learning_rate": 4.530730650522317e-05, "loss": 0.0237, "step": 13150 }, { "epoch": 0.0972768398332397, "grad_norm": 0.125885471701622, "learning_rate": 4.530359686609687e-05, "loss": 0.0219, "step": 13160 }, { "epoch": 0.09735075840454156, "grad_norm": 0.11400435119867325, "learning_rate": 4.529988722697056e-05, "loss": 0.0187, "step": 13170 }, { "epoch": 0.09742467697584341, "grad_norm": 0.0999847948551178, "learning_rate": 4.529617758784426e-05, "loss": 0.0204, "step": 13180 }, { "epoch": 0.09749859554714527, "grad_norm": 0.10056626051664352, "learning_rate": 4.529246794871795e-05, "loss": 0.0196, "step": 13190 }, { "epoch": 0.09757251411844711, "grad_norm": 0.09070125222206116, "learning_rate": 4.5288758309591644e-05, "loss": 0.021, "step": 13200 }, { "epoch": 0.09764643268974897, "grad_norm": 0.1022719293832779, "learning_rate": 4.528504867046534e-05, "loss": 0.0203, "step": 13210 }, { "epoch": 0.09772035126105083, "grad_norm": 0.11356259137392044, "learning_rate": 4.528133903133903e-05, "loss": 0.0201, "step": 13220 }, { "epoch": 0.09779426983235268, "grad_norm": 0.10243353992700577, "learning_rate": 4.5277629392212725e-05, "loss": 0.019, "step": 13230 }, { "epoch": 0.09786818840365454, "grad_norm": 0.08349420875310898, "learning_rate": 4.527391975308642e-05, "loss": 0.0228, "step": 13240 }, { "epoch": 0.0979421069749564, "grad_norm": 0.07490584254264832, "learning_rate": 4.527021011396012e-05, "loss": 0.0188, "step": 13250 }, { "epoch": 0.09801602554625824, "grad_norm": 0.11197661608457565, "learning_rate": 4.526650047483381e-05, "loss": 0.019, "step": 13260 }, { "epoch": 0.0980899441175601, "grad_norm": 0.10814948379993439, "learning_rate": 4.52627908357075e-05, "loss": 0.0214, "step": 13270 }, { "epoch": 0.09816386268886194, "grad_norm": 0.13824844360351562, "learning_rate": 4.52590811965812e-05, "loss": 0.0226, "step": 13280 }, { "epoch": 0.0982377812601638, "grad_norm": 0.09959527105093002, "learning_rate": 4.5255371557454894e-05, "loss": 0.0223, "step": 13290 }, { "epoch": 0.09831169983146566, "grad_norm": 0.0893290713429451, "learning_rate": 4.5251661918328584e-05, "loss": 0.0219, "step": 13300 }, { "epoch": 0.09838561840276751, "grad_norm": 0.15800920128822327, "learning_rate": 4.524795227920228e-05, "loss": 0.0236, "step": 13310 }, { "epoch": 0.09845953697406937, "grad_norm": 0.08085887879133224, "learning_rate": 4.5244242640075976e-05, "loss": 0.0224, "step": 13320 }, { "epoch": 0.09853345554537121, "grad_norm": 0.07695917040109634, "learning_rate": 4.524053300094967e-05, "loss": 0.0214, "step": 13330 }, { "epoch": 0.09860737411667307, "grad_norm": 0.0860675647854805, "learning_rate": 4.523682336182337e-05, "loss": 0.0167, "step": 13340 }, { "epoch": 0.09868129268797493, "grad_norm": 0.10916229337453842, "learning_rate": 4.523311372269706e-05, "loss": 0.0214, "step": 13350 }, { "epoch": 0.09875521125927678, "grad_norm": 0.09651117026805878, "learning_rate": 4.522940408357075e-05, "loss": 0.0205, "step": 13360 }, { "epoch": 0.09882912983057864, "grad_norm": 0.10305950790643692, "learning_rate": 4.522569444444444e-05, "loss": 0.023, "step": 13370 }, { "epoch": 0.0989030484018805, "grad_norm": 0.12070255726575851, "learning_rate": 4.522198480531814e-05, "loss": 0.0209, "step": 13380 }, { "epoch": 0.09897696697318234, "grad_norm": 0.07445371896028519, "learning_rate": 4.5218275166191834e-05, "loss": 0.0202, "step": 13390 }, { "epoch": 0.0990508855444842, "grad_norm": 0.09945213794708252, "learning_rate": 4.521456552706553e-05, "loss": 0.0206, "step": 13400 }, { "epoch": 0.09912480411578604, "grad_norm": 0.0881875604391098, "learning_rate": 4.5210855887939226e-05, "loss": 0.0204, "step": 13410 }, { "epoch": 0.0991987226870879, "grad_norm": 0.07720271497964859, "learning_rate": 4.5207146248812915e-05, "loss": 0.0195, "step": 13420 }, { "epoch": 0.09927264125838976, "grad_norm": 0.09193691611289978, "learning_rate": 4.520343660968661e-05, "loss": 0.0205, "step": 13430 }, { "epoch": 0.09934655982969161, "grad_norm": 0.0846005380153656, "learning_rate": 4.519972697056031e-05, "loss": 0.022, "step": 13440 }, { "epoch": 0.09942047840099347, "grad_norm": 0.10466866195201874, "learning_rate": 4.5196017331433996e-05, "loss": 0.0182, "step": 13450 }, { "epoch": 0.09949439697229531, "grad_norm": 0.07748100161552429, "learning_rate": 4.519230769230769e-05, "loss": 0.0205, "step": 13460 }, { "epoch": 0.09956831554359717, "grad_norm": 0.10007157176733017, "learning_rate": 4.518859805318139e-05, "loss": 0.0224, "step": 13470 }, { "epoch": 0.09964223411489903, "grad_norm": 0.08469849824905396, "learning_rate": 4.5184888414055084e-05, "loss": 0.0232, "step": 13480 }, { "epoch": 0.09971615268620088, "grad_norm": 0.09315604716539383, "learning_rate": 4.518117877492878e-05, "loss": 0.0177, "step": 13490 }, { "epoch": 0.09979007125750274, "grad_norm": 0.08141258358955383, "learning_rate": 4.517746913580247e-05, "loss": 0.0201, "step": 13500 }, { "epoch": 0.0998639898288046, "grad_norm": 0.09147261083126068, "learning_rate": 4.5173759496676165e-05, "loss": 0.0216, "step": 13510 }, { "epoch": 0.09993790840010644, "grad_norm": 0.07396159321069717, "learning_rate": 4.517004985754986e-05, "loss": 0.0181, "step": 13520 }, { "epoch": 0.1000118269714083, "grad_norm": 0.104294054210186, "learning_rate": 4.516634021842355e-05, "loss": 0.0202, "step": 13530 }, { "epoch": 0.10008574554271014, "grad_norm": 0.08349345624446869, "learning_rate": 4.5162630579297246e-05, "loss": 0.0205, "step": 13540 }, { "epoch": 0.100159664114012, "grad_norm": 0.08465448766946793, "learning_rate": 4.515892094017094e-05, "loss": 0.0205, "step": 13550 }, { "epoch": 0.10023358268531386, "grad_norm": 0.10030587017536163, "learning_rate": 4.515521130104464e-05, "loss": 0.0207, "step": 13560 }, { "epoch": 0.10030750125661571, "grad_norm": 0.07631184905767441, "learning_rate": 4.5151501661918334e-05, "loss": 0.0206, "step": 13570 }, { "epoch": 0.10038141982791757, "grad_norm": 0.11903389543294907, "learning_rate": 4.514779202279202e-05, "loss": 0.0224, "step": 13580 }, { "epoch": 0.10045533839921943, "grad_norm": 0.08355337381362915, "learning_rate": 4.514408238366572e-05, "loss": 0.022, "step": 13590 }, { "epoch": 0.10052925697052127, "grad_norm": 0.07425417751073837, "learning_rate": 4.514037274453941e-05, "loss": 0.0182, "step": 13600 }, { "epoch": 0.10060317554182313, "grad_norm": 0.09235212951898575, "learning_rate": 4.5136663105413104e-05, "loss": 0.0203, "step": 13610 }, { "epoch": 0.10067709411312498, "grad_norm": 0.09667506068944931, "learning_rate": 4.51329534662868e-05, "loss": 0.0218, "step": 13620 }, { "epoch": 0.10075101268442684, "grad_norm": 0.09671285003423691, "learning_rate": 4.5129243827160496e-05, "loss": 0.0255, "step": 13630 }, { "epoch": 0.1008249312557287, "grad_norm": 0.10232903808355331, "learning_rate": 4.512553418803419e-05, "loss": 0.0226, "step": 13640 }, { "epoch": 0.10089884982703054, "grad_norm": 0.08583911508321762, "learning_rate": 4.512182454890788e-05, "loss": 0.0212, "step": 13650 }, { "epoch": 0.1009727683983324, "grad_norm": 0.10278382152318954, "learning_rate": 4.511811490978158e-05, "loss": 0.0203, "step": 13660 }, { "epoch": 0.10104668696963424, "grad_norm": 0.09178370237350464, "learning_rate": 4.5114405270655273e-05, "loss": 0.0186, "step": 13670 }, { "epoch": 0.1011206055409361, "grad_norm": 0.08496574312448502, "learning_rate": 4.511069563152896e-05, "loss": 0.0196, "step": 13680 }, { "epoch": 0.10119452411223796, "grad_norm": 0.09893114119768143, "learning_rate": 4.510698599240266e-05, "loss": 0.0186, "step": 13690 }, { "epoch": 0.10126844268353981, "grad_norm": 0.1062416285276413, "learning_rate": 4.510327635327636e-05, "loss": 0.0203, "step": 13700 }, { "epoch": 0.10134236125484167, "grad_norm": 0.11008942872285843, "learning_rate": 4.509956671415005e-05, "loss": 0.0224, "step": 13710 }, { "epoch": 0.10141627982614353, "grad_norm": 0.09531824290752411, "learning_rate": 4.5095857075023746e-05, "loss": 0.0205, "step": 13720 }, { "epoch": 0.10149019839744537, "grad_norm": 0.09373880177736282, "learning_rate": 4.5092147435897436e-05, "loss": 0.0225, "step": 13730 }, { "epoch": 0.10156411696874723, "grad_norm": 0.09772521257400513, "learning_rate": 4.508843779677113e-05, "loss": 0.0209, "step": 13740 }, { "epoch": 0.10163803554004908, "grad_norm": 0.11807812750339508, "learning_rate": 4.508472815764483e-05, "loss": 0.0218, "step": 13750 }, { "epoch": 0.10171195411135094, "grad_norm": 0.09919760376214981, "learning_rate": 4.508101851851852e-05, "loss": 0.0213, "step": 13760 }, { "epoch": 0.1017858726826528, "grad_norm": 0.13111914694309235, "learning_rate": 4.507730887939221e-05, "loss": 0.02, "step": 13770 }, { "epoch": 0.10185979125395464, "grad_norm": 0.11977981775999069, "learning_rate": 4.507359924026591e-05, "loss": 0.0218, "step": 13780 }, { "epoch": 0.1019337098252565, "grad_norm": 0.10085966438055038, "learning_rate": 4.5069889601139605e-05, "loss": 0.0223, "step": 13790 }, { "epoch": 0.10200762839655834, "grad_norm": 0.09333731234073639, "learning_rate": 4.50661799620133e-05, "loss": 0.0221, "step": 13800 }, { "epoch": 0.1020815469678602, "grad_norm": 0.11282014846801758, "learning_rate": 4.506247032288699e-05, "loss": 0.0223, "step": 13810 }, { "epoch": 0.10215546553916206, "grad_norm": 0.09185537695884705, "learning_rate": 4.5058760683760686e-05, "loss": 0.0199, "step": 13820 }, { "epoch": 0.10222938411046391, "grad_norm": 0.1047159805893898, "learning_rate": 4.5055051044634375e-05, "loss": 0.0214, "step": 13830 }, { "epoch": 0.10230330268176577, "grad_norm": 0.09216520935297012, "learning_rate": 4.505134140550807e-05, "loss": 0.0222, "step": 13840 }, { "epoch": 0.10237722125306763, "grad_norm": 0.10986544191837311, "learning_rate": 4.5047631766381774e-05, "loss": 0.0231, "step": 13850 }, { "epoch": 0.10245113982436947, "grad_norm": 0.1287481188774109, "learning_rate": 4.504392212725546e-05, "loss": 0.0206, "step": 13860 }, { "epoch": 0.10252505839567133, "grad_norm": 0.11915592104196548, "learning_rate": 4.504021248812916e-05, "loss": 0.0213, "step": 13870 }, { "epoch": 0.10259897696697318, "grad_norm": 0.10342303663492203, "learning_rate": 4.503650284900285e-05, "loss": 0.0211, "step": 13880 }, { "epoch": 0.10267289553827504, "grad_norm": 0.10142095386981964, "learning_rate": 4.5032793209876544e-05, "loss": 0.0206, "step": 13890 }, { "epoch": 0.1027468141095769, "grad_norm": 0.11021839082241058, "learning_rate": 4.502908357075024e-05, "loss": 0.0227, "step": 13900 }, { "epoch": 0.10282073268087874, "grad_norm": 0.09386792778968811, "learning_rate": 4.502537393162393e-05, "loss": 0.0216, "step": 13910 }, { "epoch": 0.1028946512521806, "grad_norm": 0.09008444100618362, "learning_rate": 4.5021664292497625e-05, "loss": 0.0241, "step": 13920 }, { "epoch": 0.10296856982348244, "grad_norm": 0.1056261882185936, "learning_rate": 4.501795465337133e-05, "loss": 0.0205, "step": 13930 }, { "epoch": 0.1030424883947843, "grad_norm": 0.09542742371559143, "learning_rate": 4.501424501424502e-05, "loss": 0.0189, "step": 13940 }, { "epoch": 0.10311640696608616, "grad_norm": 0.08561396598815918, "learning_rate": 4.501053537511871e-05, "loss": 0.0186, "step": 13950 }, { "epoch": 0.10319032553738801, "grad_norm": 0.06652536988258362, "learning_rate": 4.50068257359924e-05, "loss": 0.0179, "step": 13960 }, { "epoch": 0.10326424410868987, "grad_norm": 0.08740688860416412, "learning_rate": 4.50031160968661e-05, "loss": 0.0204, "step": 13970 }, { "epoch": 0.10333816267999173, "grad_norm": 0.1050075963139534, "learning_rate": 4.4999406457739794e-05, "loss": 0.021, "step": 13980 }, { "epoch": 0.10341208125129357, "grad_norm": 0.128191277384758, "learning_rate": 4.499569681861348e-05, "loss": 0.022, "step": 13990 }, { "epoch": 0.10348599982259543, "grad_norm": 0.0734374076128006, "learning_rate": 4.4991987179487186e-05, "loss": 0.02, "step": 14000 }, { "epoch": 0.10355991839389728, "grad_norm": 0.07442174106836319, "learning_rate": 4.4988277540360875e-05, "loss": 0.0174, "step": 14010 }, { "epoch": 0.10363383696519914, "grad_norm": 0.10080066323280334, "learning_rate": 4.498456790123457e-05, "loss": 0.0212, "step": 14020 }, { "epoch": 0.103707755536501, "grad_norm": 0.12612009048461914, "learning_rate": 4.498085826210827e-05, "loss": 0.0205, "step": 14030 }, { "epoch": 0.10378167410780284, "grad_norm": 0.08199171721935272, "learning_rate": 4.4977148622981956e-05, "loss": 0.023, "step": 14040 }, { "epoch": 0.1038555926791047, "grad_norm": 0.08755198121070862, "learning_rate": 4.497343898385565e-05, "loss": 0.0219, "step": 14050 }, { "epoch": 0.10392951125040656, "grad_norm": 0.10621949285268784, "learning_rate": 4.496972934472934e-05, "loss": 0.0212, "step": 14060 }, { "epoch": 0.1040034298217084, "grad_norm": 0.08471550047397614, "learning_rate": 4.496601970560304e-05, "loss": 0.0219, "step": 14070 }, { "epoch": 0.10407734839301026, "grad_norm": 0.07074908912181854, "learning_rate": 4.496231006647674e-05, "loss": 0.0182, "step": 14080 }, { "epoch": 0.10415126696431211, "grad_norm": 0.12259144335985184, "learning_rate": 4.495860042735043e-05, "loss": 0.0203, "step": 14090 }, { "epoch": 0.10422518553561397, "grad_norm": 0.06853964924812317, "learning_rate": 4.4954890788224125e-05, "loss": 0.0192, "step": 14100 }, { "epoch": 0.10429910410691583, "grad_norm": 0.10010594129562378, "learning_rate": 4.4951181149097815e-05, "loss": 0.0211, "step": 14110 }, { "epoch": 0.10437302267821767, "grad_norm": 0.09315397590398788, "learning_rate": 4.494747150997151e-05, "loss": 0.0246, "step": 14120 }, { "epoch": 0.10444694124951953, "grad_norm": 0.07536359876394272, "learning_rate": 4.4943761870845207e-05, "loss": 0.0201, "step": 14130 }, { "epoch": 0.10452085982082138, "grad_norm": 0.08098624646663666, "learning_rate": 4.4940052231718896e-05, "loss": 0.0211, "step": 14140 }, { "epoch": 0.10459477839212324, "grad_norm": 0.08475301414728165, "learning_rate": 4.49363425925926e-05, "loss": 0.0204, "step": 14150 }, { "epoch": 0.1046686969634251, "grad_norm": 0.10505864769220352, "learning_rate": 4.4932632953466294e-05, "loss": 0.0206, "step": 14160 }, { "epoch": 0.10474261553472694, "grad_norm": 0.08343085646629333, "learning_rate": 4.4928923314339984e-05, "loss": 0.0234, "step": 14170 }, { "epoch": 0.1048165341060288, "grad_norm": 0.10162326693534851, "learning_rate": 4.492521367521368e-05, "loss": 0.0232, "step": 14180 }, { "epoch": 0.10489045267733066, "grad_norm": 0.09022769331932068, "learning_rate": 4.492150403608737e-05, "loss": 0.0202, "step": 14190 }, { "epoch": 0.1049643712486325, "grad_norm": 0.12614984810352325, "learning_rate": 4.4917794396961065e-05, "loss": 0.0198, "step": 14200 }, { "epoch": 0.10503828981993436, "grad_norm": 0.07757915556430817, "learning_rate": 4.491408475783476e-05, "loss": 0.0197, "step": 14210 }, { "epoch": 0.10511220839123621, "grad_norm": 0.10445569455623627, "learning_rate": 4.491037511870845e-05, "loss": 0.0216, "step": 14220 }, { "epoch": 0.10518612696253807, "grad_norm": 0.08331640064716339, "learning_rate": 4.490666547958215e-05, "loss": 0.0204, "step": 14230 }, { "epoch": 0.10526004553383993, "grad_norm": 0.0920739397406578, "learning_rate": 4.490295584045584e-05, "loss": 0.02, "step": 14240 }, { "epoch": 0.10533396410514177, "grad_norm": 0.07688970863819122, "learning_rate": 4.489924620132954e-05, "loss": 0.0193, "step": 14250 }, { "epoch": 0.10540788267644363, "grad_norm": 0.09069108217954636, "learning_rate": 4.4895536562203234e-05, "loss": 0.0187, "step": 14260 }, { "epoch": 0.10548180124774548, "grad_norm": 0.10242221504449844, "learning_rate": 4.489182692307692e-05, "loss": 0.0232, "step": 14270 }, { "epoch": 0.10555571981904734, "grad_norm": 0.08866092562675476, "learning_rate": 4.488811728395062e-05, "loss": 0.021, "step": 14280 }, { "epoch": 0.1056296383903492, "grad_norm": 0.12382587045431137, "learning_rate": 4.488440764482431e-05, "loss": 0.0243, "step": 14290 }, { "epoch": 0.10570355696165104, "grad_norm": 0.09086389094591141, "learning_rate": 4.4880698005698004e-05, "loss": 0.0208, "step": 14300 }, { "epoch": 0.1057774755329529, "grad_norm": 0.0720980316400528, "learning_rate": 4.487698836657171e-05, "loss": 0.0216, "step": 14310 }, { "epoch": 0.10585139410425476, "grad_norm": 0.10160847753286362, "learning_rate": 4.4873278727445396e-05, "loss": 0.0198, "step": 14320 }, { "epoch": 0.1059253126755566, "grad_norm": 0.084689661860466, "learning_rate": 4.486956908831909e-05, "loss": 0.0197, "step": 14330 }, { "epoch": 0.10599923124685846, "grad_norm": 0.09439302235841751, "learning_rate": 4.486585944919278e-05, "loss": 0.0226, "step": 14340 }, { "epoch": 0.10607314981816031, "grad_norm": 0.11122968792915344, "learning_rate": 4.486214981006648e-05, "loss": 0.0246, "step": 14350 }, { "epoch": 0.10614706838946217, "grad_norm": 0.11770051717758179, "learning_rate": 4.485844017094017e-05, "loss": 0.0226, "step": 14360 }, { "epoch": 0.10622098696076403, "grad_norm": 0.0702885165810585, "learning_rate": 4.485473053181386e-05, "loss": 0.0204, "step": 14370 }, { "epoch": 0.10629490553206587, "grad_norm": 0.11021970957517624, "learning_rate": 4.4851020892687565e-05, "loss": 0.0212, "step": 14380 }, { "epoch": 0.10636882410336773, "grad_norm": 0.089713916182518, "learning_rate": 4.484731125356126e-05, "loss": 0.021, "step": 14390 }, { "epoch": 0.10644274267466958, "grad_norm": 0.08593543618917465, "learning_rate": 4.484360161443495e-05, "loss": 0.0238, "step": 14400 }, { "epoch": 0.10651666124597144, "grad_norm": 0.07170099765062332, "learning_rate": 4.4839891975308646e-05, "loss": 0.0214, "step": 14410 }, { "epoch": 0.1065905798172733, "grad_norm": 0.09407330304384232, "learning_rate": 4.4836182336182335e-05, "loss": 0.02, "step": 14420 }, { "epoch": 0.10666449838857514, "grad_norm": 0.09657812863588333, "learning_rate": 4.483247269705603e-05, "loss": 0.0197, "step": 14430 }, { "epoch": 0.106738416959877, "grad_norm": 0.05521457642316818, "learning_rate": 4.482876305792973e-05, "loss": 0.0181, "step": 14440 }, { "epoch": 0.10681233553117886, "grad_norm": 0.10616671293973923, "learning_rate": 4.4825053418803417e-05, "loss": 0.0219, "step": 14450 }, { "epoch": 0.1068862541024807, "grad_norm": 0.08795152604579926, "learning_rate": 4.482134377967712e-05, "loss": 0.0229, "step": 14460 }, { "epoch": 0.10696017267378256, "grad_norm": 0.1144707202911377, "learning_rate": 4.481763414055081e-05, "loss": 0.0192, "step": 14470 }, { "epoch": 0.10703409124508441, "grad_norm": 0.1001119464635849, "learning_rate": 4.4813924501424504e-05, "loss": 0.0198, "step": 14480 }, { "epoch": 0.10710800981638627, "grad_norm": 0.08099383115768433, "learning_rate": 4.48102148622982e-05, "loss": 0.021, "step": 14490 }, { "epoch": 0.10718192838768813, "grad_norm": 0.12439311295747757, "learning_rate": 4.480650522317189e-05, "loss": 0.0229, "step": 14500 }, { "epoch": 0.10725584695898997, "grad_norm": 0.076289102435112, "learning_rate": 4.4802795584045586e-05, "loss": 0.0188, "step": 14510 }, { "epoch": 0.10732976553029183, "grad_norm": 0.08145805448293686, "learning_rate": 4.4799085944919275e-05, "loss": 0.0197, "step": 14520 }, { "epoch": 0.10740368410159369, "grad_norm": 0.09102274477481842, "learning_rate": 4.479537630579298e-05, "loss": 0.0205, "step": 14530 }, { "epoch": 0.10747760267289554, "grad_norm": 0.09698771685361862, "learning_rate": 4.4791666666666673e-05, "loss": 0.0194, "step": 14540 }, { "epoch": 0.1075515212441974, "grad_norm": 0.11331616342067719, "learning_rate": 4.478795702754036e-05, "loss": 0.02, "step": 14550 }, { "epoch": 0.10762543981549924, "grad_norm": 0.09942898899316788, "learning_rate": 4.478424738841406e-05, "loss": 0.0189, "step": 14560 }, { "epoch": 0.1076993583868011, "grad_norm": 0.11539186537265778, "learning_rate": 4.478053774928775e-05, "loss": 0.0233, "step": 14570 }, { "epoch": 0.10777327695810296, "grad_norm": 0.08130382001399994, "learning_rate": 4.4776828110161444e-05, "loss": 0.0205, "step": 14580 }, { "epoch": 0.1078471955294048, "grad_norm": 0.09737993031740189, "learning_rate": 4.477311847103514e-05, "loss": 0.0228, "step": 14590 }, { "epoch": 0.10792111410070666, "grad_norm": 0.10642804205417633, "learning_rate": 4.476940883190883e-05, "loss": 0.0233, "step": 14600 }, { "epoch": 0.10799503267200851, "grad_norm": 0.08564222604036331, "learning_rate": 4.476569919278253e-05, "loss": 0.0194, "step": 14610 }, { "epoch": 0.10806895124331037, "grad_norm": 0.07180308550596237, "learning_rate": 4.476198955365623e-05, "loss": 0.0213, "step": 14620 }, { "epoch": 0.10814286981461223, "grad_norm": 0.08087430894374847, "learning_rate": 4.475827991452992e-05, "loss": 0.0185, "step": 14630 }, { "epoch": 0.10821678838591407, "grad_norm": 0.08423203974962234, "learning_rate": 4.475457027540361e-05, "loss": 0.018, "step": 14640 }, { "epoch": 0.10829070695721593, "grad_norm": 0.08719424903392792, "learning_rate": 4.47508606362773e-05, "loss": 0.0218, "step": 14650 }, { "epoch": 0.10836462552851779, "grad_norm": 0.08597151190042496, "learning_rate": 4.4747150997151e-05, "loss": 0.0205, "step": 14660 }, { "epoch": 0.10843854409981964, "grad_norm": 0.07289619743824005, "learning_rate": 4.4743441358024694e-05, "loss": 0.0217, "step": 14670 }, { "epoch": 0.1085124626711215, "grad_norm": 0.10662366449832916, "learning_rate": 4.473973171889839e-05, "loss": 0.0201, "step": 14680 }, { "epoch": 0.10858638124242334, "grad_norm": 0.0755082443356514, "learning_rate": 4.4736022079772086e-05, "loss": 0.0202, "step": 14690 }, { "epoch": 0.1086602998137252, "grad_norm": 0.07999914884567261, "learning_rate": 4.4732312440645775e-05, "loss": 0.0188, "step": 14700 }, { "epoch": 0.10873421838502706, "grad_norm": 0.09803368896245956, "learning_rate": 4.472860280151947e-05, "loss": 0.0206, "step": 14710 }, { "epoch": 0.1088081369563289, "grad_norm": 0.09981626272201538, "learning_rate": 4.472489316239317e-05, "loss": 0.021, "step": 14720 }, { "epoch": 0.10888205552763076, "grad_norm": 0.11182073503732681, "learning_rate": 4.4721183523266856e-05, "loss": 0.0218, "step": 14730 }, { "epoch": 0.10895597409893261, "grad_norm": 0.0778021365404129, "learning_rate": 4.471747388414055e-05, "loss": 0.0174, "step": 14740 }, { "epoch": 0.10902989267023447, "grad_norm": 0.10926984250545502, "learning_rate": 4.471376424501424e-05, "loss": 0.0232, "step": 14750 }, { "epoch": 0.10910381124153633, "grad_norm": 0.09038849174976349, "learning_rate": 4.4710054605887944e-05, "loss": 0.0225, "step": 14760 }, { "epoch": 0.10917772981283817, "grad_norm": 0.11312000453472137, "learning_rate": 4.470634496676164e-05, "loss": 0.0204, "step": 14770 }, { "epoch": 0.10925164838414003, "grad_norm": 0.11730439215898514, "learning_rate": 4.470263532763533e-05, "loss": 0.0227, "step": 14780 }, { "epoch": 0.10932556695544189, "grad_norm": 0.10705330222845078, "learning_rate": 4.4698925688509025e-05, "loss": 0.023, "step": 14790 }, { "epoch": 0.10939948552674374, "grad_norm": 0.0921492725610733, "learning_rate": 4.4695216049382714e-05, "loss": 0.0225, "step": 14800 }, { "epoch": 0.1094734040980456, "grad_norm": 0.14325913786888123, "learning_rate": 4.469150641025641e-05, "loss": 0.0217, "step": 14810 }, { "epoch": 0.10954732266934744, "grad_norm": 0.08691754192113876, "learning_rate": 4.4687796771130106e-05, "loss": 0.0202, "step": 14820 }, { "epoch": 0.1096212412406493, "grad_norm": 0.07764049619436264, "learning_rate": 4.46840871320038e-05, "loss": 0.0209, "step": 14830 }, { "epoch": 0.10969515981195116, "grad_norm": 0.12285088002681732, "learning_rate": 4.46803774928775e-05, "loss": 0.02, "step": 14840 }, { "epoch": 0.109769078383253, "grad_norm": 0.09305769950151443, "learning_rate": 4.4676667853751194e-05, "loss": 0.0211, "step": 14850 }, { "epoch": 0.10984299695455486, "grad_norm": 0.09885692596435547, "learning_rate": 4.4672958214624883e-05, "loss": 0.0194, "step": 14860 }, { "epoch": 0.10991691552585671, "grad_norm": 0.10101620852947235, "learning_rate": 4.466924857549858e-05, "loss": 0.0202, "step": 14870 }, { "epoch": 0.10999083409715857, "grad_norm": 0.10346149653196335, "learning_rate": 4.466553893637227e-05, "loss": 0.0205, "step": 14880 }, { "epoch": 0.11006475266846043, "grad_norm": 0.0820925235748291, "learning_rate": 4.4661829297245965e-05, "loss": 0.0208, "step": 14890 }, { "epoch": 0.11013867123976227, "grad_norm": 0.08407224714756012, "learning_rate": 4.465811965811966e-05, "loss": 0.0181, "step": 14900 }, { "epoch": 0.11021258981106413, "grad_norm": 0.12362422794103622, "learning_rate": 4.4654410018993356e-05, "loss": 0.022, "step": 14910 }, { "epoch": 0.11028650838236599, "grad_norm": 0.13008932769298553, "learning_rate": 4.465070037986705e-05, "loss": 0.0212, "step": 14920 }, { "epoch": 0.11036042695366784, "grad_norm": 0.08195054531097412, "learning_rate": 4.464699074074074e-05, "loss": 0.0198, "step": 14930 }, { "epoch": 0.1104343455249697, "grad_norm": 0.12018315494060516, "learning_rate": 4.464328110161444e-05, "loss": 0.0234, "step": 14940 }, { "epoch": 0.11050826409627154, "grad_norm": 0.16510340571403503, "learning_rate": 4.4639571462488134e-05, "loss": 0.0203, "step": 14950 }, { "epoch": 0.1105821826675734, "grad_norm": 0.09181898087263107, "learning_rate": 4.463586182336182e-05, "loss": 0.0207, "step": 14960 }, { "epoch": 0.11065610123887526, "grad_norm": 0.052205272018909454, "learning_rate": 4.463215218423552e-05, "loss": 0.0187, "step": 14970 }, { "epoch": 0.1107300198101771, "grad_norm": 0.09707576036453247, "learning_rate": 4.4628442545109215e-05, "loss": 0.0205, "step": 14980 }, { "epoch": 0.11080393838147896, "grad_norm": 0.0807408019900322, "learning_rate": 4.462473290598291e-05, "loss": 0.0196, "step": 14990 }, { "epoch": 0.11087785695278082, "grad_norm": 0.11290616542100906, "learning_rate": 4.462102326685661e-05, "loss": 0.0199, "step": 15000 }, { "epoch": 0.11095177552408267, "grad_norm": 0.09845750033855438, "learning_rate": 4.4617313627730296e-05, "loss": 0.0197, "step": 15010 }, { "epoch": 0.11102569409538453, "grad_norm": 0.07282011210918427, "learning_rate": 4.461360398860399e-05, "loss": 0.0225, "step": 15020 }, { "epoch": 0.11109961266668637, "grad_norm": 0.09254945814609528, "learning_rate": 4.460989434947768e-05, "loss": 0.0185, "step": 15030 }, { "epoch": 0.11117353123798823, "grad_norm": 0.08924317359924316, "learning_rate": 4.460618471035138e-05, "loss": 0.021, "step": 15040 }, { "epoch": 0.11124744980929009, "grad_norm": 0.0864916741847992, "learning_rate": 4.460247507122507e-05, "loss": 0.0192, "step": 15050 }, { "epoch": 0.11132136838059194, "grad_norm": 0.10492896288633347, "learning_rate": 4.459876543209877e-05, "loss": 0.0197, "step": 15060 }, { "epoch": 0.1113952869518938, "grad_norm": 0.0884445384144783, "learning_rate": 4.4595055792972465e-05, "loss": 0.02, "step": 15070 }, { "epoch": 0.11146920552319564, "grad_norm": 0.09605001658201218, "learning_rate": 4.459134615384616e-05, "loss": 0.0238, "step": 15080 }, { "epoch": 0.1115431240944975, "grad_norm": 0.1008792594075203, "learning_rate": 4.458763651471985e-05, "loss": 0.0208, "step": 15090 }, { "epoch": 0.11161704266579936, "grad_norm": 0.0722879096865654, "learning_rate": 4.4583926875593546e-05, "loss": 0.0198, "step": 15100 }, { "epoch": 0.1116909612371012, "grad_norm": 0.09575898945331573, "learning_rate": 4.4580217236467235e-05, "loss": 0.0212, "step": 15110 }, { "epoch": 0.11176487980840306, "grad_norm": 0.08117416501045227, "learning_rate": 4.457650759734093e-05, "loss": 0.0218, "step": 15120 }, { "epoch": 0.11183879837970492, "grad_norm": 0.14386782050132751, "learning_rate": 4.457279795821463e-05, "loss": 0.0218, "step": 15130 }, { "epoch": 0.11191271695100677, "grad_norm": 0.09009039402008057, "learning_rate": 4.456908831908832e-05, "loss": 0.0197, "step": 15140 }, { "epoch": 0.11198663552230863, "grad_norm": 0.08412953466176987, "learning_rate": 4.456537867996202e-05, "loss": 0.0188, "step": 15150 }, { "epoch": 0.11206055409361047, "grad_norm": 0.13613708317279816, "learning_rate": 4.456166904083571e-05, "loss": 0.0205, "step": 15160 }, { "epoch": 0.11213447266491233, "grad_norm": 0.08158623427152634, "learning_rate": 4.4557959401709404e-05, "loss": 0.0218, "step": 15170 }, { "epoch": 0.11220839123621419, "grad_norm": 0.11550930142402649, "learning_rate": 4.45542497625831e-05, "loss": 0.0231, "step": 15180 }, { "epoch": 0.11228230980751604, "grad_norm": 0.09520114958286285, "learning_rate": 4.455054012345679e-05, "loss": 0.0191, "step": 15190 }, { "epoch": 0.1123562283788179, "grad_norm": 0.10363199561834335, "learning_rate": 4.4546830484330485e-05, "loss": 0.023, "step": 15200 }, { "epoch": 0.11243014695011974, "grad_norm": 0.08404412120580673, "learning_rate": 4.454312084520418e-05, "loss": 0.0224, "step": 15210 }, { "epoch": 0.1125040655214216, "grad_norm": 0.08799764513969421, "learning_rate": 4.453941120607788e-05, "loss": 0.021, "step": 15220 }, { "epoch": 0.11257798409272346, "grad_norm": 0.09482520073652267, "learning_rate": 4.453570156695157e-05, "loss": 0.0209, "step": 15230 }, { "epoch": 0.1126519026640253, "grad_norm": 0.10100306570529938, "learning_rate": 4.453199192782526e-05, "loss": 0.0223, "step": 15240 }, { "epoch": 0.11272582123532716, "grad_norm": 0.10380079597234726, "learning_rate": 4.452828228869896e-05, "loss": 0.021, "step": 15250 }, { "epoch": 0.11279973980662902, "grad_norm": 0.10141590982675552, "learning_rate": 4.452457264957265e-05, "loss": 0.0188, "step": 15260 }, { "epoch": 0.11287365837793087, "grad_norm": 0.14011965692043304, "learning_rate": 4.4520863010446344e-05, "loss": 0.0218, "step": 15270 }, { "epoch": 0.11294757694923273, "grad_norm": 0.10623647272586823, "learning_rate": 4.451715337132004e-05, "loss": 0.0218, "step": 15280 }, { "epoch": 0.11302149552053457, "grad_norm": 0.06410671770572662, "learning_rate": 4.4513443732193735e-05, "loss": 0.0189, "step": 15290 }, { "epoch": 0.11309541409183643, "grad_norm": 0.1001250296831131, "learning_rate": 4.450973409306743e-05, "loss": 0.0209, "step": 15300 }, { "epoch": 0.11316933266313829, "grad_norm": 0.09662005305290222, "learning_rate": 4.450602445394113e-05, "loss": 0.02, "step": 15310 }, { "epoch": 0.11324325123444014, "grad_norm": 0.09809233248233795, "learning_rate": 4.4502314814814817e-05, "loss": 0.0212, "step": 15320 }, { "epoch": 0.113317169805742, "grad_norm": 0.0838894173502922, "learning_rate": 4.449860517568851e-05, "loss": 0.0191, "step": 15330 }, { "epoch": 0.11339108837704384, "grad_norm": 0.08231469243764877, "learning_rate": 4.44948955365622e-05, "loss": 0.02, "step": 15340 }, { "epoch": 0.1134650069483457, "grad_norm": 0.09870616346597672, "learning_rate": 4.44911858974359e-05, "loss": 0.0192, "step": 15350 }, { "epoch": 0.11353892551964756, "grad_norm": 0.09969879686832428, "learning_rate": 4.4487476258309594e-05, "loss": 0.0212, "step": 15360 }, { "epoch": 0.1136128440909494, "grad_norm": 0.11603955179452896, "learning_rate": 4.448376661918329e-05, "loss": 0.0204, "step": 15370 }, { "epoch": 0.11368676266225126, "grad_norm": 0.09245004504919052, "learning_rate": 4.4480056980056986e-05, "loss": 0.0223, "step": 15380 }, { "epoch": 0.11376068123355312, "grad_norm": 0.11372753977775574, "learning_rate": 4.4476347340930675e-05, "loss": 0.0203, "step": 15390 }, { "epoch": 0.11383459980485497, "grad_norm": 0.08970644325017929, "learning_rate": 4.447263770180437e-05, "loss": 0.019, "step": 15400 }, { "epoch": 0.11390851837615683, "grad_norm": 0.13420316576957703, "learning_rate": 4.446892806267807e-05, "loss": 0.0208, "step": 15410 }, { "epoch": 0.11398243694745867, "grad_norm": 0.09992843866348267, "learning_rate": 4.4465218423551756e-05, "loss": 0.0192, "step": 15420 }, { "epoch": 0.11405635551876053, "grad_norm": 0.11263995617628098, "learning_rate": 4.446150878442545e-05, "loss": 0.0201, "step": 15430 }, { "epoch": 0.11413027409006239, "grad_norm": 0.10258904844522476, "learning_rate": 4.445779914529915e-05, "loss": 0.0193, "step": 15440 }, { "epoch": 0.11420419266136424, "grad_norm": 0.09087394177913666, "learning_rate": 4.4454089506172844e-05, "loss": 0.0192, "step": 15450 }, { "epoch": 0.1142781112326661, "grad_norm": 0.07284771651029587, "learning_rate": 4.445037986704654e-05, "loss": 0.0189, "step": 15460 }, { "epoch": 0.11435202980396796, "grad_norm": 0.14658352732658386, "learning_rate": 4.444667022792023e-05, "loss": 0.0202, "step": 15470 }, { "epoch": 0.1144259483752698, "grad_norm": 0.08134383708238602, "learning_rate": 4.4442960588793925e-05, "loss": 0.0191, "step": 15480 }, { "epoch": 0.11449986694657166, "grad_norm": 0.09578991681337357, "learning_rate": 4.4439250949667614e-05, "loss": 0.0197, "step": 15490 }, { "epoch": 0.1145737855178735, "grad_norm": 0.12194101512432098, "learning_rate": 4.443554131054131e-05, "loss": 0.0214, "step": 15500 }, { "epoch": 0.11464770408917536, "grad_norm": 0.12530599534511566, "learning_rate": 4.4431831671415006e-05, "loss": 0.0192, "step": 15510 }, { "epoch": 0.11472162266047722, "grad_norm": 0.10177845507860184, "learning_rate": 4.44281220322887e-05, "loss": 0.0183, "step": 15520 }, { "epoch": 0.11479554123177907, "grad_norm": 0.14865587651729584, "learning_rate": 4.44244123931624e-05, "loss": 0.0193, "step": 15530 }, { "epoch": 0.11486945980308093, "grad_norm": 0.12489147484302521, "learning_rate": 4.4420702754036094e-05, "loss": 0.0221, "step": 15540 }, { "epoch": 0.11494337837438277, "grad_norm": 0.09141378849744797, "learning_rate": 4.441699311490978e-05, "loss": 0.019, "step": 15550 }, { "epoch": 0.11501729694568463, "grad_norm": 0.10528656095266342, "learning_rate": 4.441328347578348e-05, "loss": 0.0196, "step": 15560 }, { "epoch": 0.11509121551698649, "grad_norm": 0.10016972571611404, "learning_rate": 4.440957383665717e-05, "loss": 0.0217, "step": 15570 }, { "epoch": 0.11516513408828834, "grad_norm": 0.11202121526002884, "learning_rate": 4.4405864197530864e-05, "loss": 0.0205, "step": 15580 }, { "epoch": 0.1152390526595902, "grad_norm": 0.1344679892063141, "learning_rate": 4.440215455840456e-05, "loss": 0.0217, "step": 15590 }, { "epoch": 0.11531297123089206, "grad_norm": 0.09601288288831711, "learning_rate": 4.4398444919278256e-05, "loss": 0.0203, "step": 15600 }, { "epoch": 0.1153868898021939, "grad_norm": 0.15555888414382935, "learning_rate": 4.439473528015195e-05, "loss": 0.0194, "step": 15610 }, { "epoch": 0.11546080837349576, "grad_norm": 0.11112314462661743, "learning_rate": 4.439102564102564e-05, "loss": 0.0211, "step": 15620 }, { "epoch": 0.1155347269447976, "grad_norm": 0.07886022329330444, "learning_rate": 4.438731600189934e-05, "loss": 0.0212, "step": 15630 }, { "epoch": 0.11560864551609946, "grad_norm": 0.1283668577671051, "learning_rate": 4.438360636277303e-05, "loss": 0.0213, "step": 15640 }, { "epoch": 0.11568256408740132, "grad_norm": 0.07390929013490677, "learning_rate": 4.437989672364672e-05, "loss": 0.0195, "step": 15650 }, { "epoch": 0.11575648265870317, "grad_norm": 0.1706046760082245, "learning_rate": 4.437618708452042e-05, "loss": 0.0223, "step": 15660 }, { "epoch": 0.11583040123000503, "grad_norm": 0.11129933595657349, "learning_rate": 4.4372477445394114e-05, "loss": 0.0206, "step": 15670 }, { "epoch": 0.11590431980130687, "grad_norm": 0.14023716747760773, "learning_rate": 4.436876780626781e-05, "loss": 0.0219, "step": 15680 }, { "epoch": 0.11597823837260873, "grad_norm": 0.10003252327442169, "learning_rate": 4.4365058167141506e-05, "loss": 0.0203, "step": 15690 }, { "epoch": 0.11605215694391059, "grad_norm": 0.11612115055322647, "learning_rate": 4.4361348528015196e-05, "loss": 0.0199, "step": 15700 }, { "epoch": 0.11612607551521244, "grad_norm": 0.11864006519317627, "learning_rate": 4.435763888888889e-05, "loss": 0.0232, "step": 15710 }, { "epoch": 0.1161999940865143, "grad_norm": 0.09690623730421066, "learning_rate": 4.435392924976258e-05, "loss": 0.0181, "step": 15720 }, { "epoch": 0.11627391265781616, "grad_norm": 0.13921846449375153, "learning_rate": 4.435021961063628e-05, "loss": 0.022, "step": 15730 }, { "epoch": 0.116347831229118, "grad_norm": 0.12367802113294601, "learning_rate": 4.434650997150997e-05, "loss": 0.0194, "step": 15740 }, { "epoch": 0.11642174980041986, "grad_norm": 0.1278046816587448, "learning_rate": 4.434280033238367e-05, "loss": 0.0196, "step": 15750 }, { "epoch": 0.1164956683717217, "grad_norm": 0.1132681667804718, "learning_rate": 4.4339090693257365e-05, "loss": 0.0196, "step": 15760 }, { "epoch": 0.11656958694302356, "grad_norm": 0.1054956465959549, "learning_rate": 4.433538105413106e-05, "loss": 0.0194, "step": 15770 }, { "epoch": 0.11664350551432542, "grad_norm": 0.11253593862056732, "learning_rate": 4.433167141500475e-05, "loss": 0.0204, "step": 15780 }, { "epoch": 0.11671742408562727, "grad_norm": 0.11990190297365189, "learning_rate": 4.4327961775878446e-05, "loss": 0.0205, "step": 15790 }, { "epoch": 0.11679134265692913, "grad_norm": 0.11037104576826096, "learning_rate": 4.4324252136752135e-05, "loss": 0.0214, "step": 15800 }, { "epoch": 0.11686526122823097, "grad_norm": 0.1701211929321289, "learning_rate": 4.432054249762583e-05, "loss": 0.0229, "step": 15810 }, { "epoch": 0.11693917979953283, "grad_norm": 0.10061797499656677, "learning_rate": 4.431683285849953e-05, "loss": 0.0204, "step": 15820 }, { "epoch": 0.11701309837083469, "grad_norm": 0.09710580110549927, "learning_rate": 4.431312321937322e-05, "loss": 0.0228, "step": 15830 }, { "epoch": 0.11708701694213654, "grad_norm": 0.07980790734291077, "learning_rate": 4.430941358024692e-05, "loss": 0.0186, "step": 15840 }, { "epoch": 0.1171609355134384, "grad_norm": 0.09913813322782516, "learning_rate": 4.430570394112061e-05, "loss": 0.019, "step": 15850 }, { "epoch": 0.11723485408474026, "grad_norm": 0.09745988994836807, "learning_rate": 4.4301994301994304e-05, "loss": 0.0202, "step": 15860 }, { "epoch": 0.1173087726560421, "grad_norm": 0.0765075534582138, "learning_rate": 4.4298284662868e-05, "loss": 0.0199, "step": 15870 }, { "epoch": 0.11738269122734396, "grad_norm": 0.12214318662881851, "learning_rate": 4.429457502374169e-05, "loss": 0.0219, "step": 15880 }, { "epoch": 0.1174566097986458, "grad_norm": 0.12174449115991592, "learning_rate": 4.4290865384615385e-05, "loss": 0.0227, "step": 15890 }, { "epoch": 0.11753052836994766, "grad_norm": 0.0814589262008667, "learning_rate": 4.428715574548908e-05, "loss": 0.0225, "step": 15900 }, { "epoch": 0.11760444694124952, "grad_norm": 0.08090159296989441, "learning_rate": 4.428344610636278e-05, "loss": 0.0208, "step": 15910 }, { "epoch": 0.11767836551255137, "grad_norm": 0.08562711626291275, "learning_rate": 4.427973646723647e-05, "loss": 0.0222, "step": 15920 }, { "epoch": 0.11775228408385323, "grad_norm": 0.09362155199050903, "learning_rate": 4.427602682811016e-05, "loss": 0.0206, "step": 15930 }, { "epoch": 0.11782620265515509, "grad_norm": 0.14854660630226135, "learning_rate": 4.427231718898386e-05, "loss": 0.0218, "step": 15940 }, { "epoch": 0.11790012122645693, "grad_norm": 0.11786948144435883, "learning_rate": 4.426860754985755e-05, "loss": 0.0192, "step": 15950 }, { "epoch": 0.11797403979775879, "grad_norm": 0.08195752650499344, "learning_rate": 4.426489791073124e-05, "loss": 0.019, "step": 15960 }, { "epoch": 0.11804795836906064, "grad_norm": 0.1212175115942955, "learning_rate": 4.426118827160494e-05, "loss": 0.0231, "step": 15970 }, { "epoch": 0.1181218769403625, "grad_norm": 0.08144454658031464, "learning_rate": 4.4257478632478635e-05, "loss": 0.0202, "step": 15980 }, { "epoch": 0.11819579551166436, "grad_norm": 0.12607130408287048, "learning_rate": 4.425376899335233e-05, "loss": 0.0239, "step": 15990 }, { "epoch": 0.1182697140829662, "grad_norm": 0.0952596589922905, "learning_rate": 4.425005935422603e-05, "loss": 0.0195, "step": 16000 }, { "epoch": 0.11834363265426806, "grad_norm": 0.07940717041492462, "learning_rate": 4.4246349715099716e-05, "loss": 0.0186, "step": 16010 }, { "epoch": 0.1184175512255699, "grad_norm": 0.08581467717885971, "learning_rate": 4.424264007597341e-05, "loss": 0.0186, "step": 16020 }, { "epoch": 0.11849146979687177, "grad_norm": 0.06981861591339111, "learning_rate": 4.42389304368471e-05, "loss": 0.0201, "step": 16030 }, { "epoch": 0.11856538836817362, "grad_norm": 0.06816834211349487, "learning_rate": 4.42352207977208e-05, "loss": 0.0217, "step": 16040 }, { "epoch": 0.11863930693947547, "grad_norm": 0.09746452420949936, "learning_rate": 4.4231511158594493e-05, "loss": 0.023, "step": 16050 }, { "epoch": 0.11871322551077733, "grad_norm": 0.08126521855592728, "learning_rate": 4.422780151946819e-05, "loss": 0.0174, "step": 16060 }, { "epoch": 0.11878714408207919, "grad_norm": 0.13056722283363342, "learning_rate": 4.4224091880341885e-05, "loss": 0.0203, "step": 16070 }, { "epoch": 0.11886106265338103, "grad_norm": 0.13683772087097168, "learning_rate": 4.4220382241215575e-05, "loss": 0.0233, "step": 16080 }, { "epoch": 0.11893498122468289, "grad_norm": 0.09745480865240097, "learning_rate": 4.421667260208927e-05, "loss": 0.0193, "step": 16090 }, { "epoch": 0.11900889979598474, "grad_norm": 0.237451434135437, "learning_rate": 4.4212962962962966e-05, "loss": 0.0201, "step": 16100 }, { "epoch": 0.1190828183672866, "grad_norm": 0.11330193281173706, "learning_rate": 4.4209253323836656e-05, "loss": 0.023, "step": 16110 }, { "epoch": 0.11915673693858846, "grad_norm": 0.09872958064079285, "learning_rate": 4.420554368471035e-05, "loss": 0.0223, "step": 16120 }, { "epoch": 0.1192306555098903, "grad_norm": 0.11739817261695862, "learning_rate": 4.420183404558405e-05, "loss": 0.0211, "step": 16130 }, { "epoch": 0.11930457408119216, "grad_norm": 0.09575287997722626, "learning_rate": 4.4198124406457744e-05, "loss": 0.0205, "step": 16140 }, { "epoch": 0.119378492652494, "grad_norm": 0.08751419186592102, "learning_rate": 4.419441476733144e-05, "loss": 0.0201, "step": 16150 }, { "epoch": 0.11945241122379587, "grad_norm": 0.11821751296520233, "learning_rate": 4.419070512820513e-05, "loss": 0.0194, "step": 16160 }, { "epoch": 0.11952632979509772, "grad_norm": 0.09098798781633377, "learning_rate": 4.4186995489078825e-05, "loss": 0.0177, "step": 16170 }, { "epoch": 0.11960024836639957, "grad_norm": 0.10671043395996094, "learning_rate": 4.4183285849952514e-05, "loss": 0.0216, "step": 16180 }, { "epoch": 0.11967416693770143, "grad_norm": 0.12490768730640411, "learning_rate": 4.417957621082621e-05, "loss": 0.0193, "step": 16190 }, { "epoch": 0.11974808550900329, "grad_norm": 0.128347709774971, "learning_rate": 4.4175866571699906e-05, "loss": 0.0225, "step": 16200 }, { "epoch": 0.11982200408030513, "grad_norm": 0.11123131215572357, "learning_rate": 4.41721569325736e-05, "loss": 0.019, "step": 16210 }, { "epoch": 0.11989592265160699, "grad_norm": 0.0979531928896904, "learning_rate": 4.41684472934473e-05, "loss": 0.0222, "step": 16220 }, { "epoch": 0.11996984122290884, "grad_norm": 0.09113921225070953, "learning_rate": 4.4164737654320994e-05, "loss": 0.0217, "step": 16230 }, { "epoch": 0.1200437597942107, "grad_norm": 0.1220354214310646, "learning_rate": 4.416102801519468e-05, "loss": 0.0217, "step": 16240 }, { "epoch": 0.12011767836551256, "grad_norm": 0.0948338508605957, "learning_rate": 4.415731837606838e-05, "loss": 0.0241, "step": 16250 }, { "epoch": 0.1201915969368144, "grad_norm": 0.07255394011735916, "learning_rate": 4.415360873694207e-05, "loss": 0.0197, "step": 16260 }, { "epoch": 0.12026551550811626, "grad_norm": 0.11732756346464157, "learning_rate": 4.4149899097815764e-05, "loss": 0.0199, "step": 16270 }, { "epoch": 0.1203394340794181, "grad_norm": 0.0827333927154541, "learning_rate": 4.414618945868946e-05, "loss": 0.0209, "step": 16280 }, { "epoch": 0.12041335265071997, "grad_norm": 0.09649761021137238, "learning_rate": 4.4142479819563156e-05, "loss": 0.0192, "step": 16290 }, { "epoch": 0.12048727122202182, "grad_norm": 0.08975580334663391, "learning_rate": 4.413877018043685e-05, "loss": 0.0227, "step": 16300 }, { "epoch": 0.12056118979332367, "grad_norm": 0.08119421452283859, "learning_rate": 4.413506054131054e-05, "loss": 0.0211, "step": 16310 }, { "epoch": 0.12063510836462553, "grad_norm": 0.1294875144958496, "learning_rate": 4.413135090218424e-05, "loss": 0.0212, "step": 16320 }, { "epoch": 0.12070902693592739, "grad_norm": 0.1081729456782341, "learning_rate": 4.412764126305793e-05, "loss": 0.0224, "step": 16330 }, { "epoch": 0.12078294550722923, "grad_norm": 0.07694138586521149, "learning_rate": 4.412393162393162e-05, "loss": 0.0206, "step": 16340 }, { "epoch": 0.12085686407853109, "grad_norm": 0.08305425196886063, "learning_rate": 4.412022198480532e-05, "loss": 0.0191, "step": 16350 }, { "epoch": 0.12093078264983294, "grad_norm": 0.07094188034534454, "learning_rate": 4.4116512345679014e-05, "loss": 0.0201, "step": 16360 }, { "epoch": 0.1210047012211348, "grad_norm": 0.11019996553659439, "learning_rate": 4.411280270655271e-05, "loss": 0.019, "step": 16370 }, { "epoch": 0.12107861979243666, "grad_norm": 0.11648508161306381, "learning_rate": 4.4109093067426406e-05, "loss": 0.0206, "step": 16380 }, { "epoch": 0.1211525383637385, "grad_norm": 0.08518712222576141, "learning_rate": 4.4105383428300095e-05, "loss": 0.0232, "step": 16390 }, { "epoch": 0.12122645693504036, "grad_norm": 0.08771713823080063, "learning_rate": 4.410167378917379e-05, "loss": 0.0195, "step": 16400 }, { "epoch": 0.12130037550634222, "grad_norm": 0.0767797976732254, "learning_rate": 4.409796415004749e-05, "loss": 0.0225, "step": 16410 }, { "epoch": 0.12137429407764407, "grad_norm": 0.1259419023990631, "learning_rate": 4.4094254510921176e-05, "loss": 0.0212, "step": 16420 }, { "epoch": 0.12144821264894592, "grad_norm": 0.08342539519071579, "learning_rate": 4.409054487179487e-05, "loss": 0.0194, "step": 16430 }, { "epoch": 0.12152213122024777, "grad_norm": 0.13784949481487274, "learning_rate": 4.408683523266857e-05, "loss": 0.0218, "step": 16440 }, { "epoch": 0.12159604979154963, "grad_norm": 0.09963231533765793, "learning_rate": 4.4083125593542264e-05, "loss": 0.0204, "step": 16450 }, { "epoch": 0.12166996836285149, "grad_norm": 0.09076182544231415, "learning_rate": 4.407941595441596e-05, "loss": 0.0221, "step": 16460 }, { "epoch": 0.12174388693415333, "grad_norm": 0.09481775015592575, "learning_rate": 4.407570631528965e-05, "loss": 0.0197, "step": 16470 }, { "epoch": 0.12181780550545519, "grad_norm": 0.11794617772102356, "learning_rate": 4.4071996676163345e-05, "loss": 0.0197, "step": 16480 }, { "epoch": 0.12189172407675704, "grad_norm": 0.08506674319505692, "learning_rate": 4.4068287037037035e-05, "loss": 0.0219, "step": 16490 }, { "epoch": 0.1219656426480589, "grad_norm": 0.10553494095802307, "learning_rate": 4.406457739791073e-05, "loss": 0.0215, "step": 16500 }, { "epoch": 0.12203956121936076, "grad_norm": 0.10346570611000061, "learning_rate": 4.4060867758784427e-05, "loss": 0.0224, "step": 16510 }, { "epoch": 0.1221134797906626, "grad_norm": 0.10566481202840805, "learning_rate": 4.405715811965812e-05, "loss": 0.0199, "step": 16520 }, { "epoch": 0.12218739836196446, "grad_norm": 0.0927921012043953, "learning_rate": 4.405344848053182e-05, "loss": 0.0211, "step": 16530 }, { "epoch": 0.12226131693326632, "grad_norm": 0.08882363885641098, "learning_rate": 4.404973884140551e-05, "loss": 0.0171, "step": 16540 }, { "epoch": 0.12233523550456817, "grad_norm": 0.09634187072515488, "learning_rate": 4.4046029202279204e-05, "loss": 0.0199, "step": 16550 }, { "epoch": 0.12240915407587002, "grad_norm": 0.1210630014538765, "learning_rate": 4.40423195631529e-05, "loss": 0.0196, "step": 16560 }, { "epoch": 0.12248307264717187, "grad_norm": 0.14356377720832825, "learning_rate": 4.403860992402659e-05, "loss": 0.0205, "step": 16570 }, { "epoch": 0.12255699121847373, "grad_norm": 0.0979805588722229, "learning_rate": 4.4034900284900285e-05, "loss": 0.0178, "step": 16580 }, { "epoch": 0.12263090978977559, "grad_norm": 0.1003301665186882, "learning_rate": 4.403119064577398e-05, "loss": 0.0215, "step": 16590 }, { "epoch": 0.12270482836107743, "grad_norm": 0.08601482957601547, "learning_rate": 4.402748100664768e-05, "loss": 0.0194, "step": 16600 }, { "epoch": 0.12277874693237929, "grad_norm": 0.1280229091644287, "learning_rate": 4.402377136752137e-05, "loss": 0.0201, "step": 16610 }, { "epoch": 0.12285266550368114, "grad_norm": 0.08791964501142502, "learning_rate": 4.402006172839506e-05, "loss": 0.0205, "step": 16620 }, { "epoch": 0.122926584074983, "grad_norm": 0.06108580902218819, "learning_rate": 4.401635208926876e-05, "loss": 0.0167, "step": 16630 }, { "epoch": 0.12300050264628486, "grad_norm": 0.08036208152770996, "learning_rate": 4.4012642450142454e-05, "loss": 0.0193, "step": 16640 }, { "epoch": 0.1230744212175867, "grad_norm": 0.1433434784412384, "learning_rate": 4.400893281101614e-05, "loss": 0.0211, "step": 16650 }, { "epoch": 0.12314833978888856, "grad_norm": 0.09189280867576599, "learning_rate": 4.400522317188984e-05, "loss": 0.0214, "step": 16660 }, { "epoch": 0.12322225836019042, "grad_norm": 0.08462630957365036, "learning_rate": 4.4001513532763535e-05, "loss": 0.02, "step": 16670 }, { "epoch": 0.12329617693149227, "grad_norm": 0.09673482924699783, "learning_rate": 4.399780389363723e-05, "loss": 0.0188, "step": 16680 }, { "epoch": 0.12337009550279412, "grad_norm": 0.10847456008195877, "learning_rate": 4.399409425451093e-05, "loss": 0.0201, "step": 16690 }, { "epoch": 0.12344401407409597, "grad_norm": 0.08582521229982376, "learning_rate": 4.3990384615384616e-05, "loss": 0.0215, "step": 16700 }, { "epoch": 0.12351793264539783, "grad_norm": 0.08930855244398117, "learning_rate": 4.398667497625831e-05, "loss": 0.0228, "step": 16710 }, { "epoch": 0.12359185121669969, "grad_norm": 0.08313935995101929, "learning_rate": 4.3982965337132e-05, "loss": 0.02, "step": 16720 }, { "epoch": 0.12366576978800153, "grad_norm": 0.09864447265863419, "learning_rate": 4.39792556980057e-05, "loss": 0.0222, "step": 16730 }, { "epoch": 0.12373968835930339, "grad_norm": 0.07049126923084259, "learning_rate": 4.39755460588794e-05, "loss": 0.0178, "step": 16740 }, { "epoch": 0.12381360693060524, "grad_norm": 0.09831076860427856, "learning_rate": 4.397183641975309e-05, "loss": 0.0199, "step": 16750 }, { "epoch": 0.1238875255019071, "grad_norm": 0.08048636466264725, "learning_rate": 4.3968126780626785e-05, "loss": 0.0202, "step": 16760 }, { "epoch": 0.12396144407320896, "grad_norm": 0.14722725749015808, "learning_rate": 4.3964417141500474e-05, "loss": 0.0191, "step": 16770 }, { "epoch": 0.1240353626445108, "grad_norm": 0.09972026944160461, "learning_rate": 4.396070750237417e-05, "loss": 0.0192, "step": 16780 }, { "epoch": 0.12410928121581266, "grad_norm": 0.08267451077699661, "learning_rate": 4.3956997863247866e-05, "loss": 0.0205, "step": 16790 }, { "epoch": 0.12418319978711452, "grad_norm": 0.08474751561880112, "learning_rate": 4.3953288224121555e-05, "loss": 0.0203, "step": 16800 }, { "epoch": 0.12425711835841637, "grad_norm": 0.09754516929388046, "learning_rate": 4.394957858499525e-05, "loss": 0.0189, "step": 16810 }, { "epoch": 0.12433103692971822, "grad_norm": 0.07999278604984283, "learning_rate": 4.394586894586895e-05, "loss": 0.0182, "step": 16820 }, { "epoch": 0.12440495550102007, "grad_norm": 0.08049635589122772, "learning_rate": 4.394215930674264e-05, "loss": 0.0195, "step": 16830 }, { "epoch": 0.12447887407232193, "grad_norm": 0.090194471180439, "learning_rate": 4.393844966761634e-05, "loss": 0.0194, "step": 16840 }, { "epoch": 0.12455279264362379, "grad_norm": 0.09693975001573563, "learning_rate": 4.393474002849003e-05, "loss": 0.0201, "step": 16850 }, { "epoch": 0.12462671121492563, "grad_norm": 0.1546517014503479, "learning_rate": 4.3931030389363724e-05, "loss": 0.0199, "step": 16860 }, { "epoch": 0.1247006297862275, "grad_norm": 0.11674577742815018, "learning_rate": 4.392732075023742e-05, "loss": 0.0207, "step": 16870 }, { "epoch": 0.12477454835752935, "grad_norm": 0.09541032463312149, "learning_rate": 4.392361111111111e-05, "loss": 0.0228, "step": 16880 }, { "epoch": 0.1248484669288312, "grad_norm": 0.09020016342401505, "learning_rate": 4.391990147198481e-05, "loss": 0.0193, "step": 16890 }, { "epoch": 0.12492238550013306, "grad_norm": 0.08206814527511597, "learning_rate": 4.39161918328585e-05, "loss": 0.0228, "step": 16900 }, { "epoch": 0.1249963040714349, "grad_norm": 0.07679471373558044, "learning_rate": 4.39124821937322e-05, "loss": 0.02, "step": 16910 }, { "epoch": 0.12507022264273676, "grad_norm": 0.08841928094625473, "learning_rate": 4.3908772554605893e-05, "loss": 0.0212, "step": 16920 }, { "epoch": 0.1251441412140386, "grad_norm": 0.08420246094465256, "learning_rate": 4.390506291547958e-05, "loss": 0.0219, "step": 16930 }, { "epoch": 0.12521805978534048, "grad_norm": 0.11030604690313339, "learning_rate": 4.390135327635328e-05, "loss": 0.0221, "step": 16940 }, { "epoch": 0.12529197835664233, "grad_norm": 0.08040262013673782, "learning_rate": 4.389764363722697e-05, "loss": 0.0184, "step": 16950 }, { "epoch": 0.12536589692794417, "grad_norm": 0.07484613358974457, "learning_rate": 4.3893933998100664e-05, "loss": 0.0226, "step": 16960 }, { "epoch": 0.12543981549924604, "grad_norm": 0.06491480022668839, "learning_rate": 4.3890224358974367e-05, "loss": 0.0207, "step": 16970 }, { "epoch": 0.1255137340705479, "grad_norm": 0.07864172756671906, "learning_rate": 4.3886514719848056e-05, "loss": 0.0184, "step": 16980 }, { "epoch": 0.12558765264184973, "grad_norm": 0.11037025600671768, "learning_rate": 4.388280508072175e-05, "loss": 0.0184, "step": 16990 }, { "epoch": 0.12566157121315158, "grad_norm": 0.0728209838271141, "learning_rate": 4.387909544159544e-05, "loss": 0.0201, "step": 17000 }, { "epoch": 0.12573548978445345, "grad_norm": 0.10479336977005005, "learning_rate": 4.387538580246914e-05, "loss": 0.0212, "step": 17010 }, { "epoch": 0.1258094083557553, "grad_norm": 0.09833086282014847, "learning_rate": 4.387167616334283e-05, "loss": 0.0204, "step": 17020 }, { "epoch": 0.12588332692705714, "grad_norm": 0.07104408740997314, "learning_rate": 4.386796652421652e-05, "loss": 0.0212, "step": 17030 }, { "epoch": 0.12595724549835902, "grad_norm": 0.0565701425075531, "learning_rate": 4.386425688509022e-05, "loss": 0.0209, "step": 17040 }, { "epoch": 0.12603116406966086, "grad_norm": 0.08204209804534912, "learning_rate": 4.3860547245963914e-05, "loss": 0.0183, "step": 17050 }, { "epoch": 0.1261050826409627, "grad_norm": 0.10821973532438278, "learning_rate": 4.385683760683761e-05, "loss": 0.0234, "step": 17060 }, { "epoch": 0.12617900121226458, "grad_norm": 0.0890052542090416, "learning_rate": 4.3853127967711306e-05, "loss": 0.0186, "step": 17070 }, { "epoch": 0.12625291978356643, "grad_norm": 0.10424130409955978, "learning_rate": 4.3849418328584995e-05, "loss": 0.0216, "step": 17080 }, { "epoch": 0.12632683835486827, "grad_norm": 0.0998992845416069, "learning_rate": 4.384570868945869e-05, "loss": 0.0228, "step": 17090 }, { "epoch": 0.12640075692617014, "grad_norm": 0.11746528744697571, "learning_rate": 4.384199905033239e-05, "loss": 0.0189, "step": 17100 }, { "epoch": 0.126474675497472, "grad_norm": 0.10497764497995377, "learning_rate": 4.3838289411206076e-05, "loss": 0.0188, "step": 17110 }, { "epoch": 0.12654859406877383, "grad_norm": 0.07906600832939148, "learning_rate": 4.383457977207978e-05, "loss": 0.0191, "step": 17120 }, { "epoch": 0.12662251264007568, "grad_norm": 0.09107011556625366, "learning_rate": 4.383087013295347e-05, "loss": 0.0178, "step": 17130 }, { "epoch": 0.12669643121137755, "grad_norm": 0.10516030341386795, "learning_rate": 4.3827160493827164e-05, "loss": 0.0195, "step": 17140 }, { "epoch": 0.1267703497826794, "grad_norm": 0.05971836671233177, "learning_rate": 4.382345085470086e-05, "loss": 0.0182, "step": 17150 }, { "epoch": 0.12684426835398124, "grad_norm": 0.07957801967859268, "learning_rate": 4.381974121557455e-05, "loss": 0.021, "step": 17160 }, { "epoch": 0.12691818692528312, "grad_norm": 0.0885632112622261, "learning_rate": 4.3816031576448245e-05, "loss": 0.0205, "step": 17170 }, { "epoch": 0.12699210549658496, "grad_norm": 0.11395470798015594, "learning_rate": 4.3812321937321934e-05, "loss": 0.0208, "step": 17180 }, { "epoch": 0.1270660240678868, "grad_norm": 0.06711099296808243, "learning_rate": 4.380861229819563e-05, "loss": 0.02, "step": 17190 }, { "epoch": 0.12713994263918868, "grad_norm": 0.07606938481330872, "learning_rate": 4.380490265906933e-05, "loss": 0.0246, "step": 17200 }, { "epoch": 0.12721386121049053, "grad_norm": 0.08802618086338043, "learning_rate": 4.380119301994302e-05, "loss": 0.0192, "step": 17210 }, { "epoch": 0.12728777978179237, "grad_norm": 0.10931966453790665, "learning_rate": 4.379748338081672e-05, "loss": 0.0219, "step": 17220 }, { "epoch": 0.12736169835309424, "grad_norm": 0.0964532196521759, "learning_rate": 4.379377374169041e-05, "loss": 0.0203, "step": 17230 }, { "epoch": 0.1274356169243961, "grad_norm": 0.07935398072004318, "learning_rate": 4.3790064102564103e-05, "loss": 0.0168, "step": 17240 }, { "epoch": 0.12750953549569793, "grad_norm": 0.0767403393983841, "learning_rate": 4.37863544634378e-05, "loss": 0.0209, "step": 17250 }, { "epoch": 0.12758345406699978, "grad_norm": 0.10330316424369812, "learning_rate": 4.378264482431149e-05, "loss": 0.0215, "step": 17260 }, { "epoch": 0.12765737263830165, "grad_norm": 0.09282982349395752, "learning_rate": 4.377893518518519e-05, "loss": 0.0228, "step": 17270 }, { "epoch": 0.1277312912096035, "grad_norm": 0.07249481230974197, "learning_rate": 4.377522554605888e-05, "loss": 0.019, "step": 17280 }, { "epoch": 0.12780520978090534, "grad_norm": 0.06935325264930725, "learning_rate": 4.3771515906932576e-05, "loss": 0.0208, "step": 17290 }, { "epoch": 0.12787912835220722, "grad_norm": 0.0685378834605217, "learning_rate": 4.376780626780627e-05, "loss": 0.0202, "step": 17300 }, { "epoch": 0.12795304692350906, "grad_norm": 0.11913516372442245, "learning_rate": 4.376409662867996e-05, "loss": 0.0209, "step": 17310 }, { "epoch": 0.1280269654948109, "grad_norm": 0.10438575595617294, "learning_rate": 4.376038698955366e-05, "loss": 0.0213, "step": 17320 }, { "epoch": 0.12810088406611278, "grad_norm": 0.12262512743473053, "learning_rate": 4.3756677350427354e-05, "loss": 0.0217, "step": 17330 }, { "epoch": 0.12817480263741463, "grad_norm": 0.16375628113746643, "learning_rate": 4.375296771130104e-05, "loss": 0.0196, "step": 17340 }, { "epoch": 0.12824872120871647, "grad_norm": 0.08159057796001434, "learning_rate": 4.3749258072174746e-05, "loss": 0.0228, "step": 17350 }, { "epoch": 0.12832263978001834, "grad_norm": 0.08955547213554382, "learning_rate": 4.3745548433048435e-05, "loss": 0.02, "step": 17360 }, { "epoch": 0.1283965583513202, "grad_norm": 0.09596460312604904, "learning_rate": 4.374183879392213e-05, "loss": 0.0193, "step": 17370 }, { "epoch": 0.12847047692262203, "grad_norm": 0.1025465801358223, "learning_rate": 4.373812915479583e-05, "loss": 0.0218, "step": 17380 }, { "epoch": 0.12854439549392388, "grad_norm": 0.06650226563215256, "learning_rate": 4.3734419515669516e-05, "loss": 0.0191, "step": 17390 }, { "epoch": 0.12861831406522575, "grad_norm": 0.11879001557826996, "learning_rate": 4.373070987654321e-05, "loss": 0.0214, "step": 17400 }, { "epoch": 0.1286922326365276, "grad_norm": 0.10580756515264511, "learning_rate": 4.37270002374169e-05, "loss": 0.0224, "step": 17410 }, { "epoch": 0.12876615120782944, "grad_norm": 0.1196075975894928, "learning_rate": 4.3723290598290604e-05, "loss": 0.0201, "step": 17420 }, { "epoch": 0.12884006977913132, "grad_norm": 0.11283430457115173, "learning_rate": 4.37195809591643e-05, "loss": 0.0214, "step": 17430 }, { "epoch": 0.12891398835043316, "grad_norm": 0.0617198720574379, "learning_rate": 4.371587132003799e-05, "loss": 0.0199, "step": 17440 }, { "epoch": 0.128987906921735, "grad_norm": 0.0883866548538208, "learning_rate": 4.3712161680911685e-05, "loss": 0.0195, "step": 17450 }, { "epoch": 0.12906182549303688, "grad_norm": 0.10958422720432281, "learning_rate": 4.3708452041785374e-05, "loss": 0.0211, "step": 17460 }, { "epoch": 0.12913574406433873, "grad_norm": 0.08933533728122711, "learning_rate": 4.370474240265907e-05, "loss": 0.0221, "step": 17470 }, { "epoch": 0.12920966263564057, "grad_norm": 0.12817147374153137, "learning_rate": 4.3701032763532766e-05, "loss": 0.0227, "step": 17480 }, { "epoch": 0.12928358120694244, "grad_norm": 0.1111520379781723, "learning_rate": 4.3697323124406455e-05, "loss": 0.0197, "step": 17490 }, { "epoch": 0.1293574997782443, "grad_norm": 0.07977577298879623, "learning_rate": 4.369361348528016e-05, "loss": 0.0199, "step": 17500 }, { "epoch": 0.12943141834954613, "grad_norm": 0.09112214297056198, "learning_rate": 4.368990384615385e-05, "loss": 0.0203, "step": 17510 }, { "epoch": 0.129505336920848, "grad_norm": 0.11383547633886337, "learning_rate": 4.368619420702754e-05, "loss": 0.0197, "step": 17520 }, { "epoch": 0.12957925549214985, "grad_norm": 0.11252755671739578, "learning_rate": 4.368248456790124e-05, "loss": 0.0224, "step": 17530 }, { "epoch": 0.1296531740634517, "grad_norm": 0.10879248380661011, "learning_rate": 4.367877492877493e-05, "loss": 0.0235, "step": 17540 }, { "epoch": 0.12972709263475354, "grad_norm": 0.10132326930761337, "learning_rate": 4.3675065289648624e-05, "loss": 0.0186, "step": 17550 }, { "epoch": 0.12980101120605542, "grad_norm": 0.10024122148752213, "learning_rate": 4.367135565052232e-05, "loss": 0.0201, "step": 17560 }, { "epoch": 0.12987492977735726, "grad_norm": 0.11927720159292221, "learning_rate": 4.3667646011396016e-05, "loss": 0.0206, "step": 17570 }, { "epoch": 0.1299488483486591, "grad_norm": 0.10507655143737793, "learning_rate": 4.366393637226971e-05, "loss": 0.0187, "step": 17580 }, { "epoch": 0.13002276691996098, "grad_norm": 0.13051213324069977, "learning_rate": 4.36602267331434e-05, "loss": 0.0206, "step": 17590 }, { "epoch": 0.13009668549126283, "grad_norm": 0.09864769130945206, "learning_rate": 4.36565170940171e-05, "loss": 0.0231, "step": 17600 }, { "epoch": 0.13017060406256467, "grad_norm": 0.12273035198450089, "learning_rate": 4.365280745489079e-05, "loss": 0.0194, "step": 17610 }, { "epoch": 0.13024452263386654, "grad_norm": 0.08343586325645447, "learning_rate": 4.364909781576448e-05, "loss": 0.02, "step": 17620 }, { "epoch": 0.1303184412051684, "grad_norm": 0.09308631718158722, "learning_rate": 4.364538817663818e-05, "loss": 0.0161, "step": 17630 }, { "epoch": 0.13039235977647023, "grad_norm": 0.1017531156539917, "learning_rate": 4.364167853751187e-05, "loss": 0.0191, "step": 17640 }, { "epoch": 0.1304662783477721, "grad_norm": 0.11994847655296326, "learning_rate": 4.363796889838557e-05, "loss": 0.0206, "step": 17650 }, { "epoch": 0.13054019691907395, "grad_norm": 0.0923483669757843, "learning_rate": 4.3634259259259266e-05, "loss": 0.0199, "step": 17660 }, { "epoch": 0.1306141154903758, "grad_norm": 0.08812946081161499, "learning_rate": 4.3630549620132955e-05, "loss": 0.0198, "step": 17670 }, { "epoch": 0.13068803406167764, "grad_norm": 0.09632091224193573, "learning_rate": 4.362683998100665e-05, "loss": 0.0199, "step": 17680 }, { "epoch": 0.13076195263297952, "grad_norm": 0.07714105397462845, "learning_rate": 4.362313034188034e-05, "loss": 0.0183, "step": 17690 }, { "epoch": 0.13083587120428136, "grad_norm": 0.10829984396696091, "learning_rate": 4.3619420702754037e-05, "loss": 0.02, "step": 17700 }, { "epoch": 0.1309097897755832, "grad_norm": 0.10526656359434128, "learning_rate": 4.361571106362773e-05, "loss": 0.0195, "step": 17710 }, { "epoch": 0.13098370834688508, "grad_norm": 0.11419089138507843, "learning_rate": 4.361200142450143e-05, "loss": 0.0182, "step": 17720 }, { "epoch": 0.13105762691818693, "grad_norm": 0.07258699089288712, "learning_rate": 4.3608291785375125e-05, "loss": 0.0193, "step": 17730 }, { "epoch": 0.13113154548948877, "grad_norm": 0.1357620507478714, "learning_rate": 4.3604582146248814e-05, "loss": 0.0187, "step": 17740 }, { "epoch": 0.13120546406079064, "grad_norm": 0.0952298641204834, "learning_rate": 4.360087250712251e-05, "loss": 0.0223, "step": 17750 }, { "epoch": 0.1312793826320925, "grad_norm": 0.08755937218666077, "learning_rate": 4.3597162867996206e-05, "loss": 0.0214, "step": 17760 }, { "epoch": 0.13135330120339433, "grad_norm": 0.10102654248476028, "learning_rate": 4.3593453228869895e-05, "loss": 0.0181, "step": 17770 }, { "epoch": 0.1314272197746962, "grad_norm": 0.0867420956492424, "learning_rate": 4.358974358974359e-05, "loss": 0.0215, "step": 17780 }, { "epoch": 0.13150113834599805, "grad_norm": 0.1367752104997635, "learning_rate": 4.358603395061729e-05, "loss": 0.0219, "step": 17790 }, { "epoch": 0.1315750569172999, "grad_norm": 0.09189877659082413, "learning_rate": 4.358232431149098e-05, "loss": 0.0206, "step": 17800 }, { "epoch": 0.13164897548860174, "grad_norm": 0.07793890684843063, "learning_rate": 4.357861467236468e-05, "loss": 0.0196, "step": 17810 }, { "epoch": 0.13172289405990362, "grad_norm": 0.10152376443147659, "learning_rate": 4.357490503323837e-05, "loss": 0.0193, "step": 17820 }, { "epoch": 0.13179681263120546, "grad_norm": 0.10715579241514206, "learning_rate": 4.3571195394112064e-05, "loss": 0.0221, "step": 17830 }, { "epoch": 0.1318707312025073, "grad_norm": 0.07988528907299042, "learning_rate": 4.356748575498576e-05, "loss": 0.0172, "step": 17840 }, { "epoch": 0.13194464977380918, "grad_norm": 0.1156516894698143, "learning_rate": 4.356377611585945e-05, "loss": 0.0201, "step": 17850 }, { "epoch": 0.13201856834511103, "grad_norm": 0.07987456023693085, "learning_rate": 4.3560066476733145e-05, "loss": 0.0212, "step": 17860 }, { "epoch": 0.13209248691641287, "grad_norm": 0.1036411002278328, "learning_rate": 4.355635683760684e-05, "loss": 0.0184, "step": 17870 }, { "epoch": 0.13216640548771474, "grad_norm": 0.06169649586081505, "learning_rate": 4.355264719848054e-05, "loss": 0.0213, "step": 17880 }, { "epoch": 0.1322403240590166, "grad_norm": 0.0923336073756218, "learning_rate": 4.354893755935423e-05, "loss": 0.0193, "step": 17890 }, { "epoch": 0.13231424263031843, "grad_norm": 0.09562145173549652, "learning_rate": 4.354522792022792e-05, "loss": 0.0225, "step": 17900 }, { "epoch": 0.1323881612016203, "grad_norm": 0.09590861946344376, "learning_rate": 4.354151828110162e-05, "loss": 0.0205, "step": 17910 }, { "epoch": 0.13246207977292215, "grad_norm": 0.1031101867556572, "learning_rate": 4.353780864197531e-05, "loss": 0.0195, "step": 17920 }, { "epoch": 0.132535998344224, "grad_norm": 0.08585697412490845, "learning_rate": 4.3534099002849e-05, "loss": 0.0206, "step": 17930 }, { "epoch": 0.13260991691552584, "grad_norm": 0.08860399574041367, "learning_rate": 4.35303893637227e-05, "loss": 0.0194, "step": 17940 }, { "epoch": 0.13268383548682772, "grad_norm": 0.1192474216222763, "learning_rate": 4.3526679724596395e-05, "loss": 0.0201, "step": 17950 }, { "epoch": 0.13275775405812956, "grad_norm": 0.08405131101608276, "learning_rate": 4.352297008547009e-05, "loss": 0.0195, "step": 17960 }, { "epoch": 0.1328316726294314, "grad_norm": 0.07257207483053207, "learning_rate": 4.351926044634378e-05, "loss": 0.0164, "step": 17970 }, { "epoch": 0.13290559120073328, "grad_norm": 0.08803092688322067, "learning_rate": 4.3515550807217476e-05, "loss": 0.0187, "step": 17980 }, { "epoch": 0.13297950977203513, "grad_norm": 0.092405766248703, "learning_rate": 4.351184116809117e-05, "loss": 0.0187, "step": 17990 }, { "epoch": 0.13305342834333697, "grad_norm": 0.08581379801034927, "learning_rate": 4.350813152896486e-05, "loss": 0.0199, "step": 18000 }, { "epoch": 0.13312734691463884, "grad_norm": 0.10120215266942978, "learning_rate": 4.350442188983856e-05, "loss": 0.0219, "step": 18010 }, { "epoch": 0.1332012654859407, "grad_norm": 0.0820140391588211, "learning_rate": 4.350071225071225e-05, "loss": 0.0212, "step": 18020 }, { "epoch": 0.13327518405724254, "grad_norm": 0.10618920624256134, "learning_rate": 4.349700261158595e-05, "loss": 0.021, "step": 18030 }, { "epoch": 0.1333491026285444, "grad_norm": 0.0975532978773117, "learning_rate": 4.3493292972459645e-05, "loss": 0.0206, "step": 18040 }, { "epoch": 0.13342302119984625, "grad_norm": 0.09257347136735916, "learning_rate": 4.3489583333333334e-05, "loss": 0.0208, "step": 18050 }, { "epoch": 0.1334969397711481, "grad_norm": 0.09179476648569107, "learning_rate": 4.348587369420703e-05, "loss": 0.0208, "step": 18060 }, { "epoch": 0.13357085834244994, "grad_norm": 0.09116066247224808, "learning_rate": 4.3482164055080726e-05, "loss": 0.0197, "step": 18070 }, { "epoch": 0.13364477691375182, "grad_norm": 0.07741699367761612, "learning_rate": 4.3478454415954416e-05, "loss": 0.0204, "step": 18080 }, { "epoch": 0.13371869548505366, "grad_norm": 0.08600520342588425, "learning_rate": 4.347474477682811e-05, "loss": 0.0197, "step": 18090 }, { "epoch": 0.1337926140563555, "grad_norm": 0.07641606777906418, "learning_rate": 4.347103513770181e-05, "loss": 0.021, "step": 18100 }, { "epoch": 0.13386653262765738, "grad_norm": 0.12952347099781036, "learning_rate": 4.3467325498575503e-05, "loss": 0.0187, "step": 18110 }, { "epoch": 0.13394045119895923, "grad_norm": 0.08709315955638885, "learning_rate": 4.34636158594492e-05, "loss": 0.0187, "step": 18120 }, { "epoch": 0.13401436977026107, "grad_norm": 0.0985182449221611, "learning_rate": 4.345990622032289e-05, "loss": 0.0218, "step": 18130 }, { "epoch": 0.13408828834156294, "grad_norm": 0.0989292562007904, "learning_rate": 4.3456196581196585e-05, "loss": 0.0198, "step": 18140 }, { "epoch": 0.1341622069128648, "grad_norm": 0.10038740187883377, "learning_rate": 4.3452486942070274e-05, "loss": 0.0213, "step": 18150 }, { "epoch": 0.13423612548416664, "grad_norm": 0.08420637995004654, "learning_rate": 4.344877730294397e-05, "loss": 0.0227, "step": 18160 }, { "epoch": 0.1343100440554685, "grad_norm": 0.09484747052192688, "learning_rate": 4.3445067663817666e-05, "loss": 0.0206, "step": 18170 }, { "epoch": 0.13438396262677035, "grad_norm": 0.09223677217960358, "learning_rate": 4.344135802469136e-05, "loss": 0.0216, "step": 18180 }, { "epoch": 0.1344578811980722, "grad_norm": 0.06965033710002899, "learning_rate": 4.343764838556506e-05, "loss": 0.0207, "step": 18190 }, { "epoch": 0.13453179976937404, "grad_norm": 0.06636201590299606, "learning_rate": 4.343393874643875e-05, "loss": 0.0195, "step": 18200 }, { "epoch": 0.13460571834067592, "grad_norm": 0.07372688502073288, "learning_rate": 4.343022910731244e-05, "loss": 0.0224, "step": 18210 }, { "epoch": 0.13467963691197776, "grad_norm": 0.08220159262418747, "learning_rate": 4.342651946818614e-05, "loss": 0.0187, "step": 18220 }, { "epoch": 0.1347535554832796, "grad_norm": 0.09175535291433334, "learning_rate": 4.342280982905983e-05, "loss": 0.0198, "step": 18230 }, { "epoch": 0.13482747405458148, "grad_norm": 0.09849115461111069, "learning_rate": 4.3419100189933524e-05, "loss": 0.0188, "step": 18240 }, { "epoch": 0.13490139262588333, "grad_norm": 0.11270340532064438, "learning_rate": 4.341539055080722e-05, "loss": 0.0188, "step": 18250 }, { "epoch": 0.13497531119718517, "grad_norm": 0.0937158614397049, "learning_rate": 4.3411680911680916e-05, "loss": 0.0219, "step": 18260 }, { "epoch": 0.13504922976848704, "grad_norm": 0.09184368699789047, "learning_rate": 4.340797127255461e-05, "loss": 0.021, "step": 18270 }, { "epoch": 0.1351231483397889, "grad_norm": 0.1325238049030304, "learning_rate": 4.34042616334283e-05, "loss": 0.0221, "step": 18280 }, { "epoch": 0.13519706691109074, "grad_norm": 0.10193447023630142, "learning_rate": 4.3400551994302e-05, "loss": 0.0203, "step": 18290 }, { "epoch": 0.1352709854823926, "grad_norm": 0.06822310388088226, "learning_rate": 4.339684235517569e-05, "loss": 0.0199, "step": 18300 }, { "epoch": 0.13534490405369445, "grad_norm": 0.08294135332107544, "learning_rate": 4.339313271604938e-05, "loss": 0.0167, "step": 18310 }, { "epoch": 0.1354188226249963, "grad_norm": 0.08948735147714615, "learning_rate": 4.338942307692308e-05, "loss": 0.019, "step": 18320 }, { "epoch": 0.13549274119629814, "grad_norm": 0.09147325903177261, "learning_rate": 4.3385713437796774e-05, "loss": 0.0197, "step": 18330 }, { "epoch": 0.13556665976760002, "grad_norm": 0.08658456802368164, "learning_rate": 4.338200379867047e-05, "loss": 0.0205, "step": 18340 }, { "epoch": 0.13564057833890186, "grad_norm": 0.10264665633440018, "learning_rate": 4.3378294159544166e-05, "loss": 0.0192, "step": 18350 }, { "epoch": 0.1357144969102037, "grad_norm": 0.07289276272058487, "learning_rate": 4.3374584520417855e-05, "loss": 0.0202, "step": 18360 }, { "epoch": 0.13578841548150558, "grad_norm": 0.08398177474737167, "learning_rate": 4.337087488129155e-05, "loss": 0.0212, "step": 18370 }, { "epoch": 0.13586233405280743, "grad_norm": 0.08534097671508789, "learning_rate": 4.336716524216524e-05, "loss": 0.0203, "step": 18380 }, { "epoch": 0.13593625262410927, "grad_norm": 0.08274282515048981, "learning_rate": 4.3363455603038936e-05, "loss": 0.0203, "step": 18390 }, { "epoch": 0.13601017119541114, "grad_norm": 0.10768471658229828, "learning_rate": 4.335974596391263e-05, "loss": 0.0192, "step": 18400 }, { "epoch": 0.136084089766713, "grad_norm": 0.10010208189487457, "learning_rate": 4.335603632478633e-05, "loss": 0.0218, "step": 18410 }, { "epoch": 0.13615800833801484, "grad_norm": 0.10978821665048599, "learning_rate": 4.3352326685660024e-05, "loss": 0.02, "step": 18420 }, { "epoch": 0.1362319269093167, "grad_norm": 0.09286827594041824, "learning_rate": 4.3348617046533713e-05, "loss": 0.0222, "step": 18430 }, { "epoch": 0.13630584548061855, "grad_norm": 0.10667553544044495, "learning_rate": 4.334490740740741e-05, "loss": 0.0192, "step": 18440 }, { "epoch": 0.1363797640519204, "grad_norm": 0.0845090001821518, "learning_rate": 4.3341197768281105e-05, "loss": 0.0198, "step": 18450 }, { "epoch": 0.13645368262322227, "grad_norm": 0.09107456356287003, "learning_rate": 4.3337488129154795e-05, "loss": 0.0229, "step": 18460 }, { "epoch": 0.13652760119452412, "grad_norm": 0.11485371738672256, "learning_rate": 4.333377849002849e-05, "loss": 0.0209, "step": 18470 }, { "epoch": 0.13660151976582596, "grad_norm": 0.09362673759460449, "learning_rate": 4.3330068850902187e-05, "loss": 0.0209, "step": 18480 }, { "epoch": 0.1366754383371278, "grad_norm": 0.08930040150880814, "learning_rate": 4.332635921177588e-05, "loss": 0.0216, "step": 18490 }, { "epoch": 0.13674935690842968, "grad_norm": 0.08274761587381363, "learning_rate": 4.332264957264958e-05, "loss": 0.0184, "step": 18500 }, { "epoch": 0.13682327547973153, "grad_norm": 0.10678842663764954, "learning_rate": 4.331893993352327e-05, "loss": 0.0188, "step": 18510 }, { "epoch": 0.13689719405103337, "grad_norm": 0.08956782519817352, "learning_rate": 4.3315230294396964e-05, "loss": 0.0199, "step": 18520 }, { "epoch": 0.13697111262233524, "grad_norm": 0.10181953758001328, "learning_rate": 4.331152065527066e-05, "loss": 0.0196, "step": 18530 }, { "epoch": 0.1370450311936371, "grad_norm": 0.09772741794586182, "learning_rate": 4.330781101614435e-05, "loss": 0.0195, "step": 18540 }, { "epoch": 0.13711894976493894, "grad_norm": 0.08765088766813278, "learning_rate": 4.3304101377018045e-05, "loss": 0.0191, "step": 18550 }, { "epoch": 0.1371928683362408, "grad_norm": 0.10872490704059601, "learning_rate": 4.330039173789174e-05, "loss": 0.0187, "step": 18560 }, { "epoch": 0.13726678690754265, "grad_norm": 0.07029812783002853, "learning_rate": 4.329668209876544e-05, "loss": 0.0189, "step": 18570 }, { "epoch": 0.1373407054788445, "grad_norm": 0.09874571114778519, "learning_rate": 4.329297245963913e-05, "loss": 0.0193, "step": 18580 }, { "epoch": 0.13741462405014637, "grad_norm": 0.10533016175031662, "learning_rate": 4.328926282051282e-05, "loss": 0.0181, "step": 18590 }, { "epoch": 0.13748854262144822, "grad_norm": 0.08398744463920593, "learning_rate": 4.328555318138652e-05, "loss": 0.0188, "step": 18600 }, { "epoch": 0.13756246119275006, "grad_norm": 0.08623006194829941, "learning_rate": 4.328184354226021e-05, "loss": 0.0188, "step": 18610 }, { "epoch": 0.1376363797640519, "grad_norm": 0.1085953414440155, "learning_rate": 4.32781339031339e-05, "loss": 0.0207, "step": 18620 }, { "epoch": 0.13771029833535378, "grad_norm": 0.10261372476816177, "learning_rate": 4.32744242640076e-05, "loss": 0.0183, "step": 18630 }, { "epoch": 0.13778421690665563, "grad_norm": 0.07687580585479736, "learning_rate": 4.3270714624881295e-05, "loss": 0.02, "step": 18640 }, { "epoch": 0.13785813547795747, "grad_norm": 0.08356168121099472, "learning_rate": 4.326700498575499e-05, "loss": 0.0196, "step": 18650 }, { "epoch": 0.13793205404925934, "grad_norm": 0.07929971069097519, "learning_rate": 4.326329534662868e-05, "loss": 0.0215, "step": 18660 }, { "epoch": 0.1380059726205612, "grad_norm": 0.12214069813489914, "learning_rate": 4.3259585707502376e-05, "loss": 0.0206, "step": 18670 }, { "epoch": 0.13807989119186304, "grad_norm": 0.0971401110291481, "learning_rate": 4.325587606837607e-05, "loss": 0.0173, "step": 18680 }, { "epoch": 0.1381538097631649, "grad_norm": 0.1067282035946846, "learning_rate": 4.325216642924976e-05, "loss": 0.0218, "step": 18690 }, { "epoch": 0.13822772833446675, "grad_norm": 0.08311453461647034, "learning_rate": 4.324845679012346e-05, "loss": 0.0189, "step": 18700 }, { "epoch": 0.1383016469057686, "grad_norm": 0.07841376960277557, "learning_rate": 4.324474715099715e-05, "loss": 0.0196, "step": 18710 }, { "epoch": 0.13837556547707047, "grad_norm": 0.11007623374462128, "learning_rate": 4.324103751187085e-05, "loss": 0.0233, "step": 18720 }, { "epoch": 0.13844948404837232, "grad_norm": 0.06940075010061264, "learning_rate": 4.3237327872744545e-05, "loss": 0.0199, "step": 18730 }, { "epoch": 0.13852340261967416, "grad_norm": 0.0829441025853157, "learning_rate": 4.3233618233618234e-05, "loss": 0.0202, "step": 18740 }, { "epoch": 0.138597321190976, "grad_norm": 0.11176992207765579, "learning_rate": 4.322990859449193e-05, "loss": 0.0201, "step": 18750 }, { "epoch": 0.13867123976227788, "grad_norm": 0.08433730155229568, "learning_rate": 4.3226198955365626e-05, "loss": 0.0193, "step": 18760 }, { "epoch": 0.13874515833357973, "grad_norm": 0.10842785239219666, "learning_rate": 4.3222489316239315e-05, "loss": 0.0216, "step": 18770 }, { "epoch": 0.13881907690488157, "grad_norm": 0.12139491736888885, "learning_rate": 4.321877967711301e-05, "loss": 0.0216, "step": 18780 }, { "epoch": 0.13889299547618345, "grad_norm": 0.07391542941331863, "learning_rate": 4.321507003798671e-05, "loss": 0.0217, "step": 18790 }, { "epoch": 0.1389669140474853, "grad_norm": 0.10303296893835068, "learning_rate": 4.32113603988604e-05, "loss": 0.0209, "step": 18800 }, { "epoch": 0.13904083261878714, "grad_norm": 0.09348388016223907, "learning_rate": 4.32076507597341e-05, "loss": 0.0225, "step": 18810 }, { "epoch": 0.139114751190089, "grad_norm": 0.10517199337482452, "learning_rate": 4.320394112060779e-05, "loss": 0.0204, "step": 18820 }, { "epoch": 0.13918866976139085, "grad_norm": 0.0827675312757492, "learning_rate": 4.3200231481481484e-05, "loss": 0.0202, "step": 18830 }, { "epoch": 0.1392625883326927, "grad_norm": 0.14683835208415985, "learning_rate": 4.3196521842355174e-05, "loss": 0.0215, "step": 18840 }, { "epoch": 0.13933650690399457, "grad_norm": 0.08116668462753296, "learning_rate": 4.319281220322887e-05, "loss": 0.0224, "step": 18850 }, { "epoch": 0.13941042547529642, "grad_norm": 0.08344514667987823, "learning_rate": 4.3189102564102565e-05, "loss": 0.02, "step": 18860 }, { "epoch": 0.13948434404659826, "grad_norm": 0.0966254398226738, "learning_rate": 4.318539292497626e-05, "loss": 0.0234, "step": 18870 }, { "epoch": 0.1395582626179001, "grad_norm": 0.0772177055478096, "learning_rate": 4.318168328584996e-05, "loss": 0.0206, "step": 18880 }, { "epoch": 0.13963218118920198, "grad_norm": 0.07024878263473511, "learning_rate": 4.317797364672365e-05, "loss": 0.0211, "step": 18890 }, { "epoch": 0.13970609976050383, "grad_norm": 0.06805914640426636, "learning_rate": 4.317426400759734e-05, "loss": 0.0185, "step": 18900 }, { "epoch": 0.13978001833180567, "grad_norm": 0.08049175888299942, "learning_rate": 4.317055436847104e-05, "loss": 0.0188, "step": 18910 }, { "epoch": 0.13985393690310755, "grad_norm": 0.053536444902420044, "learning_rate": 4.316684472934473e-05, "loss": 0.0217, "step": 18920 }, { "epoch": 0.1399278554744094, "grad_norm": 0.08745238184928894, "learning_rate": 4.3163135090218424e-05, "loss": 0.0217, "step": 18930 }, { "epoch": 0.14000177404571124, "grad_norm": 0.11072596907615662, "learning_rate": 4.315942545109212e-05, "loss": 0.0201, "step": 18940 }, { "epoch": 0.1400756926170131, "grad_norm": 0.09789971262216568, "learning_rate": 4.3155715811965816e-05, "loss": 0.0218, "step": 18950 }, { "epoch": 0.14014961118831495, "grad_norm": 0.10173800587654114, "learning_rate": 4.315200617283951e-05, "loss": 0.0204, "step": 18960 }, { "epoch": 0.1402235297596168, "grad_norm": 0.10012296587228775, "learning_rate": 4.31482965337132e-05, "loss": 0.0211, "step": 18970 }, { "epoch": 0.14029744833091867, "grad_norm": 0.10211526602506638, "learning_rate": 4.31445868945869e-05, "loss": 0.0187, "step": 18980 }, { "epoch": 0.14037136690222052, "grad_norm": 0.09464067220687866, "learning_rate": 4.314087725546059e-05, "loss": 0.0203, "step": 18990 }, { "epoch": 0.14044528547352236, "grad_norm": 0.07152500003576279, "learning_rate": 4.313716761633428e-05, "loss": 0.0211, "step": 19000 }, { "epoch": 0.1405192040448242, "grad_norm": 0.09147216379642487, "learning_rate": 4.313345797720798e-05, "loss": 0.0221, "step": 19010 }, { "epoch": 0.14059312261612608, "grad_norm": 0.08784119039773941, "learning_rate": 4.3129748338081674e-05, "loss": 0.0181, "step": 19020 }, { "epoch": 0.14066704118742793, "grad_norm": 0.08497309684753418, "learning_rate": 4.312603869895537e-05, "loss": 0.0179, "step": 19030 }, { "epoch": 0.14074095975872977, "grad_norm": 0.11955258995294571, "learning_rate": 4.3122329059829066e-05, "loss": 0.021, "step": 19040 }, { "epoch": 0.14081487833003165, "grad_norm": 0.0956282839179039, "learning_rate": 4.3118619420702755e-05, "loss": 0.0197, "step": 19050 }, { "epoch": 0.1408887969013335, "grad_norm": 0.10729701071977615, "learning_rate": 4.311490978157645e-05, "loss": 0.0188, "step": 19060 }, { "epoch": 0.14096271547263534, "grad_norm": 0.11408047378063202, "learning_rate": 4.311120014245014e-05, "loss": 0.0211, "step": 19070 }, { "epoch": 0.1410366340439372, "grad_norm": 0.0880783423781395, "learning_rate": 4.3107490503323836e-05, "loss": 0.0213, "step": 19080 }, { "epoch": 0.14111055261523905, "grad_norm": 0.10292744636535645, "learning_rate": 4.310378086419753e-05, "loss": 0.0226, "step": 19090 }, { "epoch": 0.1411844711865409, "grad_norm": 0.0908803939819336, "learning_rate": 4.310007122507123e-05, "loss": 0.0207, "step": 19100 }, { "epoch": 0.14125838975784277, "grad_norm": 0.11729007959365845, "learning_rate": 4.3096361585944924e-05, "loss": 0.0225, "step": 19110 }, { "epoch": 0.14133230832914462, "grad_norm": 0.07894833385944366, "learning_rate": 4.309265194681861e-05, "loss": 0.0179, "step": 19120 }, { "epoch": 0.14140622690044646, "grad_norm": 0.10907064378261566, "learning_rate": 4.308894230769231e-05, "loss": 0.0185, "step": 19130 }, { "epoch": 0.1414801454717483, "grad_norm": 0.09175430983304977, "learning_rate": 4.3085232668566005e-05, "loss": 0.0206, "step": 19140 }, { "epoch": 0.14155406404305018, "grad_norm": 0.08137310296297073, "learning_rate": 4.3081523029439694e-05, "loss": 0.0202, "step": 19150 }, { "epoch": 0.14162798261435203, "grad_norm": 0.0989503413438797, "learning_rate": 4.307781339031339e-05, "loss": 0.0186, "step": 19160 }, { "epoch": 0.14170190118565387, "grad_norm": 0.09327124804258347, "learning_rate": 4.3074103751187086e-05, "loss": 0.0192, "step": 19170 }, { "epoch": 0.14177581975695575, "grad_norm": 0.08980970829725266, "learning_rate": 4.307039411206078e-05, "loss": 0.0194, "step": 19180 }, { "epoch": 0.1418497383282576, "grad_norm": 0.10795049369335175, "learning_rate": 4.306668447293448e-05, "loss": 0.0208, "step": 19190 }, { "epoch": 0.14192365689955944, "grad_norm": 0.06910710781812668, "learning_rate": 4.306297483380817e-05, "loss": 0.0211, "step": 19200 }, { "epoch": 0.1419975754708613, "grad_norm": 0.08890750259160995, "learning_rate": 4.305926519468186e-05, "loss": 0.0196, "step": 19210 }, { "epoch": 0.14207149404216315, "grad_norm": 0.09042514115571976, "learning_rate": 4.305555555555556e-05, "loss": 0.0179, "step": 19220 }, { "epoch": 0.142145412613465, "grad_norm": 0.0995037779211998, "learning_rate": 4.305184591642925e-05, "loss": 0.0191, "step": 19230 }, { "epoch": 0.14221933118476687, "grad_norm": 0.07466725260019302, "learning_rate": 4.3048136277302944e-05, "loss": 0.0184, "step": 19240 }, { "epoch": 0.14229324975606872, "grad_norm": 0.11802132427692413, "learning_rate": 4.304442663817664e-05, "loss": 0.0227, "step": 19250 }, { "epoch": 0.14236716832737056, "grad_norm": 0.09003034234046936, "learning_rate": 4.3040716999050336e-05, "loss": 0.0188, "step": 19260 }, { "epoch": 0.1424410868986724, "grad_norm": 0.06642922013998032, "learning_rate": 4.303700735992403e-05, "loss": 0.0186, "step": 19270 }, { "epoch": 0.14251500546997428, "grad_norm": 0.10610310733318329, "learning_rate": 4.303329772079772e-05, "loss": 0.0199, "step": 19280 }, { "epoch": 0.14258892404127613, "grad_norm": 0.08904029428958893, "learning_rate": 4.302958808167142e-05, "loss": 0.0218, "step": 19290 }, { "epoch": 0.14266284261257797, "grad_norm": 0.11130847781896591, "learning_rate": 4.302587844254511e-05, "loss": 0.0163, "step": 19300 }, { "epoch": 0.14273676118387985, "grad_norm": 0.07528279721736908, "learning_rate": 4.30221688034188e-05, "loss": 0.0185, "step": 19310 }, { "epoch": 0.1428106797551817, "grad_norm": 0.07024827599525452, "learning_rate": 4.30184591642925e-05, "loss": 0.0173, "step": 19320 }, { "epoch": 0.14288459832648354, "grad_norm": 0.11899475008249283, "learning_rate": 4.3014749525166195e-05, "loss": 0.0241, "step": 19330 }, { "epoch": 0.1429585168977854, "grad_norm": 0.09621811658143997, "learning_rate": 4.301103988603989e-05, "loss": 0.0202, "step": 19340 }, { "epoch": 0.14303243546908725, "grad_norm": 0.09382009506225586, "learning_rate": 4.300733024691358e-05, "loss": 0.0197, "step": 19350 }, { "epoch": 0.1431063540403891, "grad_norm": 0.0980682298541069, "learning_rate": 4.3003620607787276e-05, "loss": 0.0208, "step": 19360 }, { "epoch": 0.14318027261169097, "grad_norm": 0.09665199369192123, "learning_rate": 4.299991096866097e-05, "loss": 0.0204, "step": 19370 }, { "epoch": 0.14325419118299282, "grad_norm": 0.09231219440698624, "learning_rate": 4.299620132953466e-05, "loss": 0.0219, "step": 19380 }, { "epoch": 0.14332810975429466, "grad_norm": 0.10446962714195251, "learning_rate": 4.299249169040836e-05, "loss": 0.0202, "step": 19390 }, { "epoch": 0.14340202832559654, "grad_norm": 0.08622289448976517, "learning_rate": 4.298878205128205e-05, "loss": 0.0192, "step": 19400 }, { "epoch": 0.14347594689689838, "grad_norm": 0.10497544705867767, "learning_rate": 4.298507241215575e-05, "loss": 0.0212, "step": 19410 }, { "epoch": 0.14354986546820023, "grad_norm": 0.1282612383365631, "learning_rate": 4.2981362773029445e-05, "loss": 0.0203, "step": 19420 }, { "epoch": 0.14362378403950207, "grad_norm": 0.10771500319242477, "learning_rate": 4.2977653133903134e-05, "loss": 0.0218, "step": 19430 }, { "epoch": 0.14369770261080395, "grad_norm": 0.08456694334745407, "learning_rate": 4.297394349477683e-05, "loss": 0.0199, "step": 19440 }, { "epoch": 0.1437716211821058, "grad_norm": 0.08716116845607758, "learning_rate": 4.2970233855650526e-05, "loss": 0.0211, "step": 19450 }, { "epoch": 0.14384553975340764, "grad_norm": 0.06831954419612885, "learning_rate": 4.2966524216524215e-05, "loss": 0.0201, "step": 19460 }, { "epoch": 0.1439194583247095, "grad_norm": 0.09248486906290054, "learning_rate": 4.296281457739791e-05, "loss": 0.0216, "step": 19470 }, { "epoch": 0.14399337689601135, "grad_norm": 0.08724641799926758, "learning_rate": 4.295910493827161e-05, "loss": 0.0213, "step": 19480 }, { "epoch": 0.1440672954673132, "grad_norm": 0.10801932960748672, "learning_rate": 4.29553952991453e-05, "loss": 0.0236, "step": 19490 }, { "epoch": 0.14414121403861507, "grad_norm": 0.07270118594169617, "learning_rate": 4.2951685660019e-05, "loss": 0.0208, "step": 19500 }, { "epoch": 0.14421513260991692, "grad_norm": 0.07651976495981216, "learning_rate": 4.294797602089269e-05, "loss": 0.0196, "step": 19510 }, { "epoch": 0.14428905118121876, "grad_norm": 0.10494709014892578, "learning_rate": 4.2944266381766384e-05, "loss": 0.0194, "step": 19520 }, { "epoch": 0.14436296975252064, "grad_norm": 0.08860231935977936, "learning_rate": 4.294055674264007e-05, "loss": 0.0181, "step": 19530 }, { "epoch": 0.14443688832382248, "grad_norm": 0.12752337753772736, "learning_rate": 4.293684710351377e-05, "loss": 0.02, "step": 19540 }, { "epoch": 0.14451080689512433, "grad_norm": 0.08055710792541504, "learning_rate": 4.2933137464387465e-05, "loss": 0.0189, "step": 19550 }, { "epoch": 0.14458472546642617, "grad_norm": 0.12380864471197128, "learning_rate": 4.292942782526116e-05, "loss": 0.019, "step": 19560 }, { "epoch": 0.14465864403772805, "grad_norm": 0.11049962788820267, "learning_rate": 4.292571818613486e-05, "loss": 0.0206, "step": 19570 }, { "epoch": 0.1447325626090299, "grad_norm": 0.09611523896455765, "learning_rate": 4.2922008547008546e-05, "loss": 0.0214, "step": 19580 }, { "epoch": 0.14480648118033174, "grad_norm": 0.0665908008813858, "learning_rate": 4.291829890788224e-05, "loss": 0.0213, "step": 19590 }, { "epoch": 0.1448803997516336, "grad_norm": 0.07490904629230499, "learning_rate": 4.291458926875594e-05, "loss": 0.0187, "step": 19600 }, { "epoch": 0.14495431832293545, "grad_norm": 0.08843923360109329, "learning_rate": 4.291087962962963e-05, "loss": 0.0187, "step": 19610 }, { "epoch": 0.1450282368942373, "grad_norm": 0.09567807614803314, "learning_rate": 4.2907169990503323e-05, "loss": 0.0183, "step": 19620 }, { "epoch": 0.14510215546553917, "grad_norm": 0.08598290383815765, "learning_rate": 4.290346035137702e-05, "loss": 0.0217, "step": 19630 }, { "epoch": 0.14517607403684102, "grad_norm": 0.09431812167167664, "learning_rate": 4.2899750712250715e-05, "loss": 0.019, "step": 19640 }, { "epoch": 0.14524999260814286, "grad_norm": 0.06617843359708786, "learning_rate": 4.289604107312441e-05, "loss": 0.0174, "step": 19650 }, { "epoch": 0.14532391117944474, "grad_norm": 0.1028028130531311, "learning_rate": 4.28923314339981e-05, "loss": 0.0188, "step": 19660 }, { "epoch": 0.14539782975074658, "grad_norm": 0.10529684275388718, "learning_rate": 4.2888621794871797e-05, "loss": 0.019, "step": 19670 }, { "epoch": 0.14547174832204843, "grad_norm": 0.06480713933706284, "learning_rate": 4.288491215574549e-05, "loss": 0.0202, "step": 19680 }, { "epoch": 0.14554566689335027, "grad_norm": 0.13642440736293793, "learning_rate": 4.288120251661918e-05, "loss": 0.0211, "step": 19690 }, { "epoch": 0.14561958546465215, "grad_norm": 0.11017101258039474, "learning_rate": 4.287749287749288e-05, "loss": 0.0208, "step": 19700 }, { "epoch": 0.145693504035954, "grad_norm": 0.0895652249455452, "learning_rate": 4.2873783238366574e-05, "loss": 0.0231, "step": 19710 }, { "epoch": 0.14576742260725584, "grad_norm": 0.10605411976575851, "learning_rate": 4.287007359924027e-05, "loss": 0.0215, "step": 19720 }, { "epoch": 0.1458413411785577, "grad_norm": 0.10418490320444107, "learning_rate": 4.2866363960113966e-05, "loss": 0.0205, "step": 19730 }, { "epoch": 0.14591525974985955, "grad_norm": 0.11002498865127563, "learning_rate": 4.2862654320987655e-05, "loss": 0.0212, "step": 19740 }, { "epoch": 0.1459891783211614, "grad_norm": 0.10646151751279831, "learning_rate": 4.285894468186135e-05, "loss": 0.0203, "step": 19750 }, { "epoch": 0.14606309689246327, "grad_norm": 0.09739893674850464, "learning_rate": 4.285523504273504e-05, "loss": 0.0205, "step": 19760 }, { "epoch": 0.14613701546376512, "grad_norm": 0.08105024695396423, "learning_rate": 4.2851525403608736e-05, "loss": 0.022, "step": 19770 }, { "epoch": 0.14621093403506696, "grad_norm": 0.06371292471885681, "learning_rate": 4.284781576448243e-05, "loss": 0.0186, "step": 19780 }, { "epoch": 0.14628485260636884, "grad_norm": 0.12250304222106934, "learning_rate": 4.284410612535613e-05, "loss": 0.0218, "step": 19790 }, { "epoch": 0.14635877117767068, "grad_norm": 0.08064945042133331, "learning_rate": 4.2840396486229824e-05, "loss": 0.0197, "step": 19800 }, { "epoch": 0.14643268974897253, "grad_norm": 0.048164352774620056, "learning_rate": 4.283668684710351e-05, "loss": 0.0193, "step": 19810 }, { "epoch": 0.14650660832027437, "grad_norm": 0.144981250166893, "learning_rate": 4.283297720797721e-05, "loss": 0.0179, "step": 19820 }, { "epoch": 0.14658052689157625, "grad_norm": 0.07517068833112717, "learning_rate": 4.2829267568850905e-05, "loss": 0.0207, "step": 19830 }, { "epoch": 0.1466544454628781, "grad_norm": 0.13142111897468567, "learning_rate": 4.2825557929724594e-05, "loss": 0.02, "step": 19840 }, { "epoch": 0.14672836403417994, "grad_norm": 0.09935706853866577, "learning_rate": 4.282184829059829e-05, "loss": 0.0188, "step": 19850 }, { "epoch": 0.1468022826054818, "grad_norm": 0.08331234008073807, "learning_rate": 4.281813865147199e-05, "loss": 0.0167, "step": 19860 }, { "epoch": 0.14687620117678366, "grad_norm": 0.09987511485815048, "learning_rate": 4.281442901234568e-05, "loss": 0.0207, "step": 19870 }, { "epoch": 0.1469501197480855, "grad_norm": 0.05960327759385109, "learning_rate": 4.281071937321938e-05, "loss": 0.0205, "step": 19880 }, { "epoch": 0.14702403831938737, "grad_norm": 0.10007424652576447, "learning_rate": 4.280700973409307e-05, "loss": 0.0192, "step": 19890 }, { "epoch": 0.14709795689068922, "grad_norm": 0.10943324863910675, "learning_rate": 4.280330009496676e-05, "loss": 0.0224, "step": 19900 }, { "epoch": 0.14717187546199106, "grad_norm": 0.0814700797200203, "learning_rate": 4.279959045584046e-05, "loss": 0.0194, "step": 19910 }, { "epoch": 0.14724579403329294, "grad_norm": 0.07240025699138641, "learning_rate": 4.279588081671415e-05, "loss": 0.0174, "step": 19920 }, { "epoch": 0.14731971260459478, "grad_norm": 0.09242795407772064, "learning_rate": 4.2792171177587844e-05, "loss": 0.0191, "step": 19930 }, { "epoch": 0.14739363117589663, "grad_norm": 0.10732390731573105, "learning_rate": 4.278846153846154e-05, "loss": 0.0205, "step": 19940 }, { "epoch": 0.14746754974719847, "grad_norm": 0.10419422388076782, "learning_rate": 4.2784751899335236e-05, "loss": 0.0181, "step": 19950 }, { "epoch": 0.14754146831850035, "grad_norm": 0.11255902796983719, "learning_rate": 4.278104226020893e-05, "loss": 0.0195, "step": 19960 }, { "epoch": 0.1476153868898022, "grad_norm": 0.0901622548699379, "learning_rate": 4.277733262108262e-05, "loss": 0.0197, "step": 19970 }, { "epoch": 0.14768930546110404, "grad_norm": 0.071842260658741, "learning_rate": 4.277362298195632e-05, "loss": 0.0206, "step": 19980 }, { "epoch": 0.1477632240324059, "grad_norm": 0.08971525728702545, "learning_rate": 4.2769913342830006e-05, "loss": 0.0196, "step": 19990 }, { "epoch": 0.14783714260370776, "grad_norm": 0.07036489993333817, "learning_rate": 4.27662037037037e-05, "loss": 0.022, "step": 20000 }, { "epoch": 0.14783714260370776, "eval_f1": 0.5889918098753654, "eval_loss": 0.019852200523018837, "eval_precision": 0.4652586220167745, "eval_recall": 0.8023809954805737, "eval_runtime": 2652.3788, "eval_samples_per_second": 204.018, "eval_steps_per_second": 3.188, "step": 20000 }, { "epoch": 0.1479110611750096, "grad_norm": 0.07533308118581772, "learning_rate": 4.2762494064577405e-05, "loss": 0.0191, "step": 20010 }, { "epoch": 0.14798497974631147, "grad_norm": 0.07710433751344681, "learning_rate": 4.2758784425451094e-05, "loss": 0.0213, "step": 20020 }, { "epoch": 0.14805889831761332, "grad_norm": 0.09403133392333984, "learning_rate": 4.275507478632479e-05, "loss": 0.023, "step": 20030 }, { "epoch": 0.14813281688891516, "grad_norm": 0.0773426964879036, "learning_rate": 4.275136514719848e-05, "loss": 0.0185, "step": 20040 }, { "epoch": 0.14820673546021704, "grad_norm": 0.0869455635547638, "learning_rate": 4.2747655508072175e-05, "loss": 0.0198, "step": 20050 }, { "epoch": 0.14828065403151888, "grad_norm": 0.08084216713905334, "learning_rate": 4.274394586894587e-05, "loss": 0.0188, "step": 20060 }, { "epoch": 0.14835457260282073, "grad_norm": 0.0831877812743187, "learning_rate": 4.274023622981956e-05, "loss": 0.0174, "step": 20070 }, { "epoch": 0.14842849117412257, "grad_norm": 0.08009405434131622, "learning_rate": 4.273652659069326e-05, "loss": 0.0183, "step": 20080 }, { "epoch": 0.14850240974542445, "grad_norm": 0.09190283715724945, "learning_rate": 4.273281695156696e-05, "loss": 0.0216, "step": 20090 }, { "epoch": 0.1485763283167263, "grad_norm": 0.08109553903341293, "learning_rate": 4.272910731244065e-05, "loss": 0.0208, "step": 20100 }, { "epoch": 0.14865024688802814, "grad_norm": 0.08654297143220901, "learning_rate": 4.2725397673314345e-05, "loss": 0.021, "step": 20110 }, { "epoch": 0.14872416545933, "grad_norm": 0.0885516032576561, "learning_rate": 4.2721688034188034e-05, "loss": 0.0203, "step": 20120 }, { "epoch": 0.14879808403063186, "grad_norm": 0.05938749387860298, "learning_rate": 4.271797839506173e-05, "loss": 0.0185, "step": 20130 }, { "epoch": 0.1488720026019337, "grad_norm": 0.10479523986577988, "learning_rate": 4.2714268755935426e-05, "loss": 0.0201, "step": 20140 }, { "epoch": 0.14894592117323557, "grad_norm": 0.07493485510349274, "learning_rate": 4.2710559116809115e-05, "loss": 0.0178, "step": 20150 }, { "epoch": 0.14901983974453742, "grad_norm": 0.09768588840961456, "learning_rate": 4.270684947768282e-05, "loss": 0.0199, "step": 20160 }, { "epoch": 0.14909375831583926, "grad_norm": 0.08659044653177261, "learning_rate": 4.270313983855651e-05, "loss": 0.0183, "step": 20170 }, { "epoch": 0.14916767688714114, "grad_norm": 0.09599784016609192, "learning_rate": 4.26994301994302e-05, "loss": 0.021, "step": 20180 }, { "epoch": 0.14924159545844298, "grad_norm": 0.0763728991150856, "learning_rate": 4.26957205603039e-05, "loss": 0.0197, "step": 20190 }, { "epoch": 0.14931551402974483, "grad_norm": 0.09369050711393356, "learning_rate": 4.269201092117759e-05, "loss": 0.0204, "step": 20200 }, { "epoch": 0.14938943260104667, "grad_norm": 0.11977594345808029, "learning_rate": 4.2688301282051284e-05, "loss": 0.0208, "step": 20210 }, { "epoch": 0.14946335117234855, "grad_norm": 0.07575426250696182, "learning_rate": 4.268459164292497e-05, "loss": 0.0189, "step": 20220 }, { "epoch": 0.1495372697436504, "grad_norm": 0.08216328918933868, "learning_rate": 4.268088200379867e-05, "loss": 0.0202, "step": 20230 }, { "epoch": 0.14961118831495224, "grad_norm": 0.08688732981681824, "learning_rate": 4.267717236467237e-05, "loss": 0.0185, "step": 20240 }, { "epoch": 0.1496851068862541, "grad_norm": 0.0954701155424118, "learning_rate": 4.267346272554606e-05, "loss": 0.0207, "step": 20250 }, { "epoch": 0.14975902545755596, "grad_norm": 0.1011623814702034, "learning_rate": 4.266975308641976e-05, "loss": 0.0184, "step": 20260 }, { "epoch": 0.1498329440288578, "grad_norm": 0.10412371158599854, "learning_rate": 4.2666043447293446e-05, "loss": 0.0187, "step": 20270 }, { "epoch": 0.14990686260015967, "grad_norm": 0.08750808238983154, "learning_rate": 4.266233380816714e-05, "loss": 0.0203, "step": 20280 }, { "epoch": 0.14998078117146152, "grad_norm": 0.09496186673641205, "learning_rate": 4.265862416904084e-05, "loss": 0.0191, "step": 20290 }, { "epoch": 0.15005469974276336, "grad_norm": 0.10010498762130737, "learning_rate": 4.265491452991453e-05, "loss": 0.0228, "step": 20300 }, { "epoch": 0.15012861831406524, "grad_norm": 0.08491216599941254, "learning_rate": 4.265120489078823e-05, "loss": 0.017, "step": 20310 }, { "epoch": 0.15020253688536708, "grad_norm": 0.08513123542070389, "learning_rate": 4.2647495251661926e-05, "loss": 0.0191, "step": 20320 }, { "epoch": 0.15027645545666893, "grad_norm": 0.0948261246085167, "learning_rate": 4.2643785612535615e-05, "loss": 0.0194, "step": 20330 }, { "epoch": 0.1503503740279708, "grad_norm": 0.08445029705762863, "learning_rate": 4.264007597340931e-05, "loss": 0.02, "step": 20340 }, { "epoch": 0.15042429259927265, "grad_norm": 0.08538064360618591, "learning_rate": 4.2636366334283e-05, "loss": 0.0191, "step": 20350 }, { "epoch": 0.1504982111705745, "grad_norm": 0.10733731836080551, "learning_rate": 4.2632656695156696e-05, "loss": 0.0215, "step": 20360 }, { "epoch": 0.15057212974187634, "grad_norm": 0.10469716787338257, "learning_rate": 4.262894705603039e-05, "loss": 0.0215, "step": 20370 }, { "epoch": 0.1506460483131782, "grad_norm": 0.10288316756486893, "learning_rate": 4.262523741690408e-05, "loss": 0.0189, "step": 20380 }, { "epoch": 0.15071996688448006, "grad_norm": 0.08677443116903305, "learning_rate": 4.2621527777777784e-05, "loss": 0.0188, "step": 20390 }, { "epoch": 0.1507938854557819, "grad_norm": 0.10645607113838196, "learning_rate": 4.261781813865147e-05, "loss": 0.0209, "step": 20400 }, { "epoch": 0.15086780402708377, "grad_norm": 0.07837022095918655, "learning_rate": 4.261410849952517e-05, "loss": 0.0173, "step": 20410 }, { "epoch": 0.15094172259838562, "grad_norm": 0.1119476780295372, "learning_rate": 4.2610398860398865e-05, "loss": 0.0201, "step": 20420 }, { "epoch": 0.15101564116968746, "grad_norm": 0.08979900926351547, "learning_rate": 4.2606689221272554e-05, "loss": 0.0183, "step": 20430 }, { "epoch": 0.15108955974098934, "grad_norm": 0.07695356011390686, "learning_rate": 4.260297958214625e-05, "loss": 0.0227, "step": 20440 }, { "epoch": 0.15116347831229118, "grad_norm": 0.07926007360219955, "learning_rate": 4.259926994301994e-05, "loss": 0.0207, "step": 20450 }, { "epoch": 0.15123739688359303, "grad_norm": 0.07667418569326401, "learning_rate": 4.259556030389364e-05, "loss": 0.0206, "step": 20460 }, { "epoch": 0.1513113154548949, "grad_norm": 0.09290957450866699, "learning_rate": 4.259185066476734e-05, "loss": 0.0197, "step": 20470 }, { "epoch": 0.15138523402619675, "grad_norm": 0.10091643780469894, "learning_rate": 4.258814102564103e-05, "loss": 0.0222, "step": 20480 }, { "epoch": 0.1514591525974986, "grad_norm": 0.07847228646278381, "learning_rate": 4.2584431386514724e-05, "loss": 0.0197, "step": 20490 }, { "epoch": 0.15153307116880044, "grad_norm": 0.07392504066228867, "learning_rate": 4.258072174738841e-05, "loss": 0.0173, "step": 20500 }, { "epoch": 0.1516069897401023, "grad_norm": 0.09294796735048294, "learning_rate": 4.257701210826211e-05, "loss": 0.018, "step": 20510 }, { "epoch": 0.15168090831140416, "grad_norm": 0.09442655742168427, "learning_rate": 4.2573302469135805e-05, "loss": 0.019, "step": 20520 }, { "epoch": 0.151754826882706, "grad_norm": 0.10183858871459961, "learning_rate": 4.2569592830009494e-05, "loss": 0.0201, "step": 20530 }, { "epoch": 0.15182874545400787, "grad_norm": 0.09779280424118042, "learning_rate": 4.2565883190883197e-05, "loss": 0.0193, "step": 20540 }, { "epoch": 0.15190266402530972, "grad_norm": 0.08100099116563797, "learning_rate": 4.256217355175689e-05, "loss": 0.0187, "step": 20550 }, { "epoch": 0.15197658259661156, "grad_norm": 0.0783228799700737, "learning_rate": 4.255846391263058e-05, "loss": 0.0186, "step": 20560 }, { "epoch": 0.15205050116791344, "grad_norm": 0.08579540997743607, "learning_rate": 4.255475427350428e-05, "loss": 0.0218, "step": 20570 }, { "epoch": 0.15212441973921528, "grad_norm": 0.09593216329813004, "learning_rate": 4.255104463437797e-05, "loss": 0.0192, "step": 20580 }, { "epoch": 0.15219833831051713, "grad_norm": 0.11332932114601135, "learning_rate": 4.254733499525166e-05, "loss": 0.0196, "step": 20590 }, { "epoch": 0.152272256881819, "grad_norm": 0.08314672857522964, "learning_rate": 4.254362535612536e-05, "loss": 0.0228, "step": 20600 }, { "epoch": 0.15234617545312085, "grad_norm": 0.08538781851530075, "learning_rate": 4.2539915716999055e-05, "loss": 0.0205, "step": 20610 }, { "epoch": 0.1524200940244227, "grad_norm": 0.12145442515611649, "learning_rate": 4.253620607787275e-05, "loss": 0.0228, "step": 20620 }, { "epoch": 0.15249401259572454, "grad_norm": 0.08186414837837219, "learning_rate": 4.253249643874644e-05, "loss": 0.0184, "step": 20630 }, { "epoch": 0.1525679311670264, "grad_norm": 0.101750448346138, "learning_rate": 4.2528786799620136e-05, "loss": 0.0198, "step": 20640 }, { "epoch": 0.15264184973832826, "grad_norm": 0.08295189589262009, "learning_rate": 4.252507716049383e-05, "loss": 0.0182, "step": 20650 }, { "epoch": 0.1527157683096301, "grad_norm": 0.1188788115978241, "learning_rate": 4.252136752136752e-05, "loss": 0.0182, "step": 20660 }, { "epoch": 0.15278968688093197, "grad_norm": 0.06974223256111145, "learning_rate": 4.251765788224122e-05, "loss": 0.0215, "step": 20670 }, { "epoch": 0.15286360545223382, "grad_norm": 0.08505337685346603, "learning_rate": 4.2513948243114906e-05, "loss": 0.0206, "step": 20680 }, { "epoch": 0.15293752402353566, "grad_norm": 0.0816953107714653, "learning_rate": 4.251023860398861e-05, "loss": 0.0186, "step": 20690 }, { "epoch": 0.15301144259483754, "grad_norm": 0.08393016457557678, "learning_rate": 4.2506528964862305e-05, "loss": 0.0188, "step": 20700 }, { "epoch": 0.15308536116613938, "grad_norm": 0.09978709369897842, "learning_rate": 4.2502819325735994e-05, "loss": 0.0194, "step": 20710 }, { "epoch": 0.15315927973744123, "grad_norm": 0.08010877668857574, "learning_rate": 4.249910968660969e-05, "loss": 0.0204, "step": 20720 }, { "epoch": 0.1532331983087431, "grad_norm": 0.08997316658496857, "learning_rate": 4.249540004748338e-05, "loss": 0.0199, "step": 20730 }, { "epoch": 0.15330711688004495, "grad_norm": 0.1026889905333519, "learning_rate": 4.2491690408357075e-05, "loss": 0.0194, "step": 20740 }, { "epoch": 0.1533810354513468, "grad_norm": 0.10672967880964279, "learning_rate": 4.248798076923077e-05, "loss": 0.024, "step": 20750 }, { "epoch": 0.15345495402264864, "grad_norm": 0.09047634899616241, "learning_rate": 4.248427113010447e-05, "loss": 0.0199, "step": 20760 }, { "epoch": 0.1535288725939505, "grad_norm": 0.07985531538724899, "learning_rate": 4.248056149097816e-05, "loss": 0.0207, "step": 20770 }, { "epoch": 0.15360279116525236, "grad_norm": 0.10958772897720337, "learning_rate": 4.247685185185186e-05, "loss": 0.0197, "step": 20780 }, { "epoch": 0.1536767097365542, "grad_norm": 0.10120689123868942, "learning_rate": 4.247314221272555e-05, "loss": 0.021, "step": 20790 }, { "epoch": 0.15375062830785607, "grad_norm": 0.0742512047290802, "learning_rate": 4.2469432573599244e-05, "loss": 0.0196, "step": 20800 }, { "epoch": 0.15382454687915792, "grad_norm": 0.08500395715236664, "learning_rate": 4.2465722934472933e-05, "loss": 0.0203, "step": 20810 }, { "epoch": 0.15389846545045976, "grad_norm": 0.08425117284059525, "learning_rate": 4.246201329534663e-05, "loss": 0.0206, "step": 20820 }, { "epoch": 0.15397238402176164, "grad_norm": 0.09172620624303818, "learning_rate": 4.2458303656220325e-05, "loss": 0.0215, "step": 20830 }, { "epoch": 0.15404630259306348, "grad_norm": 0.08757595717906952, "learning_rate": 4.245459401709402e-05, "loss": 0.0174, "step": 20840 }, { "epoch": 0.15412022116436533, "grad_norm": 0.060954876244068146, "learning_rate": 4.245088437796772e-05, "loss": 0.0202, "step": 20850 }, { "epoch": 0.1541941397356672, "grad_norm": 0.10628701001405716, "learning_rate": 4.2447174738841407e-05, "loss": 0.0176, "step": 20860 }, { "epoch": 0.15426805830696905, "grad_norm": 0.08656299114227295, "learning_rate": 4.24434650997151e-05, "loss": 0.0207, "step": 20870 }, { "epoch": 0.1543419768782709, "grad_norm": 0.1467808336019516, "learning_rate": 4.24397554605888e-05, "loss": 0.0179, "step": 20880 }, { "epoch": 0.15441589544957274, "grad_norm": 0.0891091376543045, "learning_rate": 4.243604582146249e-05, "loss": 0.02, "step": 20890 }, { "epoch": 0.1544898140208746, "grad_norm": 0.08970539271831512, "learning_rate": 4.2432336182336184e-05, "loss": 0.02, "step": 20900 }, { "epoch": 0.15456373259217646, "grad_norm": 0.07861539721488953, "learning_rate": 4.242862654320987e-05, "loss": 0.0222, "step": 20910 }, { "epoch": 0.1546376511634783, "grad_norm": 0.09449228644371033, "learning_rate": 4.2424916904083576e-05, "loss": 0.0195, "step": 20920 }, { "epoch": 0.15471156973478017, "grad_norm": 0.10891766846179962, "learning_rate": 4.242120726495727e-05, "loss": 0.02, "step": 20930 }, { "epoch": 0.15478548830608202, "grad_norm": 0.08089979737997055, "learning_rate": 4.241749762583096e-05, "loss": 0.0209, "step": 20940 }, { "epoch": 0.15485940687738387, "grad_norm": 0.08476491272449493, "learning_rate": 4.241378798670466e-05, "loss": 0.021, "step": 20950 }, { "epoch": 0.15493332544868574, "grad_norm": 0.0795905813574791, "learning_rate": 4.2410078347578346e-05, "loss": 0.0222, "step": 20960 }, { "epoch": 0.15500724401998758, "grad_norm": 0.11419697105884552, "learning_rate": 4.240636870845204e-05, "loss": 0.0208, "step": 20970 }, { "epoch": 0.15508116259128943, "grad_norm": 0.08529966324567795, "learning_rate": 4.240265906932574e-05, "loss": 0.0201, "step": 20980 }, { "epoch": 0.1551550811625913, "grad_norm": 0.07745010405778885, "learning_rate": 4.2398949430199434e-05, "loss": 0.019, "step": 20990 }, { "epoch": 0.15522899973389315, "grad_norm": 0.10719358175992966, "learning_rate": 4.239523979107313e-05, "loss": 0.0205, "step": 21000 }, { "epoch": 0.155302918305195, "grad_norm": 0.09493697434663773, "learning_rate": 4.2391530151946826e-05, "loss": 0.0203, "step": 21010 }, { "epoch": 0.15537683687649684, "grad_norm": 0.09876014292240143, "learning_rate": 4.2387820512820515e-05, "loss": 0.0203, "step": 21020 }, { "epoch": 0.1554507554477987, "grad_norm": 0.0771779790520668, "learning_rate": 4.238411087369421e-05, "loss": 0.0177, "step": 21030 }, { "epoch": 0.15552467401910056, "grad_norm": 0.07993631809949875, "learning_rate": 4.23804012345679e-05, "loss": 0.0184, "step": 21040 }, { "epoch": 0.1555985925904024, "grad_norm": 0.07111372798681259, "learning_rate": 4.2376691595441596e-05, "loss": 0.0203, "step": 21050 }, { "epoch": 0.15567251116170427, "grad_norm": 0.07143562287092209, "learning_rate": 4.237298195631529e-05, "loss": 0.0205, "step": 21060 }, { "epoch": 0.15574642973300612, "grad_norm": 0.11168545484542847, "learning_rate": 4.236927231718899e-05, "loss": 0.0222, "step": 21070 }, { "epoch": 0.15582034830430797, "grad_norm": 0.08068099617958069, "learning_rate": 4.2365562678062684e-05, "loss": 0.0204, "step": 21080 }, { "epoch": 0.15589426687560984, "grad_norm": 0.10373964160680771, "learning_rate": 4.236185303893637e-05, "loss": 0.0172, "step": 21090 }, { "epoch": 0.15596818544691168, "grad_norm": 0.11680703610181808, "learning_rate": 4.235814339981007e-05, "loss": 0.0236, "step": 21100 }, { "epoch": 0.15604210401821353, "grad_norm": 0.09924782812595367, "learning_rate": 4.2354433760683765e-05, "loss": 0.0208, "step": 21110 }, { "epoch": 0.1561160225895154, "grad_norm": 0.07073837518692017, "learning_rate": 4.2350724121557454e-05, "loss": 0.0179, "step": 21120 }, { "epoch": 0.15618994116081725, "grad_norm": 0.09141246229410172, "learning_rate": 4.234701448243115e-05, "loss": 0.019, "step": 21130 }, { "epoch": 0.1562638597321191, "grad_norm": 0.1136874258518219, "learning_rate": 4.2343304843304846e-05, "loss": 0.0198, "step": 21140 }, { "epoch": 0.15633777830342094, "grad_norm": 0.07442887872457504, "learning_rate": 4.233959520417854e-05, "loss": 0.0186, "step": 21150 }, { "epoch": 0.1564116968747228, "grad_norm": 0.09873552620410919, "learning_rate": 4.233588556505224e-05, "loss": 0.0184, "step": 21160 }, { "epoch": 0.15648561544602466, "grad_norm": 0.09348463267087936, "learning_rate": 4.233217592592593e-05, "loss": 0.0192, "step": 21170 }, { "epoch": 0.1565595340173265, "grad_norm": 0.09827948361635208, "learning_rate": 4.232846628679962e-05, "loss": 0.0206, "step": 21180 }, { "epoch": 0.15663345258862837, "grad_norm": 0.09134156256914139, "learning_rate": 4.232475664767331e-05, "loss": 0.021, "step": 21190 }, { "epoch": 0.15670737115993022, "grad_norm": 0.09025607258081436, "learning_rate": 4.232104700854701e-05, "loss": 0.0205, "step": 21200 }, { "epoch": 0.15678128973123207, "grad_norm": 0.09077123552560806, "learning_rate": 4.2317337369420704e-05, "loss": 0.0223, "step": 21210 }, { "epoch": 0.15685520830253394, "grad_norm": 0.07134833931922913, "learning_rate": 4.23136277302944e-05, "loss": 0.0167, "step": 21220 }, { "epoch": 0.15692912687383578, "grad_norm": 0.10388438403606415, "learning_rate": 4.2309918091168096e-05, "loss": 0.0239, "step": 21230 }, { "epoch": 0.15700304544513763, "grad_norm": 0.11611325293779373, "learning_rate": 4.230620845204179e-05, "loss": 0.0229, "step": 21240 }, { "epoch": 0.1570769640164395, "grad_norm": 0.0772848129272461, "learning_rate": 4.230249881291548e-05, "loss": 0.019, "step": 21250 }, { "epoch": 0.15715088258774135, "grad_norm": 0.10972341895103455, "learning_rate": 4.229878917378918e-05, "loss": 0.0206, "step": 21260 }, { "epoch": 0.1572248011590432, "grad_norm": 0.08381499350070953, "learning_rate": 4.229507953466287e-05, "loss": 0.0209, "step": 21270 }, { "epoch": 0.15729871973034507, "grad_norm": 0.10344956070184708, "learning_rate": 4.229136989553656e-05, "loss": 0.0227, "step": 21280 }, { "epoch": 0.1573726383016469, "grad_norm": 0.08652588725090027, "learning_rate": 4.228766025641026e-05, "loss": 0.0205, "step": 21290 }, { "epoch": 0.15744655687294876, "grad_norm": 0.09220878779888153, "learning_rate": 4.2283950617283955e-05, "loss": 0.0222, "step": 21300 }, { "epoch": 0.1575204754442506, "grad_norm": 0.111487478017807, "learning_rate": 4.228024097815765e-05, "loss": 0.0196, "step": 21310 }, { "epoch": 0.15759439401555247, "grad_norm": 0.09026884287595749, "learning_rate": 4.227653133903134e-05, "loss": 0.0211, "step": 21320 }, { "epoch": 0.15766831258685432, "grad_norm": 0.10143148899078369, "learning_rate": 4.2272821699905036e-05, "loss": 0.0196, "step": 21330 }, { "epoch": 0.15774223115815617, "grad_norm": 0.07933638989925385, "learning_rate": 4.226911206077873e-05, "loss": 0.0177, "step": 21340 }, { "epoch": 0.15781614972945804, "grad_norm": 0.0977751761674881, "learning_rate": 4.226540242165242e-05, "loss": 0.0199, "step": 21350 }, { "epoch": 0.15789006830075988, "grad_norm": 0.09351316839456558, "learning_rate": 4.226169278252612e-05, "loss": 0.0209, "step": 21360 }, { "epoch": 0.15796398687206173, "grad_norm": 0.0827011987566948, "learning_rate": 4.225798314339981e-05, "loss": 0.0178, "step": 21370 }, { "epoch": 0.1580379054433636, "grad_norm": 0.0982867032289505, "learning_rate": 4.225427350427351e-05, "loss": 0.0216, "step": 21380 }, { "epoch": 0.15811182401466545, "grad_norm": 0.0931718572974205, "learning_rate": 4.2250563865147205e-05, "loss": 0.0185, "step": 21390 }, { "epoch": 0.1581857425859673, "grad_norm": 0.08066631108522415, "learning_rate": 4.2246854226020894e-05, "loss": 0.0217, "step": 21400 }, { "epoch": 0.15825966115726917, "grad_norm": 0.09623356908559799, "learning_rate": 4.224314458689459e-05, "loss": 0.017, "step": 21410 }, { "epoch": 0.158333579728571, "grad_norm": 0.0713859349489212, "learning_rate": 4.223943494776828e-05, "loss": 0.0191, "step": 21420 }, { "epoch": 0.15840749829987286, "grad_norm": 0.08927904069423676, "learning_rate": 4.2235725308641975e-05, "loss": 0.0205, "step": 21430 }, { "epoch": 0.1584814168711747, "grad_norm": 0.0775395855307579, "learning_rate": 4.223201566951567e-05, "loss": 0.0208, "step": 21440 }, { "epoch": 0.15855533544247657, "grad_norm": 0.08013869822025299, "learning_rate": 4.222830603038937e-05, "loss": 0.0172, "step": 21450 }, { "epoch": 0.15862925401377842, "grad_norm": 0.0839141458272934, "learning_rate": 4.222459639126306e-05, "loss": 0.0229, "step": 21460 }, { "epoch": 0.15870317258508027, "grad_norm": 0.09782402217388153, "learning_rate": 4.222088675213676e-05, "loss": 0.0194, "step": 21470 }, { "epoch": 0.15877709115638214, "grad_norm": 0.10169842839241028, "learning_rate": 4.221717711301045e-05, "loss": 0.0204, "step": 21480 }, { "epoch": 0.15885100972768398, "grad_norm": 0.08428753167390823, "learning_rate": 4.2213467473884144e-05, "loss": 0.0172, "step": 21490 }, { "epoch": 0.15892492829898583, "grad_norm": 0.08111728727817535, "learning_rate": 4.220975783475783e-05, "loss": 0.019, "step": 21500 }, { "epoch": 0.1589988468702877, "grad_norm": 0.08612488210201263, "learning_rate": 4.220604819563153e-05, "loss": 0.0186, "step": 21510 }, { "epoch": 0.15907276544158955, "grad_norm": 0.08112023025751114, "learning_rate": 4.2202338556505225e-05, "loss": 0.0183, "step": 21520 }, { "epoch": 0.1591466840128914, "grad_norm": 0.08562341332435608, "learning_rate": 4.219862891737892e-05, "loss": 0.0213, "step": 21530 }, { "epoch": 0.15922060258419327, "grad_norm": 0.09395560622215271, "learning_rate": 4.219491927825262e-05, "loss": 0.0209, "step": 21540 }, { "epoch": 0.1592945211554951, "grad_norm": 0.09960179030895233, "learning_rate": 4.2191209639126306e-05, "loss": 0.0202, "step": 21550 }, { "epoch": 0.15936843972679696, "grad_norm": 0.10689588636159897, "learning_rate": 4.21875e-05, "loss": 0.0214, "step": 21560 }, { "epoch": 0.1594423582980988, "grad_norm": 0.11698466539382935, "learning_rate": 4.21837903608737e-05, "loss": 0.0194, "step": 21570 }, { "epoch": 0.15951627686940067, "grad_norm": 0.059294432401657104, "learning_rate": 4.218008072174739e-05, "loss": 0.0219, "step": 21580 }, { "epoch": 0.15959019544070252, "grad_norm": 0.07105332612991333, "learning_rate": 4.217637108262108e-05, "loss": 0.0184, "step": 21590 }, { "epoch": 0.15966411401200437, "grad_norm": 0.09126646816730499, "learning_rate": 4.217266144349478e-05, "loss": 0.0208, "step": 21600 }, { "epoch": 0.15973803258330624, "grad_norm": 0.08969204127788544, "learning_rate": 4.2168951804368475e-05, "loss": 0.0236, "step": 21610 }, { "epoch": 0.15981195115460808, "grad_norm": 0.0936492457985878, "learning_rate": 4.216524216524217e-05, "loss": 0.0194, "step": 21620 }, { "epoch": 0.15988586972590993, "grad_norm": 0.10618958622217178, "learning_rate": 4.216153252611586e-05, "loss": 0.0201, "step": 21630 }, { "epoch": 0.1599597882972118, "grad_norm": 0.09264793246984482, "learning_rate": 4.2157822886989556e-05, "loss": 0.0188, "step": 21640 }, { "epoch": 0.16003370686851365, "grad_norm": 0.0832374095916748, "learning_rate": 4.2154113247863246e-05, "loss": 0.0185, "step": 21650 }, { "epoch": 0.1601076254398155, "grad_norm": 0.06740816682577133, "learning_rate": 4.215040360873694e-05, "loss": 0.0189, "step": 21660 }, { "epoch": 0.16018154401111737, "grad_norm": 0.094356968998909, "learning_rate": 4.214669396961064e-05, "loss": 0.0202, "step": 21670 }, { "epoch": 0.1602554625824192, "grad_norm": 0.08573713898658752, "learning_rate": 4.2142984330484334e-05, "loss": 0.0187, "step": 21680 }, { "epoch": 0.16032938115372106, "grad_norm": 0.10325209051370621, "learning_rate": 4.213927469135803e-05, "loss": 0.0224, "step": 21690 }, { "epoch": 0.1604032997250229, "grad_norm": 0.08298230916261673, "learning_rate": 4.2135565052231725e-05, "loss": 0.0197, "step": 21700 }, { "epoch": 0.16047721829632478, "grad_norm": 0.06460912525653839, "learning_rate": 4.2131855413105415e-05, "loss": 0.02, "step": 21710 }, { "epoch": 0.16055113686762662, "grad_norm": 0.08635788410902023, "learning_rate": 4.212814577397911e-05, "loss": 0.0202, "step": 21720 }, { "epoch": 0.16062505543892847, "grad_norm": 0.07899269461631775, "learning_rate": 4.21244361348528e-05, "loss": 0.0214, "step": 21730 }, { "epoch": 0.16069897401023034, "grad_norm": 0.10716480016708374, "learning_rate": 4.2120726495726496e-05, "loss": 0.0219, "step": 21740 }, { "epoch": 0.16077289258153218, "grad_norm": 0.07382979989051819, "learning_rate": 4.211701685660019e-05, "loss": 0.0176, "step": 21750 }, { "epoch": 0.16084681115283403, "grad_norm": 0.08749647438526154, "learning_rate": 4.211330721747389e-05, "loss": 0.0221, "step": 21760 }, { "epoch": 0.1609207297241359, "grad_norm": 0.08402451872825623, "learning_rate": 4.2109597578347584e-05, "loss": 0.0182, "step": 21770 }, { "epoch": 0.16099464829543775, "grad_norm": 0.09932088106870651, "learning_rate": 4.210588793922127e-05, "loss": 0.0216, "step": 21780 }, { "epoch": 0.1610685668667396, "grad_norm": 0.1020146980881691, "learning_rate": 4.210217830009497e-05, "loss": 0.0191, "step": 21790 }, { "epoch": 0.16114248543804147, "grad_norm": 0.07757379114627838, "learning_rate": 4.2098468660968665e-05, "loss": 0.0177, "step": 21800 }, { "epoch": 0.1612164040093433, "grad_norm": 0.09609977900981903, "learning_rate": 4.2094759021842354e-05, "loss": 0.0182, "step": 21810 }, { "epoch": 0.16129032258064516, "grad_norm": 0.11304876953363419, "learning_rate": 4.209104938271605e-05, "loss": 0.017, "step": 21820 }, { "epoch": 0.161364241151947, "grad_norm": 0.11363095790147781, "learning_rate": 4.2087339743589746e-05, "loss": 0.0219, "step": 21830 }, { "epoch": 0.16143815972324888, "grad_norm": 0.09433922916650772, "learning_rate": 4.208363010446344e-05, "loss": 0.0179, "step": 21840 }, { "epoch": 0.16151207829455072, "grad_norm": 0.0955984964966774, "learning_rate": 4.207992046533714e-05, "loss": 0.0195, "step": 21850 }, { "epoch": 0.16158599686585257, "grad_norm": 0.09357141703367233, "learning_rate": 4.207621082621083e-05, "loss": 0.0198, "step": 21860 }, { "epoch": 0.16165991543715444, "grad_norm": 0.08176647871732712, "learning_rate": 4.207250118708452e-05, "loss": 0.0175, "step": 21870 }, { "epoch": 0.16173383400845628, "grad_norm": 0.10562469065189362, "learning_rate": 4.206879154795821e-05, "loss": 0.0197, "step": 21880 }, { "epoch": 0.16180775257975813, "grad_norm": 0.10479024797677994, "learning_rate": 4.206508190883191e-05, "loss": 0.0205, "step": 21890 }, { "epoch": 0.16188167115106, "grad_norm": 0.11500013619661331, "learning_rate": 4.2061372269705604e-05, "loss": 0.0208, "step": 21900 }, { "epoch": 0.16195558972236185, "grad_norm": 0.0811997801065445, "learning_rate": 4.20576626305793e-05, "loss": 0.0186, "step": 21910 }, { "epoch": 0.1620295082936637, "grad_norm": 0.08326699584722519, "learning_rate": 4.2053952991452996e-05, "loss": 0.0182, "step": 21920 }, { "epoch": 0.16210342686496557, "grad_norm": 0.08639881014823914, "learning_rate": 4.205024335232669e-05, "loss": 0.0179, "step": 21930 }, { "epoch": 0.1621773454362674, "grad_norm": 0.08141583949327469, "learning_rate": 4.204653371320038e-05, "loss": 0.0178, "step": 21940 }, { "epoch": 0.16225126400756926, "grad_norm": 0.12498077005147934, "learning_rate": 4.204282407407408e-05, "loss": 0.0223, "step": 21950 }, { "epoch": 0.1623251825788711, "grad_norm": 0.09967196732759476, "learning_rate": 4.2039114434947766e-05, "loss": 0.0201, "step": 21960 }, { "epoch": 0.16239910115017298, "grad_norm": 0.1027727872133255, "learning_rate": 4.203540479582146e-05, "loss": 0.0209, "step": 21970 }, { "epoch": 0.16247301972147482, "grad_norm": 0.08557023853063583, "learning_rate": 4.203169515669516e-05, "loss": 0.0185, "step": 21980 }, { "epoch": 0.16254693829277667, "grad_norm": 0.09070567041635513, "learning_rate": 4.2027985517568854e-05, "loss": 0.0184, "step": 21990 }, { "epoch": 0.16262085686407854, "grad_norm": 0.11034265905618668, "learning_rate": 4.202427587844255e-05, "loss": 0.0205, "step": 22000 }, { "epoch": 0.16269477543538038, "grad_norm": 0.07222605496644974, "learning_rate": 4.202056623931624e-05, "loss": 0.0207, "step": 22010 }, { "epoch": 0.16276869400668223, "grad_norm": 0.09490154683589935, "learning_rate": 4.2016856600189935e-05, "loss": 0.0174, "step": 22020 }, { "epoch": 0.1628426125779841, "grad_norm": 0.1052534207701683, "learning_rate": 4.201314696106363e-05, "loss": 0.0205, "step": 22030 }, { "epoch": 0.16291653114928595, "grad_norm": 0.06723055988550186, "learning_rate": 4.200943732193732e-05, "loss": 0.0203, "step": 22040 }, { "epoch": 0.1629904497205878, "grad_norm": 0.06851140409708023, "learning_rate": 4.2005727682811017e-05, "loss": 0.0207, "step": 22050 }, { "epoch": 0.16306436829188967, "grad_norm": 0.0777968242764473, "learning_rate": 4.200201804368471e-05, "loss": 0.0218, "step": 22060 }, { "epoch": 0.1631382868631915, "grad_norm": 0.07410554587841034, "learning_rate": 4.199830840455841e-05, "loss": 0.0212, "step": 22070 }, { "epoch": 0.16321220543449336, "grad_norm": 0.10092426836490631, "learning_rate": 4.1994598765432104e-05, "loss": 0.0203, "step": 22080 }, { "epoch": 0.1632861240057952, "grad_norm": 0.10312744975090027, "learning_rate": 4.1990889126305794e-05, "loss": 0.0192, "step": 22090 }, { "epoch": 0.16336004257709708, "grad_norm": 0.09843014925718307, "learning_rate": 4.198717948717949e-05, "loss": 0.0214, "step": 22100 }, { "epoch": 0.16343396114839892, "grad_norm": 0.0896533951163292, "learning_rate": 4.198346984805318e-05, "loss": 0.0198, "step": 22110 }, { "epoch": 0.16350787971970077, "grad_norm": 0.09775464981794357, "learning_rate": 4.1979760208926875e-05, "loss": 0.0197, "step": 22120 }, { "epoch": 0.16358179829100264, "grad_norm": 0.11483138799667358, "learning_rate": 4.197605056980057e-05, "loss": 0.017, "step": 22130 }, { "epoch": 0.16365571686230448, "grad_norm": 0.08392385393381119, "learning_rate": 4.197234093067427e-05, "loss": 0.0196, "step": 22140 }, { "epoch": 0.16372963543360633, "grad_norm": 0.07911352068185806, "learning_rate": 4.196863129154796e-05, "loss": 0.0201, "step": 22150 }, { "epoch": 0.1638035540049082, "grad_norm": 0.08723487704992294, "learning_rate": 4.196492165242166e-05, "loss": 0.0217, "step": 22160 }, { "epoch": 0.16387747257621005, "grad_norm": 0.08294124156236649, "learning_rate": 4.196121201329535e-05, "loss": 0.017, "step": 22170 }, { "epoch": 0.1639513911475119, "grad_norm": 0.08533074706792831, "learning_rate": 4.1957502374169044e-05, "loss": 0.0195, "step": 22180 }, { "epoch": 0.16402530971881377, "grad_norm": 0.11008051037788391, "learning_rate": 4.195379273504273e-05, "loss": 0.0189, "step": 22190 }, { "epoch": 0.1640992282901156, "grad_norm": 0.08344793319702148, "learning_rate": 4.195008309591643e-05, "loss": 0.0194, "step": 22200 }, { "epoch": 0.16417314686141746, "grad_norm": 0.0651671513915062, "learning_rate": 4.1946373456790125e-05, "loss": 0.0202, "step": 22210 }, { "epoch": 0.16424706543271933, "grad_norm": 0.06644406169652939, "learning_rate": 4.194266381766382e-05, "loss": 0.0201, "step": 22220 }, { "epoch": 0.16432098400402118, "grad_norm": 0.08951295912265778, "learning_rate": 4.193895417853752e-05, "loss": 0.0175, "step": 22230 }, { "epoch": 0.16439490257532302, "grad_norm": 0.09983833134174347, "learning_rate": 4.1935244539411206e-05, "loss": 0.0187, "step": 22240 }, { "epoch": 0.16446882114662487, "grad_norm": 0.08202794194221497, "learning_rate": 4.19315349002849e-05, "loss": 0.019, "step": 22250 }, { "epoch": 0.16454273971792674, "grad_norm": 0.10202916711568832, "learning_rate": 4.19278252611586e-05, "loss": 0.0193, "step": 22260 }, { "epoch": 0.16461665828922858, "grad_norm": 0.09498098492622375, "learning_rate": 4.192411562203229e-05, "loss": 0.0206, "step": 22270 }, { "epoch": 0.16469057686053043, "grad_norm": 0.07276278734207153, "learning_rate": 4.192040598290598e-05, "loss": 0.0216, "step": 22280 }, { "epoch": 0.1647644954318323, "grad_norm": 0.09117782860994339, "learning_rate": 4.191669634377968e-05, "loss": 0.0194, "step": 22290 }, { "epoch": 0.16483841400313415, "grad_norm": 0.08927126228809357, "learning_rate": 4.1912986704653375e-05, "loss": 0.0178, "step": 22300 }, { "epoch": 0.164912332574436, "grad_norm": 0.09630057960748672, "learning_rate": 4.190927706552707e-05, "loss": 0.0189, "step": 22310 }, { "epoch": 0.16498625114573787, "grad_norm": 0.0837571918964386, "learning_rate": 4.190556742640076e-05, "loss": 0.0204, "step": 22320 }, { "epoch": 0.1650601697170397, "grad_norm": 0.1012878343462944, "learning_rate": 4.1901857787274456e-05, "loss": 0.0198, "step": 22330 }, { "epoch": 0.16513408828834156, "grad_norm": 0.11893408745527267, "learning_rate": 4.1898148148148145e-05, "loss": 0.019, "step": 22340 }, { "epoch": 0.16520800685964343, "grad_norm": 0.07912056893110275, "learning_rate": 4.189443850902184e-05, "loss": 0.0191, "step": 22350 }, { "epoch": 0.16528192543094528, "grad_norm": 0.11213517934083939, "learning_rate": 4.189072886989554e-05, "loss": 0.0196, "step": 22360 }, { "epoch": 0.16535584400224712, "grad_norm": 0.08836356550455093, "learning_rate": 4.188701923076923e-05, "loss": 0.0166, "step": 22370 }, { "epoch": 0.16542976257354897, "grad_norm": 0.07143910974264145, "learning_rate": 4.188330959164293e-05, "loss": 0.0226, "step": 22380 }, { "epoch": 0.16550368114485084, "grad_norm": 0.08796315640211105, "learning_rate": 4.1879599952516625e-05, "loss": 0.0223, "step": 22390 }, { "epoch": 0.16557759971615268, "grad_norm": 0.10248327255249023, "learning_rate": 4.1875890313390314e-05, "loss": 0.0219, "step": 22400 }, { "epoch": 0.16565151828745453, "grad_norm": 0.10602504760026932, "learning_rate": 4.187218067426401e-05, "loss": 0.0195, "step": 22410 }, { "epoch": 0.1657254368587564, "grad_norm": 0.087444968521595, "learning_rate": 4.18684710351377e-05, "loss": 0.0204, "step": 22420 }, { "epoch": 0.16579935543005825, "grad_norm": 0.08767131716012955, "learning_rate": 4.1864761396011396e-05, "loss": 0.0208, "step": 22430 }, { "epoch": 0.1658732740013601, "grad_norm": 0.10391967743635178, "learning_rate": 4.186105175688509e-05, "loss": 0.0211, "step": 22440 }, { "epoch": 0.16594719257266197, "grad_norm": 0.06989863514900208, "learning_rate": 4.185734211775879e-05, "loss": 0.0213, "step": 22450 }, { "epoch": 0.1660211111439638, "grad_norm": 0.10912055522203445, "learning_rate": 4.1853632478632483e-05, "loss": 0.0217, "step": 22460 }, { "epoch": 0.16609502971526566, "grad_norm": 0.08249878883361816, "learning_rate": 4.184992283950617e-05, "loss": 0.0189, "step": 22470 }, { "epoch": 0.16616894828656753, "grad_norm": 0.10511092096567154, "learning_rate": 4.184621320037987e-05, "loss": 0.0191, "step": 22480 }, { "epoch": 0.16624286685786938, "grad_norm": 0.11219155788421631, "learning_rate": 4.1842503561253565e-05, "loss": 0.0207, "step": 22490 }, { "epoch": 0.16631678542917122, "grad_norm": 0.09815070778131485, "learning_rate": 4.1838793922127254e-05, "loss": 0.0223, "step": 22500 }, { "epoch": 0.16639070400047307, "grad_norm": 0.12557922303676605, "learning_rate": 4.183508428300095e-05, "loss": 0.0216, "step": 22510 }, { "epoch": 0.16646462257177494, "grad_norm": 0.08924151957035065, "learning_rate": 4.1831374643874646e-05, "loss": 0.0211, "step": 22520 }, { "epoch": 0.16653854114307678, "grad_norm": 0.0938444435596466, "learning_rate": 4.182766500474834e-05, "loss": 0.0178, "step": 22530 }, { "epoch": 0.16661245971437863, "grad_norm": 0.06955837458372116, "learning_rate": 4.182395536562204e-05, "loss": 0.0189, "step": 22540 }, { "epoch": 0.1666863782856805, "grad_norm": 0.09439922124147415, "learning_rate": 4.182024572649573e-05, "loss": 0.0193, "step": 22550 }, { "epoch": 0.16676029685698235, "grad_norm": 0.07448028773069382, "learning_rate": 4.181653608736942e-05, "loss": 0.0218, "step": 22560 }, { "epoch": 0.1668342154282842, "grad_norm": 0.08249770104885101, "learning_rate": 4.181282644824311e-05, "loss": 0.017, "step": 22570 }, { "epoch": 0.16690813399958607, "grad_norm": 0.10910207033157349, "learning_rate": 4.180911680911681e-05, "loss": 0.0185, "step": 22580 }, { "epoch": 0.1669820525708879, "grad_norm": 0.10977750271558762, "learning_rate": 4.1805407169990504e-05, "loss": 0.0187, "step": 22590 }, { "epoch": 0.16705597114218976, "grad_norm": 0.06731917709112167, "learning_rate": 4.18016975308642e-05, "loss": 0.0191, "step": 22600 }, { "epoch": 0.16712988971349163, "grad_norm": 0.09132971614599228, "learning_rate": 4.1797987891737896e-05, "loss": 0.0195, "step": 22610 }, { "epoch": 0.16720380828479348, "grad_norm": 0.09934677183628082, "learning_rate": 4.179427825261159e-05, "loss": 0.019, "step": 22620 }, { "epoch": 0.16727772685609532, "grad_norm": 0.09889721870422363, "learning_rate": 4.179056861348528e-05, "loss": 0.0222, "step": 22630 }, { "epoch": 0.16735164542739717, "grad_norm": 0.09013309329748154, "learning_rate": 4.178685897435898e-05, "loss": 0.0201, "step": 22640 }, { "epoch": 0.16742556399869904, "grad_norm": 0.09726738929748535, "learning_rate": 4.1783149335232666e-05, "loss": 0.0192, "step": 22650 }, { "epoch": 0.16749948257000088, "grad_norm": 0.1414596438407898, "learning_rate": 4.177943969610636e-05, "loss": 0.0186, "step": 22660 }, { "epoch": 0.16757340114130273, "grad_norm": 0.09074228256940842, "learning_rate": 4.177573005698006e-05, "loss": 0.0217, "step": 22670 }, { "epoch": 0.1676473197126046, "grad_norm": 0.0756804347038269, "learning_rate": 4.1772020417853754e-05, "loss": 0.0202, "step": 22680 }, { "epoch": 0.16772123828390645, "grad_norm": 0.08531906455755234, "learning_rate": 4.176831077872745e-05, "loss": 0.0182, "step": 22690 }, { "epoch": 0.1677951568552083, "grad_norm": 0.12203525751829147, "learning_rate": 4.176460113960114e-05, "loss": 0.0202, "step": 22700 }, { "epoch": 0.16786907542651017, "grad_norm": 0.06335960328578949, "learning_rate": 4.1760891500474835e-05, "loss": 0.0228, "step": 22710 }, { "epoch": 0.167942993997812, "grad_norm": 0.060673534870147705, "learning_rate": 4.175718186134853e-05, "loss": 0.0162, "step": 22720 }, { "epoch": 0.16801691256911386, "grad_norm": 0.08812720328569412, "learning_rate": 4.175347222222222e-05, "loss": 0.0203, "step": 22730 }, { "epoch": 0.16809083114041573, "grad_norm": 0.07789555191993713, "learning_rate": 4.1749762583095916e-05, "loss": 0.0182, "step": 22740 }, { "epoch": 0.16816474971171758, "grad_norm": 0.07631140947341919, "learning_rate": 4.174605294396961e-05, "loss": 0.0219, "step": 22750 }, { "epoch": 0.16823866828301942, "grad_norm": 0.08663522452116013, "learning_rate": 4.174234330484331e-05, "loss": 0.0185, "step": 22760 }, { "epoch": 0.16831258685432127, "grad_norm": 0.12132871896028519, "learning_rate": 4.1738633665717004e-05, "loss": 0.0195, "step": 22770 }, { "epoch": 0.16838650542562314, "grad_norm": 0.07200045883655548, "learning_rate": 4.173492402659069e-05, "loss": 0.0196, "step": 22780 }, { "epoch": 0.16846042399692499, "grad_norm": 0.08001714944839478, "learning_rate": 4.173121438746439e-05, "loss": 0.0219, "step": 22790 }, { "epoch": 0.16853434256822683, "grad_norm": 0.07372362911701202, "learning_rate": 4.172750474833808e-05, "loss": 0.0185, "step": 22800 }, { "epoch": 0.1686082611395287, "grad_norm": 0.08221118152141571, "learning_rate": 4.1723795109211774e-05, "loss": 0.0192, "step": 22810 }, { "epoch": 0.16868217971083055, "grad_norm": 0.0778815820813179, "learning_rate": 4.172008547008547e-05, "loss": 0.0196, "step": 22820 }, { "epoch": 0.1687560982821324, "grad_norm": 0.10039696842432022, "learning_rate": 4.1716375830959166e-05, "loss": 0.0243, "step": 22830 }, { "epoch": 0.16883001685343427, "grad_norm": 0.1025981679558754, "learning_rate": 4.171266619183286e-05, "loss": 0.0186, "step": 22840 }, { "epoch": 0.1689039354247361, "grad_norm": 0.07049137353897095, "learning_rate": 4.170895655270656e-05, "loss": 0.0186, "step": 22850 }, { "epoch": 0.16897785399603796, "grad_norm": 0.1277700960636139, "learning_rate": 4.170524691358025e-05, "loss": 0.0171, "step": 22860 }, { "epoch": 0.16905177256733983, "grad_norm": 0.07361399382352829, "learning_rate": 4.1701537274453944e-05, "loss": 0.021, "step": 22870 }, { "epoch": 0.16912569113864168, "grad_norm": 0.07534274458885193, "learning_rate": 4.169782763532763e-05, "loss": 0.021, "step": 22880 }, { "epoch": 0.16919960970994352, "grad_norm": 0.09960392117500305, "learning_rate": 4.169411799620133e-05, "loss": 0.0179, "step": 22890 }, { "epoch": 0.16927352828124537, "grad_norm": 0.08838877826929092, "learning_rate": 4.169040835707503e-05, "loss": 0.0211, "step": 22900 }, { "epoch": 0.16934744685254724, "grad_norm": 0.12791630625724792, "learning_rate": 4.168669871794872e-05, "loss": 0.021, "step": 22910 }, { "epoch": 0.16942136542384909, "grad_norm": 0.1230262815952301, "learning_rate": 4.1682989078822417e-05, "loss": 0.0173, "step": 22920 }, { "epoch": 0.16949528399515093, "grad_norm": 0.14303380250930786, "learning_rate": 4.1679279439696106e-05, "loss": 0.0198, "step": 22930 }, { "epoch": 0.1695692025664528, "grad_norm": 0.1258019655942917, "learning_rate": 4.16755698005698e-05, "loss": 0.0214, "step": 22940 }, { "epoch": 0.16964312113775465, "grad_norm": 0.09205642342567444, "learning_rate": 4.16718601614435e-05, "loss": 0.0194, "step": 22950 }, { "epoch": 0.1697170397090565, "grad_norm": 0.08836735039949417, "learning_rate": 4.166815052231719e-05, "loss": 0.0197, "step": 22960 }, { "epoch": 0.16979095828035837, "grad_norm": 0.09797254949808121, "learning_rate": 4.166444088319088e-05, "loss": 0.0196, "step": 22970 }, { "epoch": 0.1698648768516602, "grad_norm": 0.0916217640042305, "learning_rate": 4.166073124406458e-05, "loss": 0.0193, "step": 22980 }, { "epoch": 0.16993879542296206, "grad_norm": 0.07919829338788986, "learning_rate": 4.1657021604938275e-05, "loss": 0.0196, "step": 22990 }, { "epoch": 0.17001271399426393, "grad_norm": 0.09741434454917908, "learning_rate": 4.165331196581197e-05, "loss": 0.0211, "step": 23000 }, { "epoch": 0.17008663256556578, "grad_norm": 0.12706300616264343, "learning_rate": 4.164960232668566e-05, "loss": 0.0207, "step": 23010 }, { "epoch": 0.17016055113686762, "grad_norm": 0.11344017088413239, "learning_rate": 4.1645892687559356e-05, "loss": 0.0191, "step": 23020 }, { "epoch": 0.17023446970816947, "grad_norm": 0.09292125701904297, "learning_rate": 4.1642183048433045e-05, "loss": 0.0205, "step": 23030 }, { "epoch": 0.17030838827947134, "grad_norm": 0.0878138393163681, "learning_rate": 4.163847340930674e-05, "loss": 0.0227, "step": 23040 }, { "epoch": 0.17038230685077319, "grad_norm": 0.09296700358390808, "learning_rate": 4.1634763770180444e-05, "loss": 0.0218, "step": 23050 }, { "epoch": 0.17045622542207503, "grad_norm": 0.09765233844518661, "learning_rate": 4.163105413105413e-05, "loss": 0.0187, "step": 23060 }, { "epoch": 0.1705301439933769, "grad_norm": 0.07625902444124222, "learning_rate": 4.162734449192783e-05, "loss": 0.0184, "step": 23070 }, { "epoch": 0.17060406256467875, "grad_norm": 0.13808022439479828, "learning_rate": 4.1623634852801525e-05, "loss": 0.0199, "step": 23080 }, { "epoch": 0.1706779811359806, "grad_norm": 0.1144128069281578, "learning_rate": 4.1619925213675214e-05, "loss": 0.0211, "step": 23090 }, { "epoch": 0.17075189970728247, "grad_norm": 0.075762078166008, "learning_rate": 4.161621557454891e-05, "loss": 0.0187, "step": 23100 }, { "epoch": 0.1708258182785843, "grad_norm": 0.08701431006193161, "learning_rate": 4.16125059354226e-05, "loss": 0.0223, "step": 23110 }, { "epoch": 0.17089973684988616, "grad_norm": 0.10601526498794556, "learning_rate": 4.1608796296296295e-05, "loss": 0.0208, "step": 23120 }, { "epoch": 0.17097365542118803, "grad_norm": 0.10111930966377258, "learning_rate": 4.160508665717e-05, "loss": 0.0189, "step": 23130 }, { "epoch": 0.17104757399248988, "grad_norm": 0.09323742240667343, "learning_rate": 4.160137701804369e-05, "loss": 0.0184, "step": 23140 }, { "epoch": 0.17112149256379172, "grad_norm": 0.10205500572919846, "learning_rate": 4.159766737891738e-05, "loss": 0.0194, "step": 23150 }, { "epoch": 0.1711954111350936, "grad_norm": 0.07837098836898804, "learning_rate": 4.159395773979107e-05, "loss": 0.0225, "step": 23160 }, { "epoch": 0.17126932970639544, "grad_norm": 0.08919687569141388, "learning_rate": 4.159024810066477e-05, "loss": 0.0194, "step": 23170 }, { "epoch": 0.17134324827769729, "grad_norm": 0.09402740001678467, "learning_rate": 4.1586538461538464e-05, "loss": 0.0198, "step": 23180 }, { "epoch": 0.17141716684899913, "grad_norm": 0.12361681461334229, "learning_rate": 4.1582828822412153e-05, "loss": 0.0179, "step": 23190 }, { "epoch": 0.171491085420301, "grad_norm": 0.08092927187681198, "learning_rate": 4.1579119183285856e-05, "loss": 0.0206, "step": 23200 }, { "epoch": 0.17156500399160285, "grad_norm": 0.1065574660897255, "learning_rate": 4.1575409544159545e-05, "loss": 0.0212, "step": 23210 }, { "epoch": 0.1716389225629047, "grad_norm": 0.10360395163297653, "learning_rate": 4.157169990503324e-05, "loss": 0.02, "step": 23220 }, { "epoch": 0.17171284113420657, "grad_norm": 0.11819800734519958, "learning_rate": 4.156799026590694e-05, "loss": 0.0194, "step": 23230 }, { "epoch": 0.1717867597055084, "grad_norm": 0.1112608015537262, "learning_rate": 4.1564280626780627e-05, "loss": 0.0188, "step": 23240 }, { "epoch": 0.17186067827681026, "grad_norm": 0.12329550087451935, "learning_rate": 4.156057098765432e-05, "loss": 0.0201, "step": 23250 }, { "epoch": 0.17193459684811213, "grad_norm": 0.11046748608350754, "learning_rate": 4.155686134852801e-05, "loss": 0.0194, "step": 23260 }, { "epoch": 0.17200851541941398, "grad_norm": 0.09428173303604126, "learning_rate": 4.155315170940171e-05, "loss": 0.0241, "step": 23270 }, { "epoch": 0.17208243399071582, "grad_norm": 0.09326600283384323, "learning_rate": 4.154944207027541e-05, "loss": 0.0193, "step": 23280 }, { "epoch": 0.1721563525620177, "grad_norm": 0.06932845711708069, "learning_rate": 4.15457324311491e-05, "loss": 0.0196, "step": 23290 }, { "epoch": 0.17223027113331954, "grad_norm": 0.08526023477315903, "learning_rate": 4.1542022792022796e-05, "loss": 0.0191, "step": 23300 }, { "epoch": 0.17230418970462139, "grad_norm": 0.07734812796115875, "learning_rate": 4.153831315289649e-05, "loss": 0.0191, "step": 23310 }, { "epoch": 0.17237810827592323, "grad_norm": 0.08788762986660004, "learning_rate": 4.153460351377018e-05, "loss": 0.0205, "step": 23320 }, { "epoch": 0.1724520268472251, "grad_norm": 0.12777380645275116, "learning_rate": 4.153089387464388e-05, "loss": 0.0171, "step": 23330 }, { "epoch": 0.17252594541852695, "grad_norm": 0.08061201870441437, "learning_rate": 4.1527184235517566e-05, "loss": 0.021, "step": 23340 }, { "epoch": 0.1725998639898288, "grad_norm": 0.08452122658491135, "learning_rate": 4.152347459639127e-05, "loss": 0.0183, "step": 23350 }, { "epoch": 0.17267378256113067, "grad_norm": 0.08944263309240341, "learning_rate": 4.1519764957264965e-05, "loss": 0.0191, "step": 23360 }, { "epoch": 0.1727477011324325, "grad_norm": 0.13580366969108582, "learning_rate": 4.1516055318138654e-05, "loss": 0.0216, "step": 23370 }, { "epoch": 0.17282161970373436, "grad_norm": 0.10675542056560516, "learning_rate": 4.151234567901235e-05, "loss": 0.0192, "step": 23380 }, { "epoch": 0.17289553827503623, "grad_norm": 0.09346351772546768, "learning_rate": 4.150863603988604e-05, "loss": 0.0201, "step": 23390 }, { "epoch": 0.17296945684633808, "grad_norm": 0.09439420700073242, "learning_rate": 4.1504926400759735e-05, "loss": 0.022, "step": 23400 }, { "epoch": 0.17304337541763992, "grad_norm": 0.06360670179128647, "learning_rate": 4.150121676163343e-05, "loss": 0.0207, "step": 23410 }, { "epoch": 0.1731172939889418, "grad_norm": 0.07455422729253769, "learning_rate": 4.149750712250712e-05, "loss": 0.019, "step": 23420 }, { "epoch": 0.17319121256024364, "grad_norm": 0.0845806896686554, "learning_rate": 4.149379748338082e-05, "loss": 0.0187, "step": 23430 }, { "epoch": 0.17326513113154549, "grad_norm": 0.10896727442741394, "learning_rate": 4.149008784425451e-05, "loss": 0.0171, "step": 23440 }, { "epoch": 0.17333904970284733, "grad_norm": 0.08428969979286194, "learning_rate": 4.148637820512821e-05, "loss": 0.0186, "step": 23450 }, { "epoch": 0.1734129682741492, "grad_norm": 0.08725108951330185, "learning_rate": 4.1482668566001904e-05, "loss": 0.0203, "step": 23460 }, { "epoch": 0.17348688684545105, "grad_norm": 0.07068879902362823, "learning_rate": 4.147895892687559e-05, "loss": 0.0203, "step": 23470 }, { "epoch": 0.1735608054167529, "grad_norm": 0.09164883196353912, "learning_rate": 4.147524928774929e-05, "loss": 0.0172, "step": 23480 }, { "epoch": 0.17363472398805477, "grad_norm": 0.12593930959701538, "learning_rate": 4.147153964862298e-05, "loss": 0.0213, "step": 23490 }, { "epoch": 0.1737086425593566, "grad_norm": 0.08440453559160233, "learning_rate": 4.1467830009496674e-05, "loss": 0.0194, "step": 23500 }, { "epoch": 0.17378256113065846, "grad_norm": 0.1044711023569107, "learning_rate": 4.146412037037038e-05, "loss": 0.02, "step": 23510 }, { "epoch": 0.17385647970196033, "grad_norm": 0.08570936322212219, "learning_rate": 4.1460410731244066e-05, "loss": 0.0207, "step": 23520 }, { "epoch": 0.17393039827326218, "grad_norm": 0.07355409860610962, "learning_rate": 4.145670109211776e-05, "loss": 0.0179, "step": 23530 }, { "epoch": 0.17400431684456402, "grad_norm": 0.11744547635316849, "learning_rate": 4.145299145299146e-05, "loss": 0.023, "step": 23540 }, { "epoch": 0.1740782354158659, "grad_norm": 0.10709847509860992, "learning_rate": 4.144928181386515e-05, "loss": 0.0206, "step": 23550 }, { "epoch": 0.17415215398716774, "grad_norm": 0.10861247777938843, "learning_rate": 4.144557217473884e-05, "loss": 0.0199, "step": 23560 }, { "epoch": 0.17422607255846959, "grad_norm": 0.09161026030778885, "learning_rate": 4.144186253561253e-05, "loss": 0.0193, "step": 23570 }, { "epoch": 0.17429999112977143, "grad_norm": 0.09316155314445496, "learning_rate": 4.1438152896486235e-05, "loss": 0.023, "step": 23580 }, { "epoch": 0.1743739097010733, "grad_norm": 0.07369499653577805, "learning_rate": 4.143444325735993e-05, "loss": 0.0194, "step": 23590 }, { "epoch": 0.17444782827237515, "grad_norm": 0.10297918319702148, "learning_rate": 4.143073361823362e-05, "loss": 0.021, "step": 23600 }, { "epoch": 0.174521746843677, "grad_norm": 0.07225877791643143, "learning_rate": 4.1427023979107316e-05, "loss": 0.0178, "step": 23610 }, { "epoch": 0.17459566541497887, "grad_norm": 0.08436037600040436, "learning_rate": 4.1423314339981006e-05, "loss": 0.0198, "step": 23620 }, { "epoch": 0.1746695839862807, "grad_norm": 0.08097023516893387, "learning_rate": 4.14196047008547e-05, "loss": 0.0172, "step": 23630 }, { "epoch": 0.17474350255758256, "grad_norm": 0.09056053310632706, "learning_rate": 4.14158950617284e-05, "loss": 0.0195, "step": 23640 }, { "epoch": 0.17481742112888443, "grad_norm": 0.11529424786567688, "learning_rate": 4.141218542260209e-05, "loss": 0.0207, "step": 23650 }, { "epoch": 0.17489133970018628, "grad_norm": 0.08660317212343216, "learning_rate": 4.140847578347579e-05, "loss": 0.02, "step": 23660 }, { "epoch": 0.17496525827148812, "grad_norm": 0.09512817114591599, "learning_rate": 4.140476614434948e-05, "loss": 0.0184, "step": 23670 }, { "epoch": 0.17503917684279, "grad_norm": 0.07830807566642761, "learning_rate": 4.1401056505223175e-05, "loss": 0.0202, "step": 23680 }, { "epoch": 0.17511309541409184, "grad_norm": 0.11256569623947144, "learning_rate": 4.139734686609687e-05, "loss": 0.0195, "step": 23690 }, { "epoch": 0.17518701398539369, "grad_norm": 0.09436741471290588, "learning_rate": 4.139363722697056e-05, "loss": 0.0195, "step": 23700 }, { "epoch": 0.17526093255669553, "grad_norm": 0.05403030291199684, "learning_rate": 4.1389927587844256e-05, "loss": 0.0202, "step": 23710 }, { "epoch": 0.1753348511279974, "grad_norm": 0.08679580688476562, "learning_rate": 4.1386217948717945e-05, "loss": 0.0204, "step": 23720 }, { "epoch": 0.17540876969929925, "grad_norm": 0.07748806476593018, "learning_rate": 4.138250830959165e-05, "loss": 0.0183, "step": 23730 }, { "epoch": 0.1754826882706011, "grad_norm": 0.10431193560361862, "learning_rate": 4.1378798670465344e-05, "loss": 0.0203, "step": 23740 }, { "epoch": 0.17555660684190297, "grad_norm": 0.09392105042934418, "learning_rate": 4.137508903133903e-05, "loss": 0.0193, "step": 23750 }, { "epoch": 0.1756305254132048, "grad_norm": 0.08645907044410706, "learning_rate": 4.137137939221273e-05, "loss": 0.0186, "step": 23760 }, { "epoch": 0.17570444398450666, "grad_norm": 0.0924372673034668, "learning_rate": 4.1367669753086425e-05, "loss": 0.0164, "step": 23770 }, { "epoch": 0.17577836255580853, "grad_norm": 0.057999420911073685, "learning_rate": 4.1363960113960114e-05, "loss": 0.0186, "step": 23780 }, { "epoch": 0.17585228112711038, "grad_norm": 0.08546840399503708, "learning_rate": 4.136025047483381e-05, "loss": 0.0181, "step": 23790 }, { "epoch": 0.17592619969841222, "grad_norm": 0.09696268290281296, "learning_rate": 4.13565408357075e-05, "loss": 0.0212, "step": 23800 }, { "epoch": 0.1760001182697141, "grad_norm": 0.09261220693588257, "learning_rate": 4.13528311965812e-05, "loss": 0.021, "step": 23810 }, { "epoch": 0.17607403684101594, "grad_norm": 0.08623462170362473, "learning_rate": 4.13491215574549e-05, "loss": 0.0177, "step": 23820 }, { "epoch": 0.17614795541231779, "grad_norm": 0.08633705228567123, "learning_rate": 4.134541191832859e-05, "loss": 0.0198, "step": 23830 }, { "epoch": 0.17622187398361963, "grad_norm": 0.10043275356292725, "learning_rate": 4.134170227920228e-05, "loss": 0.0208, "step": 23840 }, { "epoch": 0.1762957925549215, "grad_norm": 0.10127022117376328, "learning_rate": 4.133799264007597e-05, "loss": 0.0177, "step": 23850 }, { "epoch": 0.17636971112622335, "grad_norm": 0.07167801260948181, "learning_rate": 4.133428300094967e-05, "loss": 0.0202, "step": 23860 }, { "epoch": 0.1764436296975252, "grad_norm": 0.10874171555042267, "learning_rate": 4.1330573361823364e-05, "loss": 0.0179, "step": 23870 }, { "epoch": 0.17651754826882707, "grad_norm": 0.08898110687732697, "learning_rate": 4.132686372269706e-05, "loss": 0.019, "step": 23880 }, { "epoch": 0.1765914668401289, "grad_norm": 0.1021110787987709, "learning_rate": 4.1323154083570756e-05, "loss": 0.0211, "step": 23890 }, { "epoch": 0.17666538541143076, "grad_norm": 0.10505396127700806, "learning_rate": 4.1319444444444445e-05, "loss": 0.0193, "step": 23900 }, { "epoch": 0.17673930398273263, "grad_norm": 0.08702701330184937, "learning_rate": 4.131573480531814e-05, "loss": 0.0222, "step": 23910 }, { "epoch": 0.17681322255403448, "grad_norm": 0.09350302815437317, "learning_rate": 4.131202516619184e-05, "loss": 0.0213, "step": 23920 }, { "epoch": 0.17688714112533632, "grad_norm": 0.0829959511756897, "learning_rate": 4.1308315527065526e-05, "loss": 0.0214, "step": 23930 }, { "epoch": 0.1769610596966382, "grad_norm": 0.08366640657186508, "learning_rate": 4.130460588793922e-05, "loss": 0.0185, "step": 23940 }, { "epoch": 0.17703497826794004, "grad_norm": 0.071568563580513, "learning_rate": 4.130089624881291e-05, "loss": 0.017, "step": 23950 }, { "epoch": 0.17710889683924189, "grad_norm": 0.08128269761800766, "learning_rate": 4.1297186609686614e-05, "loss": 0.0169, "step": 23960 }, { "epoch": 0.17718281541054373, "grad_norm": 0.08183693885803223, "learning_rate": 4.129347697056031e-05, "loss": 0.021, "step": 23970 }, { "epoch": 0.1772567339818456, "grad_norm": 0.08033385127782822, "learning_rate": 4.1289767331434e-05, "loss": 0.019, "step": 23980 }, { "epoch": 0.17733065255314745, "grad_norm": 0.11473685503005981, "learning_rate": 4.1286057692307695e-05, "loss": 0.0192, "step": 23990 }, { "epoch": 0.1774045711244493, "grad_norm": 0.09031099826097488, "learning_rate": 4.128234805318139e-05, "loss": 0.0192, "step": 24000 }, { "epoch": 0.17747848969575117, "grad_norm": 0.07809562981128693, "learning_rate": 4.127863841405508e-05, "loss": 0.0198, "step": 24010 }, { "epoch": 0.177552408267053, "grad_norm": 0.08787760883569717, "learning_rate": 4.1274928774928776e-05, "loss": 0.0207, "step": 24020 }, { "epoch": 0.17762632683835486, "grad_norm": 0.08965402096509933, "learning_rate": 4.127121913580247e-05, "loss": 0.0235, "step": 24030 }, { "epoch": 0.17770024540965673, "grad_norm": 0.18066206574440002, "learning_rate": 4.126750949667617e-05, "loss": 0.0217, "step": 24040 }, { "epoch": 0.17777416398095858, "grad_norm": 0.12065722048282623, "learning_rate": 4.1263799857549864e-05, "loss": 0.019, "step": 24050 }, { "epoch": 0.17784808255226042, "grad_norm": 0.06909769028425217, "learning_rate": 4.1260090218423554e-05, "loss": 0.0209, "step": 24060 }, { "epoch": 0.1779220011235623, "grad_norm": 0.09605135023593903, "learning_rate": 4.125638057929725e-05, "loss": 0.02, "step": 24070 }, { "epoch": 0.17799591969486414, "grad_norm": 0.11883515864610672, "learning_rate": 4.125267094017094e-05, "loss": 0.0204, "step": 24080 }, { "epoch": 0.178069838266166, "grad_norm": 0.06468243896961212, "learning_rate": 4.1248961301044635e-05, "loss": 0.0199, "step": 24090 }, { "epoch": 0.17814375683746786, "grad_norm": 0.08295508474111557, "learning_rate": 4.124525166191833e-05, "loss": 0.0199, "step": 24100 }, { "epoch": 0.1782176754087697, "grad_norm": 0.06782054901123047, "learning_rate": 4.1241542022792027e-05, "loss": 0.0189, "step": 24110 }, { "epoch": 0.17829159398007155, "grad_norm": 0.09335844218730927, "learning_rate": 4.123783238366572e-05, "loss": 0.0213, "step": 24120 }, { "epoch": 0.1783655125513734, "grad_norm": 0.08745142072439194, "learning_rate": 4.123412274453941e-05, "loss": 0.0179, "step": 24130 }, { "epoch": 0.17843943112267527, "grad_norm": 0.09213876724243164, "learning_rate": 4.123041310541311e-05, "loss": 0.0185, "step": 24140 }, { "epoch": 0.1785133496939771, "grad_norm": 0.12334982305765152, "learning_rate": 4.1226703466286804e-05, "loss": 0.0236, "step": 24150 }, { "epoch": 0.17858726826527896, "grad_norm": 0.07247071713209152, "learning_rate": 4.122299382716049e-05, "loss": 0.0181, "step": 24160 }, { "epoch": 0.17866118683658083, "grad_norm": 0.07788656651973724, "learning_rate": 4.121928418803419e-05, "loss": 0.0203, "step": 24170 }, { "epoch": 0.17873510540788268, "grad_norm": 0.08412611484527588, "learning_rate": 4.1215574548907885e-05, "loss": 0.0218, "step": 24180 }, { "epoch": 0.17880902397918452, "grad_norm": 0.10411692410707474, "learning_rate": 4.121186490978158e-05, "loss": 0.0186, "step": 24190 }, { "epoch": 0.1788829425504864, "grad_norm": 0.09697825461626053, "learning_rate": 4.120815527065528e-05, "loss": 0.0191, "step": 24200 }, { "epoch": 0.17895686112178824, "grad_norm": 0.11236854642629623, "learning_rate": 4.1204445631528966e-05, "loss": 0.0213, "step": 24210 }, { "epoch": 0.1790307796930901, "grad_norm": 0.06983228772878647, "learning_rate": 4.120073599240266e-05, "loss": 0.0172, "step": 24220 }, { "epoch": 0.17910469826439196, "grad_norm": 0.10572890192270279, "learning_rate": 4.119702635327636e-05, "loss": 0.0206, "step": 24230 }, { "epoch": 0.1791786168356938, "grad_norm": 0.08477522432804108, "learning_rate": 4.119331671415005e-05, "loss": 0.0185, "step": 24240 }, { "epoch": 0.17925253540699565, "grad_norm": 0.08904995024204254, "learning_rate": 4.118960707502374e-05, "loss": 0.0215, "step": 24250 }, { "epoch": 0.1793264539782975, "grad_norm": 0.11577491462230682, "learning_rate": 4.118589743589744e-05, "loss": 0.0193, "step": 24260 }, { "epoch": 0.17940037254959937, "grad_norm": 0.1336565613746643, "learning_rate": 4.1182187796771135e-05, "loss": 0.0194, "step": 24270 }, { "epoch": 0.1794742911209012, "grad_norm": 0.09863122552633286, "learning_rate": 4.117847815764483e-05, "loss": 0.0192, "step": 24280 }, { "epoch": 0.17954820969220306, "grad_norm": 0.071408212184906, "learning_rate": 4.117476851851852e-05, "loss": 0.0188, "step": 24290 }, { "epoch": 0.17962212826350493, "grad_norm": 0.078492172062397, "learning_rate": 4.1171058879392216e-05, "loss": 0.0186, "step": 24300 }, { "epoch": 0.17969604683480678, "grad_norm": 0.08483748137950897, "learning_rate": 4.1167349240265905e-05, "loss": 0.0187, "step": 24310 }, { "epoch": 0.17976996540610862, "grad_norm": 0.08622529357671738, "learning_rate": 4.11636396011396e-05, "loss": 0.0201, "step": 24320 }, { "epoch": 0.1798438839774105, "grad_norm": 0.09336697310209274, "learning_rate": 4.11599299620133e-05, "loss": 0.0192, "step": 24330 }, { "epoch": 0.17991780254871234, "grad_norm": 0.11593308299779892, "learning_rate": 4.115622032288699e-05, "loss": 0.0198, "step": 24340 }, { "epoch": 0.1799917211200142, "grad_norm": 0.09057379513978958, "learning_rate": 4.115251068376069e-05, "loss": 0.0174, "step": 24350 }, { "epoch": 0.18006563969131606, "grad_norm": 0.0750114917755127, "learning_rate": 4.114880104463438e-05, "loss": 0.0195, "step": 24360 }, { "epoch": 0.1801395582626179, "grad_norm": 0.0803409218788147, "learning_rate": 4.1145091405508074e-05, "loss": 0.0195, "step": 24370 }, { "epoch": 0.18021347683391975, "grad_norm": 0.07491756230592728, "learning_rate": 4.114138176638177e-05, "loss": 0.0194, "step": 24380 }, { "epoch": 0.1802873954052216, "grad_norm": 0.07920035719871521, "learning_rate": 4.113767212725546e-05, "loss": 0.0183, "step": 24390 }, { "epoch": 0.18036131397652347, "grad_norm": 0.075667604804039, "learning_rate": 4.1133962488129155e-05, "loss": 0.0212, "step": 24400 }, { "epoch": 0.1804352325478253, "grad_norm": 0.0879991203546524, "learning_rate": 4.113025284900285e-05, "loss": 0.0217, "step": 24410 }, { "epoch": 0.18050915111912716, "grad_norm": 0.08139169216156006, "learning_rate": 4.112654320987655e-05, "loss": 0.0189, "step": 24420 }, { "epoch": 0.18058306969042903, "grad_norm": 0.08189604431390762, "learning_rate": 4.112283357075024e-05, "loss": 0.0223, "step": 24430 }, { "epoch": 0.18065698826173088, "grad_norm": 0.09923578053712845, "learning_rate": 4.111912393162393e-05, "loss": 0.0206, "step": 24440 }, { "epoch": 0.18073090683303272, "grad_norm": 0.10648280382156372, "learning_rate": 4.111541429249763e-05, "loss": 0.0196, "step": 24450 }, { "epoch": 0.1808048254043346, "grad_norm": 0.11174852401018143, "learning_rate": 4.1111704653371324e-05, "loss": 0.0212, "step": 24460 }, { "epoch": 0.18087874397563644, "grad_norm": 0.08722157776355743, "learning_rate": 4.1107995014245014e-05, "loss": 0.0194, "step": 24470 }, { "epoch": 0.1809526625469383, "grad_norm": 0.08150345087051392, "learning_rate": 4.110428537511871e-05, "loss": 0.0192, "step": 24480 }, { "epoch": 0.18102658111824016, "grad_norm": 0.09848983585834503, "learning_rate": 4.1100575735992406e-05, "loss": 0.0196, "step": 24490 }, { "epoch": 0.181100499689542, "grad_norm": 0.07330475002527237, "learning_rate": 4.10968660968661e-05, "loss": 0.0189, "step": 24500 }, { "epoch": 0.18117441826084385, "grad_norm": 0.08347219973802567, "learning_rate": 4.10931564577398e-05, "loss": 0.0175, "step": 24510 }, { "epoch": 0.1812483368321457, "grad_norm": 0.11764758080244064, "learning_rate": 4.108944681861349e-05, "loss": 0.024, "step": 24520 }, { "epoch": 0.18132225540344757, "grad_norm": 0.07574162632226944, "learning_rate": 4.108573717948718e-05, "loss": 0.0205, "step": 24530 }, { "epoch": 0.18139617397474941, "grad_norm": 0.09680452197790146, "learning_rate": 4.108202754036087e-05, "loss": 0.0243, "step": 24540 }, { "epoch": 0.18147009254605126, "grad_norm": 0.08307170122861862, "learning_rate": 4.107831790123457e-05, "loss": 0.0205, "step": 24550 }, { "epoch": 0.18154401111735313, "grad_norm": 0.09651833027601242, "learning_rate": 4.1074608262108264e-05, "loss": 0.0194, "step": 24560 }, { "epoch": 0.18161792968865498, "grad_norm": 0.09410324692726135, "learning_rate": 4.107089862298196e-05, "loss": 0.0205, "step": 24570 }, { "epoch": 0.18169184825995682, "grad_norm": 0.08339502662420273, "learning_rate": 4.1067188983855656e-05, "loss": 0.0201, "step": 24580 }, { "epoch": 0.1817657668312587, "grad_norm": 0.08488886058330536, "learning_rate": 4.1063479344729345e-05, "loss": 0.0205, "step": 24590 }, { "epoch": 0.18183968540256054, "grad_norm": 0.0752316489815712, "learning_rate": 4.105976970560304e-05, "loss": 0.0223, "step": 24600 }, { "epoch": 0.1819136039738624, "grad_norm": 0.08579552173614502, "learning_rate": 4.105606006647674e-05, "loss": 0.0198, "step": 24610 }, { "epoch": 0.18198752254516426, "grad_norm": 0.06785819679498672, "learning_rate": 4.1052350427350426e-05, "loss": 0.019, "step": 24620 }, { "epoch": 0.1820614411164661, "grad_norm": 0.09498974680900574, "learning_rate": 4.104864078822412e-05, "loss": 0.0202, "step": 24630 }, { "epoch": 0.18213535968776795, "grad_norm": 0.08310835808515549, "learning_rate": 4.104493114909782e-05, "loss": 0.0188, "step": 24640 }, { "epoch": 0.1822092782590698, "grad_norm": 0.07385044544935226, "learning_rate": 4.1041221509971514e-05, "loss": 0.018, "step": 24650 }, { "epoch": 0.18228319683037167, "grad_norm": 0.09191499650478363, "learning_rate": 4.103751187084521e-05, "loss": 0.0183, "step": 24660 }, { "epoch": 0.18235711540167351, "grad_norm": 0.09280231595039368, "learning_rate": 4.10338022317189e-05, "loss": 0.0178, "step": 24670 }, { "epoch": 0.18243103397297536, "grad_norm": 0.1001996323466301, "learning_rate": 4.1030092592592595e-05, "loss": 0.0179, "step": 24680 }, { "epoch": 0.18250495254427723, "grad_norm": 0.08631417155265808, "learning_rate": 4.102638295346629e-05, "loss": 0.0205, "step": 24690 }, { "epoch": 0.18257887111557908, "grad_norm": 0.07611658424139023, "learning_rate": 4.102267331433998e-05, "loss": 0.0189, "step": 24700 }, { "epoch": 0.18265278968688092, "grad_norm": 0.09822043776512146, "learning_rate": 4.1018963675213676e-05, "loss": 0.022, "step": 24710 }, { "epoch": 0.1827267082581828, "grad_norm": 0.08817804604768753, "learning_rate": 4.101525403608737e-05, "loss": 0.0192, "step": 24720 }, { "epoch": 0.18280062682948464, "grad_norm": 0.08359253406524658, "learning_rate": 4.101154439696107e-05, "loss": 0.0247, "step": 24730 }, { "epoch": 0.1828745454007865, "grad_norm": 0.11328428238630295, "learning_rate": 4.1007834757834764e-05, "loss": 0.0209, "step": 24740 }, { "epoch": 0.18294846397208836, "grad_norm": 0.09812841564416885, "learning_rate": 4.100412511870845e-05, "loss": 0.0181, "step": 24750 }, { "epoch": 0.1830223825433902, "grad_norm": 0.09421461820602417, "learning_rate": 4.100041547958215e-05, "loss": 0.0188, "step": 24760 }, { "epoch": 0.18309630111469205, "grad_norm": 0.10235374420881271, "learning_rate": 4.099670584045584e-05, "loss": 0.0176, "step": 24770 }, { "epoch": 0.1831702196859939, "grad_norm": 0.10455603897571564, "learning_rate": 4.0992996201329534e-05, "loss": 0.0211, "step": 24780 }, { "epoch": 0.18324413825729577, "grad_norm": 0.07605478167533875, "learning_rate": 4.098928656220323e-05, "loss": 0.0207, "step": 24790 }, { "epoch": 0.18331805682859761, "grad_norm": 0.11007492244243622, "learning_rate": 4.0985576923076926e-05, "loss": 0.0204, "step": 24800 }, { "epoch": 0.18339197539989946, "grad_norm": 0.08086639642715454, "learning_rate": 4.098186728395062e-05, "loss": 0.0194, "step": 24810 }, { "epoch": 0.18346589397120133, "grad_norm": 0.11971864104270935, "learning_rate": 4.097815764482431e-05, "loss": 0.0206, "step": 24820 }, { "epoch": 0.18353981254250318, "grad_norm": 0.09048326313495636, "learning_rate": 4.097444800569801e-05, "loss": 0.02, "step": 24830 }, { "epoch": 0.18361373111380502, "grad_norm": 0.08733789622783661, "learning_rate": 4.0970738366571703e-05, "loss": 0.0185, "step": 24840 }, { "epoch": 0.1836876496851069, "grad_norm": 0.07789760828018188, "learning_rate": 4.096702872744539e-05, "loss": 0.0186, "step": 24850 }, { "epoch": 0.18376156825640874, "grad_norm": 0.06854735314846039, "learning_rate": 4.096331908831909e-05, "loss": 0.0203, "step": 24860 }, { "epoch": 0.1838354868277106, "grad_norm": 0.09135521203279495, "learning_rate": 4.0959609449192785e-05, "loss": 0.0199, "step": 24870 }, { "epoch": 0.18390940539901246, "grad_norm": 0.08071056753396988, "learning_rate": 4.095589981006648e-05, "loss": 0.0191, "step": 24880 }, { "epoch": 0.1839833239703143, "grad_norm": 0.08886650949716568, "learning_rate": 4.0952190170940176e-05, "loss": 0.0198, "step": 24890 }, { "epoch": 0.18405724254161615, "grad_norm": 0.13103017210960388, "learning_rate": 4.0948480531813866e-05, "loss": 0.0199, "step": 24900 }, { "epoch": 0.184131161112918, "grad_norm": 0.0853632241487503, "learning_rate": 4.094477089268756e-05, "loss": 0.0177, "step": 24910 }, { "epoch": 0.18420507968421987, "grad_norm": 0.09077557921409607, "learning_rate": 4.094106125356126e-05, "loss": 0.0203, "step": 24920 }, { "epoch": 0.18427899825552171, "grad_norm": 0.0909472331404686, "learning_rate": 4.093735161443495e-05, "loss": 0.0205, "step": 24930 }, { "epoch": 0.18435291682682356, "grad_norm": 0.10778666287660599, "learning_rate": 4.093364197530864e-05, "loss": 0.0185, "step": 24940 }, { "epoch": 0.18442683539812543, "grad_norm": 0.08606761693954468, "learning_rate": 4.092993233618234e-05, "loss": 0.0193, "step": 24950 }, { "epoch": 0.18450075396942728, "grad_norm": 0.11441691964864731, "learning_rate": 4.0926222697056035e-05, "loss": 0.0209, "step": 24960 }, { "epoch": 0.18457467254072912, "grad_norm": 0.08625328540802002, "learning_rate": 4.092251305792973e-05, "loss": 0.0198, "step": 24970 }, { "epoch": 0.184648591112031, "grad_norm": 0.10141268372535706, "learning_rate": 4.091880341880342e-05, "loss": 0.0184, "step": 24980 }, { "epoch": 0.18472250968333284, "grad_norm": 0.09997362643480301, "learning_rate": 4.0915093779677116e-05, "loss": 0.0222, "step": 24990 }, { "epoch": 0.1847964282546347, "grad_norm": 0.1047859638929367, "learning_rate": 4.0911384140550805e-05, "loss": 0.0222, "step": 25000 }, { "epoch": 0.18487034682593656, "grad_norm": 0.09549182653427124, "learning_rate": 4.09076745014245e-05, "loss": 0.0193, "step": 25010 }, { "epoch": 0.1849442653972384, "grad_norm": 0.08501344919204712, "learning_rate": 4.09039648622982e-05, "loss": 0.0219, "step": 25020 }, { "epoch": 0.18501818396854025, "grad_norm": 0.0857548862695694, "learning_rate": 4.090025522317189e-05, "loss": 0.017, "step": 25030 }, { "epoch": 0.18509210253984212, "grad_norm": 0.07982197403907776, "learning_rate": 4.089654558404559e-05, "loss": 0.0202, "step": 25040 }, { "epoch": 0.18516602111114397, "grad_norm": 0.0914795845746994, "learning_rate": 4.089283594491928e-05, "loss": 0.0193, "step": 25050 }, { "epoch": 0.18523993968244581, "grad_norm": 0.09756156802177429, "learning_rate": 4.0889126305792974e-05, "loss": 0.0203, "step": 25060 }, { "epoch": 0.18531385825374766, "grad_norm": 0.0878814235329628, "learning_rate": 4.088541666666667e-05, "loss": 0.0188, "step": 25070 }, { "epoch": 0.18538777682504953, "grad_norm": 0.09018289297819138, "learning_rate": 4.088170702754036e-05, "loss": 0.0214, "step": 25080 }, { "epoch": 0.18546169539635138, "grad_norm": 0.09078127145767212, "learning_rate": 4.0877997388414055e-05, "loss": 0.0199, "step": 25090 }, { "epoch": 0.18553561396765322, "grad_norm": 0.10447991639375687, "learning_rate": 4.087428774928775e-05, "loss": 0.0199, "step": 25100 }, { "epoch": 0.1856095325389551, "grad_norm": 0.08599971979856491, "learning_rate": 4.087057811016145e-05, "loss": 0.0219, "step": 25110 }, { "epoch": 0.18568345111025694, "grad_norm": 0.08772403746843338, "learning_rate": 4.086686847103514e-05, "loss": 0.0189, "step": 25120 }, { "epoch": 0.1857573696815588, "grad_norm": 0.1319970190525055, "learning_rate": 4.086315883190883e-05, "loss": 0.0187, "step": 25130 }, { "epoch": 0.18583128825286066, "grad_norm": 0.06123780831694603, "learning_rate": 4.085944919278253e-05, "loss": 0.021, "step": 25140 }, { "epoch": 0.1859052068241625, "grad_norm": 0.07819724828004837, "learning_rate": 4.0855739553656224e-05, "loss": 0.0207, "step": 25150 }, { "epoch": 0.18597912539546435, "grad_norm": 0.12462358176708221, "learning_rate": 4.085202991452991e-05, "loss": 0.0214, "step": 25160 }, { "epoch": 0.18605304396676622, "grad_norm": 0.09657600522041321, "learning_rate": 4.084832027540361e-05, "loss": 0.0204, "step": 25170 }, { "epoch": 0.18612696253806807, "grad_norm": 0.10215092450380325, "learning_rate": 4.0844610636277305e-05, "loss": 0.0206, "step": 25180 }, { "epoch": 0.18620088110936991, "grad_norm": 0.07794997841119766, "learning_rate": 4.0840900997151e-05, "loss": 0.0187, "step": 25190 }, { "epoch": 0.18627479968067176, "grad_norm": 0.10502193123102188, "learning_rate": 4.08371913580247e-05, "loss": 0.0198, "step": 25200 }, { "epoch": 0.18634871825197363, "grad_norm": 0.08818890154361725, "learning_rate": 4.0833481718898386e-05, "loss": 0.0211, "step": 25210 }, { "epoch": 0.18642263682327548, "grad_norm": 0.08618687093257904, "learning_rate": 4.082977207977208e-05, "loss": 0.0184, "step": 25220 }, { "epoch": 0.18649655539457732, "grad_norm": 0.09551467001438141, "learning_rate": 4.082606244064577e-05, "loss": 0.0197, "step": 25230 }, { "epoch": 0.1865704739658792, "grad_norm": 0.09489475190639496, "learning_rate": 4.082235280151947e-05, "loss": 0.0209, "step": 25240 }, { "epoch": 0.18664439253718104, "grad_norm": 0.09283564239740372, "learning_rate": 4.0818643162393164e-05, "loss": 0.0182, "step": 25250 }, { "epoch": 0.1867183111084829, "grad_norm": 0.1071094423532486, "learning_rate": 4.081493352326686e-05, "loss": 0.0197, "step": 25260 }, { "epoch": 0.18679222967978476, "grad_norm": 0.08947714418172836, "learning_rate": 4.0811223884140555e-05, "loss": 0.0193, "step": 25270 }, { "epoch": 0.1868661482510866, "grad_norm": 0.14174768328666687, "learning_rate": 4.0807514245014245e-05, "loss": 0.0184, "step": 25280 }, { "epoch": 0.18694006682238845, "grad_norm": 0.07919485867023468, "learning_rate": 4.080380460588794e-05, "loss": 0.0203, "step": 25290 }, { "epoch": 0.18701398539369032, "grad_norm": 0.08494485914707184, "learning_rate": 4.0800094966761637e-05, "loss": 0.0222, "step": 25300 }, { "epoch": 0.18708790396499217, "grad_norm": 0.0889492928981781, "learning_rate": 4.0796385327635326e-05, "loss": 0.0212, "step": 25310 }, { "epoch": 0.18716182253629401, "grad_norm": 0.0972001850605011, "learning_rate": 4.079267568850902e-05, "loss": 0.0201, "step": 25320 }, { "epoch": 0.18723574110759586, "grad_norm": 0.1136302798986435, "learning_rate": 4.078896604938272e-05, "loss": 0.0206, "step": 25330 }, { "epoch": 0.18730965967889773, "grad_norm": 0.10994186252355576, "learning_rate": 4.0785256410256414e-05, "loss": 0.0222, "step": 25340 }, { "epoch": 0.18738357825019958, "grad_norm": 0.0715063214302063, "learning_rate": 4.078154677113011e-05, "loss": 0.0182, "step": 25350 }, { "epoch": 0.18745749682150142, "grad_norm": 0.07392342388629913, "learning_rate": 4.07778371320038e-05, "loss": 0.0202, "step": 25360 }, { "epoch": 0.1875314153928033, "grad_norm": 0.07772233337163925, "learning_rate": 4.0774127492877495e-05, "loss": 0.0194, "step": 25370 }, { "epoch": 0.18760533396410514, "grad_norm": 0.10917063802480698, "learning_rate": 4.077041785375119e-05, "loss": 0.0194, "step": 25380 }, { "epoch": 0.187679252535407, "grad_norm": 0.0789274200797081, "learning_rate": 4.076670821462488e-05, "loss": 0.022, "step": 25390 }, { "epoch": 0.18775317110670886, "grad_norm": 0.06959021091461182, "learning_rate": 4.0762998575498576e-05, "loss": 0.0192, "step": 25400 }, { "epoch": 0.1878270896780107, "grad_norm": 0.05860432609915733, "learning_rate": 4.075928893637227e-05, "loss": 0.0186, "step": 25410 }, { "epoch": 0.18790100824931255, "grad_norm": 0.08827078342437744, "learning_rate": 4.075557929724597e-05, "loss": 0.0192, "step": 25420 }, { "epoch": 0.18797492682061442, "grad_norm": 0.10155238956212997, "learning_rate": 4.0751869658119664e-05, "loss": 0.0208, "step": 25430 }, { "epoch": 0.18804884539191627, "grad_norm": 0.07168347388505936, "learning_rate": 4.074816001899335e-05, "loss": 0.0183, "step": 25440 }, { "epoch": 0.18812276396321811, "grad_norm": 0.08718912303447723, "learning_rate": 4.074445037986705e-05, "loss": 0.0223, "step": 25450 }, { "epoch": 0.18819668253451996, "grad_norm": 0.08500777930021286, "learning_rate": 4.074074074074074e-05, "loss": 0.0181, "step": 25460 }, { "epoch": 0.18827060110582183, "grad_norm": 0.08464238792657852, "learning_rate": 4.0737031101614434e-05, "loss": 0.0213, "step": 25470 }, { "epoch": 0.18834451967712368, "grad_norm": 0.1232479065656662, "learning_rate": 4.073332146248813e-05, "loss": 0.0229, "step": 25480 }, { "epoch": 0.18841843824842552, "grad_norm": 0.10290573537349701, "learning_rate": 4.0729611823361826e-05, "loss": 0.0228, "step": 25490 }, { "epoch": 0.1884923568197274, "grad_norm": 0.12121661007404327, "learning_rate": 4.072590218423552e-05, "loss": 0.0221, "step": 25500 }, { "epoch": 0.18856627539102924, "grad_norm": 0.08239974826574326, "learning_rate": 4.072219254510921e-05, "loss": 0.0184, "step": 25510 }, { "epoch": 0.1886401939623311, "grad_norm": 0.09272027015686035, "learning_rate": 4.071848290598291e-05, "loss": 0.0201, "step": 25520 }, { "epoch": 0.18871411253363296, "grad_norm": 0.13893020153045654, "learning_rate": 4.07147732668566e-05, "loss": 0.02, "step": 25530 }, { "epoch": 0.1887880311049348, "grad_norm": 0.08534568548202515, "learning_rate": 4.071106362773029e-05, "loss": 0.0176, "step": 25540 }, { "epoch": 0.18886194967623665, "grad_norm": 0.0794425904750824, "learning_rate": 4.070735398860399e-05, "loss": 0.0187, "step": 25550 }, { "epoch": 0.18893586824753852, "grad_norm": 0.05672596022486687, "learning_rate": 4.0703644349477684e-05, "loss": 0.0186, "step": 25560 }, { "epoch": 0.18900978681884037, "grad_norm": 0.058385878801345825, "learning_rate": 4.069993471035138e-05, "loss": 0.0173, "step": 25570 }, { "epoch": 0.18908370539014221, "grad_norm": 0.11499615013599396, "learning_rate": 4.0696225071225076e-05, "loss": 0.0227, "step": 25580 }, { "epoch": 0.18915762396144406, "grad_norm": 0.11212538182735443, "learning_rate": 4.0692515432098765e-05, "loss": 0.0192, "step": 25590 }, { "epoch": 0.18923154253274593, "grad_norm": 0.06475656479597092, "learning_rate": 4.068880579297246e-05, "loss": 0.0184, "step": 25600 }, { "epoch": 0.18930546110404778, "grad_norm": 0.07098093628883362, "learning_rate": 4.068509615384616e-05, "loss": 0.0189, "step": 25610 }, { "epoch": 0.18937937967534962, "grad_norm": 0.09972020238637924, "learning_rate": 4.0681386514719847e-05, "loss": 0.0198, "step": 25620 }, { "epoch": 0.1894532982466515, "grad_norm": 0.08523591607809067, "learning_rate": 4.067767687559354e-05, "loss": 0.0208, "step": 25630 }, { "epoch": 0.18952721681795334, "grad_norm": 0.08826702833175659, "learning_rate": 4.067396723646724e-05, "loss": 0.0172, "step": 25640 }, { "epoch": 0.1896011353892552, "grad_norm": 0.10502415150403976, "learning_rate": 4.0670257597340934e-05, "loss": 0.0235, "step": 25650 }, { "epoch": 0.18967505396055706, "grad_norm": 0.07767823338508606, "learning_rate": 4.066654795821463e-05, "loss": 0.0203, "step": 25660 }, { "epoch": 0.1897489725318589, "grad_norm": 0.06232968717813492, "learning_rate": 4.066283831908832e-05, "loss": 0.0184, "step": 25670 }, { "epoch": 0.18982289110316075, "grad_norm": 0.0691458061337471, "learning_rate": 4.0659128679962016e-05, "loss": 0.02, "step": 25680 }, { "epoch": 0.18989680967446262, "grad_norm": 0.0777791365981102, "learning_rate": 4.0655419040835705e-05, "loss": 0.0186, "step": 25690 }, { "epoch": 0.18997072824576447, "grad_norm": 0.09985006600618362, "learning_rate": 4.06517094017094e-05, "loss": 0.0213, "step": 25700 }, { "epoch": 0.19004464681706632, "grad_norm": 0.06811359524726868, "learning_rate": 4.06479997625831e-05, "loss": 0.0188, "step": 25710 }, { "epoch": 0.19011856538836816, "grad_norm": 0.07796566933393478, "learning_rate": 4.064429012345679e-05, "loss": 0.0207, "step": 25720 }, { "epoch": 0.19019248395967003, "grad_norm": 0.08382702618837357, "learning_rate": 4.064058048433049e-05, "loss": 0.0163, "step": 25730 }, { "epoch": 0.19026640253097188, "grad_norm": 0.07249416410923004, "learning_rate": 4.063687084520418e-05, "loss": 0.0184, "step": 25740 }, { "epoch": 0.19034032110227372, "grad_norm": 0.08133135735988617, "learning_rate": 4.0633161206077874e-05, "loss": 0.0189, "step": 25750 }, { "epoch": 0.1904142396735756, "grad_norm": 0.07951100915670395, "learning_rate": 4.062945156695157e-05, "loss": 0.02, "step": 25760 }, { "epoch": 0.19048815824487744, "grad_norm": 0.08528363704681396, "learning_rate": 4.062574192782526e-05, "loss": 0.0177, "step": 25770 }, { "epoch": 0.1905620768161793, "grad_norm": 0.08923252671957016, "learning_rate": 4.0622032288698955e-05, "loss": 0.0186, "step": 25780 }, { "epoch": 0.19063599538748116, "grad_norm": 0.1294834166765213, "learning_rate": 4.061832264957265e-05, "loss": 0.0193, "step": 25790 }, { "epoch": 0.190709913958783, "grad_norm": 0.08585592359304428, "learning_rate": 4.061461301044635e-05, "loss": 0.0201, "step": 25800 }, { "epoch": 0.19078383253008485, "grad_norm": 0.07966049760580063, "learning_rate": 4.061090337132004e-05, "loss": 0.0192, "step": 25810 }, { "epoch": 0.19085775110138672, "grad_norm": 0.11067353934049606, "learning_rate": 4.060719373219373e-05, "loss": 0.0198, "step": 25820 }, { "epoch": 0.19093166967268857, "grad_norm": 0.08094379305839539, "learning_rate": 4.060348409306743e-05, "loss": 0.0189, "step": 25830 }, { "epoch": 0.19100558824399042, "grad_norm": 0.08187496662139893, "learning_rate": 4.0599774453941124e-05, "loss": 0.0181, "step": 25840 }, { "epoch": 0.19107950681529226, "grad_norm": 0.10788208246231079, "learning_rate": 4.059606481481481e-05, "loss": 0.0211, "step": 25850 }, { "epoch": 0.19115342538659413, "grad_norm": 0.05920582264661789, "learning_rate": 4.059235517568851e-05, "loss": 0.0198, "step": 25860 }, { "epoch": 0.19122734395789598, "grad_norm": 0.08051064610481262, "learning_rate": 4.0588645536562205e-05, "loss": 0.0197, "step": 25870 }, { "epoch": 0.19130126252919782, "grad_norm": 0.07535728067159653, "learning_rate": 4.05849358974359e-05, "loss": 0.0184, "step": 25880 }, { "epoch": 0.1913751811004997, "grad_norm": 0.09129388630390167, "learning_rate": 4.05812262583096e-05, "loss": 0.0171, "step": 25890 }, { "epoch": 0.19144909967180154, "grad_norm": 0.0707845389842987, "learning_rate": 4.0577516619183286e-05, "loss": 0.0212, "step": 25900 }, { "epoch": 0.1915230182431034, "grad_norm": 0.10849178582429886, "learning_rate": 4.057380698005698e-05, "loss": 0.0205, "step": 25910 }, { "epoch": 0.19159693681440526, "grad_norm": 0.07658477127552032, "learning_rate": 4.057009734093067e-05, "loss": 0.0188, "step": 25920 }, { "epoch": 0.1916708553857071, "grad_norm": 0.08434335142374039, "learning_rate": 4.056638770180437e-05, "loss": 0.0201, "step": 25930 }, { "epoch": 0.19174477395700895, "grad_norm": 0.07062356173992157, "learning_rate": 4.056267806267807e-05, "loss": 0.0196, "step": 25940 }, { "epoch": 0.19181869252831082, "grad_norm": 0.12351708859205246, "learning_rate": 4.055896842355176e-05, "loss": 0.0204, "step": 25950 }, { "epoch": 0.19189261109961267, "grad_norm": 0.08979147672653198, "learning_rate": 4.0555258784425455e-05, "loss": 0.0175, "step": 25960 }, { "epoch": 0.19196652967091452, "grad_norm": 0.10261064767837524, "learning_rate": 4.0551549145299144e-05, "loss": 0.0209, "step": 25970 }, { "epoch": 0.1920404482422164, "grad_norm": 0.09259028732776642, "learning_rate": 4.054783950617284e-05, "loss": 0.0203, "step": 25980 }, { "epoch": 0.19211436681351823, "grad_norm": 0.08469455689191818, "learning_rate": 4.0544129867046536e-05, "loss": 0.0197, "step": 25990 }, { "epoch": 0.19218828538482008, "grad_norm": 0.07711025327444077, "learning_rate": 4.0540420227920226e-05, "loss": 0.0178, "step": 26000 }, { "epoch": 0.19226220395612192, "grad_norm": 0.07707731425762177, "learning_rate": 4.053671058879392e-05, "loss": 0.0204, "step": 26010 }, { "epoch": 0.1923361225274238, "grad_norm": 0.08284084498882294, "learning_rate": 4.053300094966762e-05, "loss": 0.0196, "step": 26020 }, { "epoch": 0.19241004109872564, "grad_norm": 0.10776533931493759, "learning_rate": 4.0529291310541313e-05, "loss": 0.0195, "step": 26030 }, { "epoch": 0.1924839596700275, "grad_norm": 0.08582749962806702, "learning_rate": 4.052558167141501e-05, "loss": 0.019, "step": 26040 }, { "epoch": 0.19255787824132936, "grad_norm": 0.0954875648021698, "learning_rate": 4.05218720322887e-05, "loss": 0.0198, "step": 26050 }, { "epoch": 0.1926317968126312, "grad_norm": 0.10585756599903107, "learning_rate": 4.0518162393162395e-05, "loss": 0.0183, "step": 26060 }, { "epoch": 0.19270571538393305, "grad_norm": 0.15620584785938263, "learning_rate": 4.051445275403609e-05, "loss": 0.0203, "step": 26070 }, { "epoch": 0.19277963395523492, "grad_norm": 0.07597756385803223, "learning_rate": 4.051074311490978e-05, "loss": 0.0202, "step": 26080 }, { "epoch": 0.19285355252653677, "grad_norm": 0.10610008984804153, "learning_rate": 4.050703347578348e-05, "loss": 0.0208, "step": 26090 }, { "epoch": 0.19292747109783862, "grad_norm": 0.08419201523065567, "learning_rate": 4.050332383665717e-05, "loss": 0.0197, "step": 26100 }, { "epoch": 0.1930013896691405, "grad_norm": 0.10455375909805298, "learning_rate": 4.049961419753087e-05, "loss": 0.0208, "step": 26110 }, { "epoch": 0.19307530824044233, "grad_norm": 0.09646882116794586, "learning_rate": 4.0495904558404564e-05, "loss": 0.0216, "step": 26120 }, { "epoch": 0.19314922681174418, "grad_norm": 0.09530425816774368, "learning_rate": 4.049219491927825e-05, "loss": 0.0195, "step": 26130 }, { "epoch": 0.19322314538304602, "grad_norm": 0.08805854618549347, "learning_rate": 4.048848528015195e-05, "loss": 0.0192, "step": 26140 }, { "epoch": 0.1932970639543479, "grad_norm": 0.06926032900810242, "learning_rate": 4.048477564102564e-05, "loss": 0.02, "step": 26150 }, { "epoch": 0.19337098252564974, "grad_norm": 0.09659983217716217, "learning_rate": 4.0481066001899334e-05, "loss": 0.0202, "step": 26160 }, { "epoch": 0.1934449010969516, "grad_norm": 0.07807913422584534, "learning_rate": 4.047735636277304e-05, "loss": 0.0224, "step": 26170 }, { "epoch": 0.19351881966825346, "grad_norm": 0.07716178148984909, "learning_rate": 4.0473646723646726e-05, "loss": 0.0192, "step": 26180 }, { "epoch": 0.1935927382395553, "grad_norm": 0.12032181769609451, "learning_rate": 4.046993708452042e-05, "loss": 0.0217, "step": 26190 }, { "epoch": 0.19366665681085715, "grad_norm": 0.09849361330270767, "learning_rate": 4.046622744539411e-05, "loss": 0.0202, "step": 26200 }, { "epoch": 0.19374057538215902, "grad_norm": 0.07785578072071075, "learning_rate": 4.046251780626781e-05, "loss": 0.017, "step": 26210 }, { "epoch": 0.19381449395346087, "grad_norm": 0.09973660856485367, "learning_rate": 4.04588081671415e-05, "loss": 0.0199, "step": 26220 }, { "epoch": 0.19388841252476272, "grad_norm": 0.10502509772777557, "learning_rate": 4.045509852801519e-05, "loss": 0.0194, "step": 26230 }, { "epoch": 0.1939623310960646, "grad_norm": 0.08962590247392654, "learning_rate": 4.045138888888889e-05, "loss": 0.0204, "step": 26240 }, { "epoch": 0.19403624966736643, "grad_norm": 0.0918722152709961, "learning_rate": 4.0447679249762584e-05, "loss": 0.0215, "step": 26250 }, { "epoch": 0.19411016823866828, "grad_norm": 0.07433968037366867, "learning_rate": 4.044396961063628e-05, "loss": 0.0182, "step": 26260 }, { "epoch": 0.19418408680997012, "grad_norm": 0.12824876606464386, "learning_rate": 4.0440259971509976e-05, "loss": 0.0191, "step": 26270 }, { "epoch": 0.194258005381272, "grad_norm": 0.0706171989440918, "learning_rate": 4.0436550332383665e-05, "loss": 0.0177, "step": 26280 }, { "epoch": 0.19433192395257384, "grad_norm": 0.08861715346574783, "learning_rate": 4.043284069325736e-05, "loss": 0.018, "step": 26290 }, { "epoch": 0.1944058425238757, "grad_norm": 0.09288990497589111, "learning_rate": 4.042913105413106e-05, "loss": 0.0201, "step": 26300 }, { "epoch": 0.19447976109517756, "grad_norm": 0.09657161682844162, "learning_rate": 4.0425421415004746e-05, "loss": 0.0184, "step": 26310 }, { "epoch": 0.1945536796664794, "grad_norm": 0.12205624580383301, "learning_rate": 4.042171177587845e-05, "loss": 0.0209, "step": 26320 }, { "epoch": 0.19462759823778125, "grad_norm": 0.07949160784482956, "learning_rate": 4.041800213675214e-05, "loss": 0.0172, "step": 26330 }, { "epoch": 0.19470151680908312, "grad_norm": 0.0787765309214592, "learning_rate": 4.0414292497625834e-05, "loss": 0.0204, "step": 26340 }, { "epoch": 0.19477543538038497, "grad_norm": 0.12772636115550995, "learning_rate": 4.041058285849953e-05, "loss": 0.0214, "step": 26350 }, { "epoch": 0.19484935395168682, "grad_norm": 0.09125122427940369, "learning_rate": 4.040687321937322e-05, "loss": 0.019, "step": 26360 }, { "epoch": 0.1949232725229887, "grad_norm": 0.10375119000673294, "learning_rate": 4.0403163580246915e-05, "loss": 0.02, "step": 26370 }, { "epoch": 0.19499719109429053, "grad_norm": 0.07330422848463058, "learning_rate": 4.0399453941120605e-05, "loss": 0.0192, "step": 26380 }, { "epoch": 0.19507110966559238, "grad_norm": 0.10417810082435608, "learning_rate": 4.03957443019943e-05, "loss": 0.0201, "step": 26390 }, { "epoch": 0.19514502823689422, "grad_norm": 0.0886123776435852, "learning_rate": 4.0392034662868e-05, "loss": 0.0184, "step": 26400 }, { "epoch": 0.1952189468081961, "grad_norm": 0.07089180499315262, "learning_rate": 4.038832502374169e-05, "loss": 0.0165, "step": 26410 }, { "epoch": 0.19529286537949794, "grad_norm": 0.09441141784191132, "learning_rate": 4.038461538461539e-05, "loss": 0.0198, "step": 26420 }, { "epoch": 0.1953667839507998, "grad_norm": 0.08422481268644333, "learning_rate": 4.038090574548908e-05, "loss": 0.0177, "step": 26430 }, { "epoch": 0.19544070252210166, "grad_norm": 0.10501968115568161, "learning_rate": 4.0377196106362774e-05, "loss": 0.0195, "step": 26440 }, { "epoch": 0.1955146210934035, "grad_norm": 0.0918261930346489, "learning_rate": 4.037348646723647e-05, "loss": 0.0205, "step": 26450 }, { "epoch": 0.19558853966470535, "grad_norm": 0.09516014903783798, "learning_rate": 4.036977682811016e-05, "loss": 0.0204, "step": 26460 }, { "epoch": 0.19566245823600723, "grad_norm": 0.10102162510156631, "learning_rate": 4.036606718898386e-05, "loss": 0.0219, "step": 26470 }, { "epoch": 0.19573637680730907, "grad_norm": 0.09033115208148956, "learning_rate": 4.036235754985755e-05, "loss": 0.0217, "step": 26480 }, { "epoch": 0.19581029537861092, "grad_norm": 0.10341212898492813, "learning_rate": 4.0358647910731247e-05, "loss": 0.0176, "step": 26490 }, { "epoch": 0.1958842139499128, "grad_norm": 0.09452036023139954, "learning_rate": 4.035493827160494e-05, "loss": 0.0206, "step": 26500 }, { "epoch": 0.19595813252121463, "grad_norm": 0.06660100072622299, "learning_rate": 4.035122863247863e-05, "loss": 0.0165, "step": 26510 }, { "epoch": 0.19603205109251648, "grad_norm": 0.0918986052274704, "learning_rate": 4.034751899335233e-05, "loss": 0.0213, "step": 26520 }, { "epoch": 0.19610596966381832, "grad_norm": 0.09241180866956711, "learning_rate": 4.0343809354226024e-05, "loss": 0.0167, "step": 26530 }, { "epoch": 0.1961798882351202, "grad_norm": 0.10144799202680588, "learning_rate": 4.034009971509971e-05, "loss": 0.0204, "step": 26540 }, { "epoch": 0.19625380680642204, "grad_norm": 0.10204997658729553, "learning_rate": 4.0336390075973416e-05, "loss": 0.0201, "step": 26550 }, { "epoch": 0.1963277253777239, "grad_norm": 0.08747974038124084, "learning_rate": 4.0332680436847105e-05, "loss": 0.0202, "step": 26560 }, { "epoch": 0.19640164394902576, "grad_norm": 0.0816131979227066, "learning_rate": 4.03289707977208e-05, "loss": 0.0188, "step": 26570 }, { "epoch": 0.1964755625203276, "grad_norm": 0.07453221082687378, "learning_rate": 4.03252611585945e-05, "loss": 0.0185, "step": 26580 }, { "epoch": 0.19654948109162945, "grad_norm": 0.1012062057852745, "learning_rate": 4.0321551519468186e-05, "loss": 0.0197, "step": 26590 }, { "epoch": 0.19662339966293133, "grad_norm": 0.07416244596242905, "learning_rate": 4.031784188034188e-05, "loss": 0.0168, "step": 26600 }, { "epoch": 0.19669731823423317, "grad_norm": 0.06143144145607948, "learning_rate": 4.031413224121557e-05, "loss": 0.0188, "step": 26610 }, { "epoch": 0.19677123680553502, "grad_norm": 0.13770316541194916, "learning_rate": 4.0310422602089274e-05, "loss": 0.0215, "step": 26620 }, { "epoch": 0.1968451553768369, "grad_norm": 0.11620379984378815, "learning_rate": 4.030671296296297e-05, "loss": 0.0192, "step": 26630 }, { "epoch": 0.19691907394813873, "grad_norm": 0.15548306703567505, "learning_rate": 4.030300332383666e-05, "loss": 0.0207, "step": 26640 }, { "epoch": 0.19699299251944058, "grad_norm": 0.08808482438325882, "learning_rate": 4.0299293684710355e-05, "loss": 0.0182, "step": 26650 }, { "epoch": 0.19706691109074242, "grad_norm": 0.08999871462583542, "learning_rate": 4.0295584045584044e-05, "loss": 0.0179, "step": 26660 }, { "epoch": 0.1971408296620443, "grad_norm": 0.08707962930202484, "learning_rate": 4.029187440645774e-05, "loss": 0.0179, "step": 26670 }, { "epoch": 0.19721474823334614, "grad_norm": 0.08106222748756409, "learning_rate": 4.0288164767331436e-05, "loss": 0.0187, "step": 26680 }, { "epoch": 0.197288666804648, "grad_norm": 0.09482986479997635, "learning_rate": 4.0284455128205125e-05, "loss": 0.0209, "step": 26690 }, { "epoch": 0.19736258537594986, "grad_norm": 0.10451532155275345, "learning_rate": 4.028074548907883e-05, "loss": 0.0213, "step": 26700 }, { "epoch": 0.1974365039472517, "grad_norm": 0.0810663029551506, "learning_rate": 4.027703584995252e-05, "loss": 0.0186, "step": 26710 }, { "epoch": 0.19751042251855355, "grad_norm": 0.046732064336538315, "learning_rate": 4.027332621082621e-05, "loss": 0.0181, "step": 26720 }, { "epoch": 0.19758434108985543, "grad_norm": 0.11813398450613022, "learning_rate": 4.026961657169991e-05, "loss": 0.0219, "step": 26730 }, { "epoch": 0.19765825966115727, "grad_norm": 0.12210382521152496, "learning_rate": 4.02659069325736e-05, "loss": 0.0215, "step": 26740 }, { "epoch": 0.19773217823245912, "grad_norm": 0.0800352618098259, "learning_rate": 4.0262197293447294e-05, "loss": 0.0187, "step": 26750 }, { "epoch": 0.197806096803761, "grad_norm": 0.09820833802223206, "learning_rate": 4.025848765432099e-05, "loss": 0.0177, "step": 26760 }, { "epoch": 0.19788001537506283, "grad_norm": 0.09732136875391006, "learning_rate": 4.0254778015194686e-05, "loss": 0.0202, "step": 26770 }, { "epoch": 0.19795393394636468, "grad_norm": 0.09171831607818604, "learning_rate": 4.025106837606838e-05, "loss": 0.019, "step": 26780 }, { "epoch": 0.19802785251766652, "grad_norm": 0.1001991331577301, "learning_rate": 4.024735873694207e-05, "loss": 0.0202, "step": 26790 }, { "epoch": 0.1981017710889684, "grad_norm": 0.08683771640062332, "learning_rate": 4.024364909781577e-05, "loss": 0.0211, "step": 26800 }, { "epoch": 0.19817568966027024, "grad_norm": 0.11620105803012848, "learning_rate": 4.023993945868946e-05, "loss": 0.0212, "step": 26810 }, { "epoch": 0.1982496082315721, "grad_norm": 0.0925314873456955, "learning_rate": 4.023622981956315e-05, "loss": 0.0177, "step": 26820 }, { "epoch": 0.19832352680287396, "grad_norm": 0.08605986833572388, "learning_rate": 4.023252018043685e-05, "loss": 0.0205, "step": 26830 }, { "epoch": 0.1983974453741758, "grad_norm": 0.10294154286384583, "learning_rate": 4.022881054131054e-05, "loss": 0.0202, "step": 26840 }, { "epoch": 0.19847136394547765, "grad_norm": 0.08033913373947144, "learning_rate": 4.022510090218424e-05, "loss": 0.0187, "step": 26850 }, { "epoch": 0.19854528251677953, "grad_norm": 0.07643717527389526, "learning_rate": 4.0221391263057936e-05, "loss": 0.0181, "step": 26860 }, { "epoch": 0.19861920108808137, "grad_norm": 0.1059470847249031, "learning_rate": 4.0217681623931626e-05, "loss": 0.0196, "step": 26870 }, { "epoch": 0.19869311965938322, "grad_norm": 0.08550570160150528, "learning_rate": 4.021397198480532e-05, "loss": 0.0182, "step": 26880 }, { "epoch": 0.1987670382306851, "grad_norm": 0.07760634273290634, "learning_rate": 4.021026234567901e-05, "loss": 0.0194, "step": 26890 }, { "epoch": 0.19884095680198693, "grad_norm": 0.07543564587831497, "learning_rate": 4.020655270655271e-05, "loss": 0.0207, "step": 26900 }, { "epoch": 0.19891487537328878, "grad_norm": 0.08121582120656967, "learning_rate": 4.02028430674264e-05, "loss": 0.0188, "step": 26910 }, { "epoch": 0.19898879394459063, "grad_norm": 0.08908379077911377, "learning_rate": 4.01991334283001e-05, "loss": 0.0197, "step": 26920 }, { "epoch": 0.1990627125158925, "grad_norm": 0.11445832997560501, "learning_rate": 4.0195423789173795e-05, "loss": 0.0222, "step": 26930 }, { "epoch": 0.19913663108719434, "grad_norm": 0.061530083417892456, "learning_rate": 4.019171415004749e-05, "loss": 0.02, "step": 26940 }, { "epoch": 0.1992105496584962, "grad_norm": 0.09355339407920837, "learning_rate": 4.018800451092118e-05, "loss": 0.0193, "step": 26950 }, { "epoch": 0.19928446822979806, "grad_norm": 0.11692371964454651, "learning_rate": 4.0184294871794876e-05, "loss": 0.0222, "step": 26960 }, { "epoch": 0.1993583868010999, "grad_norm": 0.0950143113732338, "learning_rate": 4.0180585232668565e-05, "loss": 0.0209, "step": 26970 }, { "epoch": 0.19943230537240175, "grad_norm": 0.08873429894447327, "learning_rate": 4.017687559354226e-05, "loss": 0.0187, "step": 26980 }, { "epoch": 0.19950622394370363, "grad_norm": 0.08066698908805847, "learning_rate": 4.017316595441596e-05, "loss": 0.0196, "step": 26990 }, { "epoch": 0.19958014251500547, "grad_norm": 0.10435789823532104, "learning_rate": 4.016945631528965e-05, "loss": 0.021, "step": 27000 }, { "epoch": 0.19965406108630732, "grad_norm": 0.11480347812175751, "learning_rate": 4.016574667616335e-05, "loss": 0.0173, "step": 27010 }, { "epoch": 0.1997279796576092, "grad_norm": 0.11956532299518585, "learning_rate": 4.016203703703704e-05, "loss": 0.0202, "step": 27020 }, { "epoch": 0.19980189822891103, "grad_norm": 0.10095667093992233, "learning_rate": 4.0158327397910734e-05, "loss": 0.0185, "step": 27030 }, { "epoch": 0.19987581680021288, "grad_norm": 0.08467351645231247, "learning_rate": 4.015461775878443e-05, "loss": 0.0209, "step": 27040 }, { "epoch": 0.19994973537151475, "grad_norm": 0.07851988077163696, "learning_rate": 4.015090811965812e-05, "loss": 0.0223, "step": 27050 }, { "epoch": 0.2000236539428166, "grad_norm": 0.08040213584899902, "learning_rate": 4.0147198480531815e-05, "loss": 0.0214, "step": 27060 }, { "epoch": 0.20009757251411844, "grad_norm": 0.09008269757032394, "learning_rate": 4.014348884140551e-05, "loss": 0.0198, "step": 27070 }, { "epoch": 0.2001714910854203, "grad_norm": 0.07157569378614426, "learning_rate": 4.013977920227921e-05, "loss": 0.0194, "step": 27080 }, { "epoch": 0.20024540965672216, "grad_norm": 0.06881707161664963, "learning_rate": 4.01360695631529e-05, "loss": 0.0188, "step": 27090 }, { "epoch": 0.200319328228024, "grad_norm": 0.06796582788228989, "learning_rate": 4.013235992402659e-05, "loss": 0.0188, "step": 27100 }, { "epoch": 0.20039324679932585, "grad_norm": 0.09799590706825256, "learning_rate": 4.012865028490029e-05, "loss": 0.0185, "step": 27110 }, { "epoch": 0.20046716537062773, "grad_norm": 0.08818831294775009, "learning_rate": 4.012494064577398e-05, "loss": 0.0174, "step": 27120 }, { "epoch": 0.20054108394192957, "grad_norm": 0.08110783249139786, "learning_rate": 4.012123100664767e-05, "loss": 0.0192, "step": 27130 }, { "epoch": 0.20061500251323142, "grad_norm": 0.09459230303764343, "learning_rate": 4.011752136752137e-05, "loss": 0.0214, "step": 27140 }, { "epoch": 0.2006889210845333, "grad_norm": 0.09958945959806442, "learning_rate": 4.0113811728395065e-05, "loss": 0.02, "step": 27150 }, { "epoch": 0.20076283965583513, "grad_norm": 0.11538317054510117, "learning_rate": 4.011010208926876e-05, "loss": 0.0205, "step": 27160 }, { "epoch": 0.20083675822713698, "grad_norm": 0.0951877310872078, "learning_rate": 4.010639245014246e-05, "loss": 0.0204, "step": 27170 }, { "epoch": 0.20091067679843885, "grad_norm": 0.09305461496114731, "learning_rate": 4.0102682811016146e-05, "loss": 0.0202, "step": 27180 }, { "epoch": 0.2009845953697407, "grad_norm": 0.06460881233215332, "learning_rate": 4.009897317188984e-05, "loss": 0.0192, "step": 27190 }, { "epoch": 0.20105851394104254, "grad_norm": 0.0849134549498558, "learning_rate": 4.009526353276353e-05, "loss": 0.0191, "step": 27200 }, { "epoch": 0.2011324325123444, "grad_norm": 0.07277417182922363, "learning_rate": 4.009155389363723e-05, "loss": 0.0192, "step": 27210 }, { "epoch": 0.20120635108364626, "grad_norm": 0.09616146236658096, "learning_rate": 4.0087844254510923e-05, "loss": 0.0194, "step": 27220 }, { "epoch": 0.2012802696549481, "grad_norm": 0.10256221145391464, "learning_rate": 4.008413461538462e-05, "loss": 0.0196, "step": 27230 }, { "epoch": 0.20135418822624995, "grad_norm": 0.05856601148843765, "learning_rate": 4.0080424976258315e-05, "loss": 0.0186, "step": 27240 }, { "epoch": 0.20142810679755183, "grad_norm": 0.10383433103561401, "learning_rate": 4.0076715337132005e-05, "loss": 0.0215, "step": 27250 }, { "epoch": 0.20150202536885367, "grad_norm": 0.09276574105024338, "learning_rate": 4.00730056980057e-05, "loss": 0.0191, "step": 27260 }, { "epoch": 0.20157594394015552, "grad_norm": 0.07574693858623505, "learning_rate": 4.0069296058879396e-05, "loss": 0.0186, "step": 27270 }, { "epoch": 0.2016498625114574, "grad_norm": 0.07975997775793076, "learning_rate": 4.0065586419753086e-05, "loss": 0.0236, "step": 27280 }, { "epoch": 0.20172378108275923, "grad_norm": 0.07284721732139587, "learning_rate": 4.006187678062678e-05, "loss": 0.023, "step": 27290 }, { "epoch": 0.20179769965406108, "grad_norm": 0.07264596223831177, "learning_rate": 4.005816714150048e-05, "loss": 0.0156, "step": 27300 }, { "epoch": 0.20187161822536295, "grad_norm": 0.10306177288293839, "learning_rate": 4.0054457502374174e-05, "loss": 0.0218, "step": 27310 }, { "epoch": 0.2019455367966648, "grad_norm": 0.10815753042697906, "learning_rate": 4.005074786324787e-05, "loss": 0.0196, "step": 27320 }, { "epoch": 0.20201945536796664, "grad_norm": 0.10363900661468506, "learning_rate": 4.004703822412156e-05, "loss": 0.0163, "step": 27330 }, { "epoch": 0.2020933739392685, "grad_norm": 0.09635012596845627, "learning_rate": 4.0043328584995255e-05, "loss": 0.0209, "step": 27340 }, { "epoch": 0.20216729251057036, "grad_norm": 0.08603912591934204, "learning_rate": 4.0039618945868944e-05, "loss": 0.0186, "step": 27350 }, { "epoch": 0.2022412110818722, "grad_norm": 0.10809623450040817, "learning_rate": 4.003590930674264e-05, "loss": 0.0203, "step": 27360 }, { "epoch": 0.20231512965317405, "grad_norm": 0.08217758685350418, "learning_rate": 4.0032199667616336e-05, "loss": 0.0231, "step": 27370 }, { "epoch": 0.20238904822447593, "grad_norm": 0.07781322300434113, "learning_rate": 4.002849002849003e-05, "loss": 0.0184, "step": 27380 }, { "epoch": 0.20246296679577777, "grad_norm": 0.09918926656246185, "learning_rate": 4.002478038936373e-05, "loss": 0.0227, "step": 27390 }, { "epoch": 0.20253688536707962, "grad_norm": 0.07839609682559967, "learning_rate": 4.0021070750237424e-05, "loss": 0.0187, "step": 27400 }, { "epoch": 0.2026108039383815, "grad_norm": 0.0917617529630661, "learning_rate": 4.001736111111111e-05, "loss": 0.0175, "step": 27410 }, { "epoch": 0.20268472250968333, "grad_norm": 0.09276415407657623, "learning_rate": 4.001365147198481e-05, "loss": 0.0226, "step": 27420 }, { "epoch": 0.20275864108098518, "grad_norm": 0.09192842245101929, "learning_rate": 4.00099418328585e-05, "loss": 0.0175, "step": 27430 }, { "epoch": 0.20283255965228705, "grad_norm": 0.09252292662858963, "learning_rate": 4.0006232193732194e-05, "loss": 0.0201, "step": 27440 }, { "epoch": 0.2029064782235889, "grad_norm": 0.11140212416648865, "learning_rate": 4.000252255460589e-05, "loss": 0.0202, "step": 27450 }, { "epoch": 0.20298039679489074, "grad_norm": 0.10308962315320969, "learning_rate": 3.9998812915479586e-05, "loss": 0.0199, "step": 27460 }, { "epoch": 0.2030543153661926, "grad_norm": 0.09824221581220627, "learning_rate": 3.999510327635328e-05, "loss": 0.0216, "step": 27470 }, { "epoch": 0.20312823393749446, "grad_norm": 0.09012199193239212, "learning_rate": 3.999139363722697e-05, "loss": 0.0176, "step": 27480 }, { "epoch": 0.2032021525087963, "grad_norm": 0.10927720367908478, "learning_rate": 3.998768399810067e-05, "loss": 0.0205, "step": 27490 }, { "epoch": 0.20327607108009815, "grad_norm": 0.07126221060752869, "learning_rate": 3.998397435897436e-05, "loss": 0.0221, "step": 27500 }, { "epoch": 0.20334998965140003, "grad_norm": 0.07685278356075287, "learning_rate": 3.998026471984805e-05, "loss": 0.019, "step": 27510 }, { "epoch": 0.20342390822270187, "grad_norm": 0.080895334482193, "learning_rate": 3.997655508072175e-05, "loss": 0.0188, "step": 27520 }, { "epoch": 0.20349782679400372, "grad_norm": 0.09567815065383911, "learning_rate": 3.9972845441595444e-05, "loss": 0.019, "step": 27530 }, { "epoch": 0.2035717453653056, "grad_norm": 0.056576136499643326, "learning_rate": 3.996913580246914e-05, "loss": 0.0175, "step": 27540 }, { "epoch": 0.20364566393660744, "grad_norm": 0.12664413452148438, "learning_rate": 3.9965426163342836e-05, "loss": 0.0183, "step": 27550 }, { "epoch": 0.20371958250790928, "grad_norm": 0.1033640056848526, "learning_rate": 3.9961716524216525e-05, "loss": 0.0184, "step": 27560 }, { "epoch": 0.20379350107921115, "grad_norm": 0.071520134806633, "learning_rate": 3.995800688509022e-05, "loss": 0.0198, "step": 27570 }, { "epoch": 0.203867419650513, "grad_norm": 0.08319620788097382, "learning_rate": 3.995429724596391e-05, "loss": 0.0198, "step": 27580 }, { "epoch": 0.20394133822181484, "grad_norm": 0.07659216970205307, "learning_rate": 3.9950587606837606e-05, "loss": 0.018, "step": 27590 }, { "epoch": 0.2040152567931167, "grad_norm": 0.08090242743492126, "learning_rate": 3.99468779677113e-05, "loss": 0.0211, "step": 27600 }, { "epoch": 0.20408917536441856, "grad_norm": 0.09213200211524963, "learning_rate": 3.9943168328585e-05, "loss": 0.0186, "step": 27610 }, { "epoch": 0.2041630939357204, "grad_norm": 0.08669887483119965, "learning_rate": 3.9939458689458694e-05, "loss": 0.0202, "step": 27620 }, { "epoch": 0.20423701250702225, "grad_norm": 0.07079674303531647, "learning_rate": 3.993574905033239e-05, "loss": 0.0168, "step": 27630 }, { "epoch": 0.20431093107832413, "grad_norm": 0.07631437480449677, "learning_rate": 3.993203941120608e-05, "loss": 0.0207, "step": 27640 }, { "epoch": 0.20438484964962597, "grad_norm": 0.0871163159608841, "learning_rate": 3.9928329772079775e-05, "loss": 0.0204, "step": 27650 }, { "epoch": 0.20445876822092782, "grad_norm": 0.08351099491119385, "learning_rate": 3.9924620132953465e-05, "loss": 0.0201, "step": 27660 }, { "epoch": 0.2045326867922297, "grad_norm": 0.09270329028367996, "learning_rate": 3.992091049382716e-05, "loss": 0.0189, "step": 27670 }, { "epoch": 0.20460660536353154, "grad_norm": 0.10540712624788284, "learning_rate": 3.9917200854700857e-05, "loss": 0.018, "step": 27680 }, { "epoch": 0.20468052393483338, "grad_norm": 0.07462663948535919, "learning_rate": 3.991349121557455e-05, "loss": 0.0186, "step": 27690 }, { "epoch": 0.20475444250613525, "grad_norm": 0.07764612138271332, "learning_rate": 3.990978157644825e-05, "loss": 0.0175, "step": 27700 }, { "epoch": 0.2048283610774371, "grad_norm": 0.10635413974523544, "learning_rate": 3.990607193732194e-05, "loss": 0.0218, "step": 27710 }, { "epoch": 0.20490227964873894, "grad_norm": 0.11886221915483475, "learning_rate": 3.9902362298195634e-05, "loss": 0.0173, "step": 27720 }, { "epoch": 0.2049761982200408, "grad_norm": 0.07569146901369095, "learning_rate": 3.989865265906933e-05, "loss": 0.02, "step": 27730 }, { "epoch": 0.20505011679134266, "grad_norm": 0.08334322273731232, "learning_rate": 3.989494301994302e-05, "loss": 0.0202, "step": 27740 }, { "epoch": 0.2051240353626445, "grad_norm": 0.11985990405082703, "learning_rate": 3.9891233380816715e-05, "loss": 0.0216, "step": 27750 }, { "epoch": 0.20519795393394635, "grad_norm": 0.096512570977211, "learning_rate": 3.988752374169041e-05, "loss": 0.0204, "step": 27760 }, { "epoch": 0.20527187250524823, "grad_norm": 0.07972533255815506, "learning_rate": 3.988381410256411e-05, "loss": 0.0192, "step": 27770 }, { "epoch": 0.20534579107655007, "grad_norm": 0.09564422070980072, "learning_rate": 3.98801044634378e-05, "loss": 0.02, "step": 27780 }, { "epoch": 0.20541970964785192, "grad_norm": 0.08355620503425598, "learning_rate": 3.987639482431149e-05, "loss": 0.0207, "step": 27790 }, { "epoch": 0.2054936282191538, "grad_norm": 0.10648877918720245, "learning_rate": 3.987268518518519e-05, "loss": 0.0181, "step": 27800 }, { "epoch": 0.20556754679045564, "grad_norm": 0.06632612645626068, "learning_rate": 3.986897554605888e-05, "loss": 0.0209, "step": 27810 }, { "epoch": 0.20564146536175748, "grad_norm": 0.09305301308631897, "learning_rate": 3.986526590693257e-05, "loss": 0.0164, "step": 27820 }, { "epoch": 0.20571538393305935, "grad_norm": 0.07734738290309906, "learning_rate": 3.986155626780627e-05, "loss": 0.0185, "step": 27830 }, { "epoch": 0.2057893025043612, "grad_norm": 0.07458245009183884, "learning_rate": 3.9857846628679965e-05, "loss": 0.0175, "step": 27840 }, { "epoch": 0.20586322107566304, "grad_norm": 0.08158103376626968, "learning_rate": 3.985413698955366e-05, "loss": 0.0191, "step": 27850 }, { "epoch": 0.2059371396469649, "grad_norm": 0.07438033074140549, "learning_rate": 3.985042735042736e-05, "loss": 0.02, "step": 27860 }, { "epoch": 0.20601105821826676, "grad_norm": 0.06623519212007523, "learning_rate": 3.9846717711301046e-05, "loss": 0.0224, "step": 27870 }, { "epoch": 0.2060849767895686, "grad_norm": 0.08878178894519806, "learning_rate": 3.984300807217474e-05, "loss": 0.018, "step": 27880 }, { "epoch": 0.20615889536087045, "grad_norm": 0.09008637815713882, "learning_rate": 3.983929843304843e-05, "loss": 0.0203, "step": 27890 }, { "epoch": 0.20623281393217233, "grad_norm": 0.105464406311512, "learning_rate": 3.983558879392213e-05, "loss": 0.0198, "step": 27900 }, { "epoch": 0.20630673250347417, "grad_norm": 0.10132227092981339, "learning_rate": 3.983187915479582e-05, "loss": 0.0205, "step": 27910 }, { "epoch": 0.20638065107477602, "grad_norm": 0.09023267775774002, "learning_rate": 3.982816951566952e-05, "loss": 0.0213, "step": 27920 }, { "epoch": 0.2064545696460779, "grad_norm": 0.107700414955616, "learning_rate": 3.9824459876543215e-05, "loss": 0.0192, "step": 27930 }, { "epoch": 0.20652848821737974, "grad_norm": 0.0751541405916214, "learning_rate": 3.9820750237416904e-05, "loss": 0.0199, "step": 27940 }, { "epoch": 0.20660240678868158, "grad_norm": 0.0796554908156395, "learning_rate": 3.98170405982906e-05, "loss": 0.0185, "step": 27950 }, { "epoch": 0.20667632535998345, "grad_norm": 0.15034401416778564, "learning_rate": 3.9813330959164296e-05, "loss": 0.0197, "step": 27960 }, { "epoch": 0.2067502439312853, "grad_norm": 0.09039101749658585, "learning_rate": 3.9809621320037985e-05, "loss": 0.0175, "step": 27970 }, { "epoch": 0.20682416250258714, "grad_norm": 0.08342040330171585, "learning_rate": 3.980591168091168e-05, "loss": 0.0206, "step": 27980 }, { "epoch": 0.20689808107388902, "grad_norm": 0.08229884505271912, "learning_rate": 3.980220204178538e-05, "loss": 0.0204, "step": 27990 }, { "epoch": 0.20697199964519086, "grad_norm": 0.07746469974517822, "learning_rate": 3.979849240265907e-05, "loss": 0.0214, "step": 28000 }, { "epoch": 0.2070459182164927, "grad_norm": 0.11914195865392685, "learning_rate": 3.979478276353277e-05, "loss": 0.0223, "step": 28010 }, { "epoch": 0.20711983678779455, "grad_norm": 0.10523311793804169, "learning_rate": 3.979107312440646e-05, "loss": 0.0189, "step": 28020 }, { "epoch": 0.20719375535909643, "grad_norm": 0.061627957969903946, "learning_rate": 3.9787363485280154e-05, "loss": 0.0194, "step": 28030 }, { "epoch": 0.20726767393039827, "grad_norm": 0.08928674459457397, "learning_rate": 3.9783653846153844e-05, "loss": 0.0194, "step": 28040 }, { "epoch": 0.20734159250170012, "grad_norm": 0.11436183750629425, "learning_rate": 3.977994420702754e-05, "loss": 0.022, "step": 28050 }, { "epoch": 0.207415511073002, "grad_norm": 0.07641822844743729, "learning_rate": 3.9776234567901236e-05, "loss": 0.0201, "step": 28060 }, { "epoch": 0.20748942964430384, "grad_norm": 0.07226108014583588, "learning_rate": 3.977252492877493e-05, "loss": 0.0188, "step": 28070 }, { "epoch": 0.20756334821560568, "grad_norm": 0.0811077281832695, "learning_rate": 3.976881528964863e-05, "loss": 0.02, "step": 28080 }, { "epoch": 0.20763726678690755, "grad_norm": 0.09483274072408676, "learning_rate": 3.9765105650522323e-05, "loss": 0.0184, "step": 28090 }, { "epoch": 0.2077111853582094, "grad_norm": 0.07932420074939728, "learning_rate": 3.976139601139601e-05, "loss": 0.02, "step": 28100 }, { "epoch": 0.20778510392951124, "grad_norm": 0.06476841866970062, "learning_rate": 3.975768637226971e-05, "loss": 0.0197, "step": 28110 }, { "epoch": 0.20785902250081312, "grad_norm": 0.09106004238128662, "learning_rate": 3.97539767331434e-05, "loss": 0.0182, "step": 28120 }, { "epoch": 0.20793294107211496, "grad_norm": 0.08542423695325851, "learning_rate": 3.9750267094017094e-05, "loss": 0.0204, "step": 28130 }, { "epoch": 0.2080068596434168, "grad_norm": 0.060376714915037155, "learning_rate": 3.974655745489079e-05, "loss": 0.0171, "step": 28140 }, { "epoch": 0.20808077821471865, "grad_norm": 0.0787116289138794, "learning_rate": 3.9742847815764486e-05, "loss": 0.0188, "step": 28150 }, { "epoch": 0.20815469678602053, "grad_norm": 0.0847581997513771, "learning_rate": 3.973913817663818e-05, "loss": 0.0194, "step": 28160 }, { "epoch": 0.20822861535732237, "grad_norm": 0.08633068203926086, "learning_rate": 3.973542853751187e-05, "loss": 0.0217, "step": 28170 }, { "epoch": 0.20830253392862422, "grad_norm": 0.10672096163034439, "learning_rate": 3.973171889838557e-05, "loss": 0.0206, "step": 28180 }, { "epoch": 0.2083764524999261, "grad_norm": 0.06093067675828934, "learning_rate": 3.972800925925926e-05, "loss": 0.0184, "step": 28190 }, { "epoch": 0.20845037107122794, "grad_norm": 0.09024010598659515, "learning_rate": 3.972429962013295e-05, "loss": 0.0213, "step": 28200 }, { "epoch": 0.20852428964252978, "grad_norm": 0.11794493347406387, "learning_rate": 3.972058998100665e-05, "loss": 0.0198, "step": 28210 }, { "epoch": 0.20859820821383165, "grad_norm": 0.0925167128443718, "learning_rate": 3.9716880341880344e-05, "loss": 0.0214, "step": 28220 }, { "epoch": 0.2086721267851335, "grad_norm": 0.06808876246213913, "learning_rate": 3.971317070275404e-05, "loss": 0.0205, "step": 28230 }, { "epoch": 0.20874604535643534, "grad_norm": 0.09446553885936737, "learning_rate": 3.9709461063627736e-05, "loss": 0.0232, "step": 28240 }, { "epoch": 0.20881996392773722, "grad_norm": 0.07646559178829193, "learning_rate": 3.9705751424501425e-05, "loss": 0.018, "step": 28250 }, { "epoch": 0.20889388249903906, "grad_norm": 0.10056505352258682, "learning_rate": 3.970204178537512e-05, "loss": 0.0196, "step": 28260 }, { "epoch": 0.2089678010703409, "grad_norm": 0.08074390143156052, "learning_rate": 3.969833214624881e-05, "loss": 0.0179, "step": 28270 }, { "epoch": 0.20904171964164275, "grad_norm": 0.08853159844875336, "learning_rate": 3.9694622507122506e-05, "loss": 0.0177, "step": 28280 }, { "epoch": 0.20911563821294463, "grad_norm": 0.09671463072299957, "learning_rate": 3.96909128679962e-05, "loss": 0.0217, "step": 28290 }, { "epoch": 0.20918955678424647, "grad_norm": 0.10711846500635147, "learning_rate": 3.96872032288699e-05, "loss": 0.0193, "step": 28300 }, { "epoch": 0.20926347535554832, "grad_norm": 0.07058045268058777, "learning_rate": 3.9683493589743594e-05, "loss": 0.0186, "step": 28310 }, { "epoch": 0.2093373939268502, "grad_norm": 0.10393711179494858, "learning_rate": 3.967978395061729e-05, "loss": 0.0184, "step": 28320 }, { "epoch": 0.20941131249815204, "grad_norm": 0.10901501029729843, "learning_rate": 3.967607431149098e-05, "loss": 0.0212, "step": 28330 }, { "epoch": 0.20948523106945388, "grad_norm": 0.10953504592180252, "learning_rate": 3.9672364672364675e-05, "loss": 0.0187, "step": 28340 }, { "epoch": 0.20955914964075575, "grad_norm": 0.09685878455638885, "learning_rate": 3.9668655033238364e-05, "loss": 0.018, "step": 28350 }, { "epoch": 0.2096330682120576, "grad_norm": 0.10276530683040619, "learning_rate": 3.966494539411206e-05, "loss": 0.0197, "step": 28360 }, { "epoch": 0.20970698678335944, "grad_norm": 0.07335204631090164, "learning_rate": 3.9661235754985756e-05, "loss": 0.0205, "step": 28370 }, { "epoch": 0.20978090535466132, "grad_norm": 0.10454121232032776, "learning_rate": 3.965752611585945e-05, "loss": 0.0206, "step": 28380 }, { "epoch": 0.20985482392596316, "grad_norm": 0.0707346722483635, "learning_rate": 3.965381647673315e-05, "loss": 0.0203, "step": 28390 }, { "epoch": 0.209928742497265, "grad_norm": 0.08866816014051437, "learning_rate": 3.965010683760684e-05, "loss": 0.0185, "step": 28400 }, { "epoch": 0.21000266106856685, "grad_norm": 0.12376395612955093, "learning_rate": 3.9646397198480533e-05, "loss": 0.0185, "step": 28410 }, { "epoch": 0.21007657963986873, "grad_norm": 0.06304518133401871, "learning_rate": 3.964268755935423e-05, "loss": 0.0225, "step": 28420 }, { "epoch": 0.21015049821117057, "grad_norm": 0.08424234390258789, "learning_rate": 3.963897792022792e-05, "loss": 0.0202, "step": 28430 }, { "epoch": 0.21022441678247242, "grad_norm": 0.09381997585296631, "learning_rate": 3.9635268281101615e-05, "loss": 0.0227, "step": 28440 }, { "epoch": 0.2102983353537743, "grad_norm": 0.06805253028869629, "learning_rate": 3.963155864197531e-05, "loss": 0.0206, "step": 28450 }, { "epoch": 0.21037225392507614, "grad_norm": 0.09173522889614105, "learning_rate": 3.9627849002849006e-05, "loss": 0.0187, "step": 28460 }, { "epoch": 0.21044617249637798, "grad_norm": 0.07035304605960846, "learning_rate": 3.96241393637227e-05, "loss": 0.0202, "step": 28470 }, { "epoch": 0.21052009106767985, "grad_norm": 0.07232367247343063, "learning_rate": 3.962042972459639e-05, "loss": 0.0193, "step": 28480 }, { "epoch": 0.2105940096389817, "grad_norm": 0.10332144796848297, "learning_rate": 3.961672008547009e-05, "loss": 0.0202, "step": 28490 }, { "epoch": 0.21066792821028354, "grad_norm": 0.08793472498655319, "learning_rate": 3.961301044634378e-05, "loss": 0.0174, "step": 28500 }, { "epoch": 0.21074184678158542, "grad_norm": 0.11272072046995163, "learning_rate": 3.960930080721747e-05, "loss": 0.021, "step": 28510 }, { "epoch": 0.21081576535288726, "grad_norm": 0.07357959449291229, "learning_rate": 3.960559116809117e-05, "loss": 0.0181, "step": 28520 }, { "epoch": 0.2108896839241891, "grad_norm": 0.07872324436903, "learning_rate": 3.9601881528964865e-05, "loss": 0.0203, "step": 28530 }, { "epoch": 0.21096360249549095, "grad_norm": 0.08163028210401535, "learning_rate": 3.959817188983856e-05, "loss": 0.0193, "step": 28540 }, { "epoch": 0.21103752106679283, "grad_norm": 0.09265872836112976, "learning_rate": 3.959446225071226e-05, "loss": 0.0209, "step": 28550 }, { "epoch": 0.21111143963809467, "grad_norm": 0.09339044243097305, "learning_rate": 3.9590752611585946e-05, "loss": 0.0212, "step": 28560 }, { "epoch": 0.21118535820939652, "grad_norm": 0.06638886034488678, "learning_rate": 3.958704297245964e-05, "loss": 0.021, "step": 28570 }, { "epoch": 0.2112592767806984, "grad_norm": 0.09415410459041595, "learning_rate": 3.958333333333333e-05, "loss": 0.0198, "step": 28580 }, { "epoch": 0.21133319535200024, "grad_norm": 0.09834780544042587, "learning_rate": 3.957962369420703e-05, "loss": 0.02, "step": 28590 }, { "epoch": 0.21140711392330208, "grad_norm": 0.09509751945734024, "learning_rate": 3.957591405508072e-05, "loss": 0.0186, "step": 28600 }, { "epoch": 0.21148103249460395, "grad_norm": 0.08566658943891525, "learning_rate": 3.957220441595442e-05, "loss": 0.0187, "step": 28610 }, { "epoch": 0.2115549510659058, "grad_norm": 0.09062514454126358, "learning_rate": 3.9568494776828115e-05, "loss": 0.0205, "step": 28620 }, { "epoch": 0.21162886963720764, "grad_norm": 0.0751434788107872, "learning_rate": 3.9564785137701804e-05, "loss": 0.0171, "step": 28630 }, { "epoch": 0.21170278820850952, "grad_norm": 0.08853239566087723, "learning_rate": 3.95610754985755e-05, "loss": 0.0206, "step": 28640 }, { "epoch": 0.21177670677981136, "grad_norm": 0.10610842704772949, "learning_rate": 3.9557365859449196e-05, "loss": 0.0182, "step": 28650 }, { "epoch": 0.2118506253511132, "grad_norm": 0.11282014846801758, "learning_rate": 3.9553656220322885e-05, "loss": 0.0177, "step": 28660 }, { "epoch": 0.21192454392241505, "grad_norm": 0.08027879148721695, "learning_rate": 3.954994658119658e-05, "loss": 0.0213, "step": 28670 }, { "epoch": 0.21199846249371693, "grad_norm": 0.09355953335762024, "learning_rate": 3.954623694207028e-05, "loss": 0.0202, "step": 28680 }, { "epoch": 0.21207238106501877, "grad_norm": 0.09194199740886688, "learning_rate": 3.954252730294397e-05, "loss": 0.0204, "step": 28690 }, { "epoch": 0.21214629963632062, "grad_norm": 0.0887255147099495, "learning_rate": 3.953881766381767e-05, "loss": 0.0214, "step": 28700 }, { "epoch": 0.2122202182076225, "grad_norm": 0.08310788124799728, "learning_rate": 3.953510802469136e-05, "loss": 0.0211, "step": 28710 }, { "epoch": 0.21229413677892434, "grad_norm": 0.0715780034661293, "learning_rate": 3.9531398385565054e-05, "loss": 0.0182, "step": 28720 }, { "epoch": 0.21236805535022618, "grad_norm": 0.08348576724529266, "learning_rate": 3.9527688746438743e-05, "loss": 0.0197, "step": 28730 }, { "epoch": 0.21244197392152805, "grad_norm": 0.11897409707307816, "learning_rate": 3.952397910731244e-05, "loss": 0.0193, "step": 28740 }, { "epoch": 0.2125158924928299, "grad_norm": 0.08021600544452667, "learning_rate": 3.9520269468186135e-05, "loss": 0.019, "step": 28750 }, { "epoch": 0.21258981106413175, "grad_norm": 0.07227059453725815, "learning_rate": 3.951655982905983e-05, "loss": 0.02, "step": 28760 }, { "epoch": 0.21266372963543362, "grad_norm": 0.09160932153463364, "learning_rate": 3.951285018993353e-05, "loss": 0.0213, "step": 28770 }, { "epoch": 0.21273764820673546, "grad_norm": 0.0931738093495369, "learning_rate": 3.950914055080722e-05, "loss": 0.0198, "step": 28780 }, { "epoch": 0.2128115667780373, "grad_norm": 0.08352254331111908, "learning_rate": 3.950543091168091e-05, "loss": 0.021, "step": 28790 }, { "epoch": 0.21288548534933915, "grad_norm": 0.08564615249633789, "learning_rate": 3.950172127255461e-05, "loss": 0.0187, "step": 28800 }, { "epoch": 0.21295940392064103, "grad_norm": 0.1044422835111618, "learning_rate": 3.94980116334283e-05, "loss": 0.0198, "step": 28810 }, { "epoch": 0.21303332249194287, "grad_norm": 0.07070822268724442, "learning_rate": 3.9494301994301994e-05, "loss": 0.0189, "step": 28820 }, { "epoch": 0.21310724106324472, "grad_norm": 0.07984407991170883, "learning_rate": 3.949059235517569e-05, "loss": 0.0189, "step": 28830 }, { "epoch": 0.2131811596345466, "grad_norm": 0.08178012818098068, "learning_rate": 3.9486882716049385e-05, "loss": 0.0196, "step": 28840 }, { "epoch": 0.21325507820584844, "grad_norm": 0.06512026488780975, "learning_rate": 3.948317307692308e-05, "loss": 0.0207, "step": 28850 }, { "epoch": 0.21332899677715028, "grad_norm": 0.08254005759954453, "learning_rate": 3.947946343779677e-05, "loss": 0.0182, "step": 28860 }, { "epoch": 0.21340291534845215, "grad_norm": 0.10215901583433151, "learning_rate": 3.9475753798670467e-05, "loss": 0.0184, "step": 28870 }, { "epoch": 0.213476833919754, "grad_norm": 0.08271851390600204, "learning_rate": 3.947204415954416e-05, "loss": 0.0174, "step": 28880 }, { "epoch": 0.21355075249105585, "grad_norm": 0.08024625480175018, "learning_rate": 3.946833452041785e-05, "loss": 0.0183, "step": 28890 }, { "epoch": 0.21362467106235772, "grad_norm": 0.06575581431388855, "learning_rate": 3.946462488129155e-05, "loss": 0.0184, "step": 28900 }, { "epoch": 0.21369858963365956, "grad_norm": 0.12389591336250305, "learning_rate": 3.9460915242165244e-05, "loss": 0.0229, "step": 28910 }, { "epoch": 0.2137725082049614, "grad_norm": 0.07798313349485397, "learning_rate": 3.945720560303894e-05, "loss": 0.0181, "step": 28920 }, { "epoch": 0.21384642677626328, "grad_norm": 0.07742154598236084, "learning_rate": 3.9453495963912636e-05, "loss": 0.0182, "step": 28930 }, { "epoch": 0.21392034534756513, "grad_norm": 0.0996508002281189, "learning_rate": 3.9449786324786325e-05, "loss": 0.0205, "step": 28940 }, { "epoch": 0.21399426391886697, "grad_norm": 0.07458839565515518, "learning_rate": 3.944607668566002e-05, "loss": 0.0176, "step": 28950 }, { "epoch": 0.21406818249016882, "grad_norm": 0.11061214655637741, "learning_rate": 3.944236704653371e-05, "loss": 0.0197, "step": 28960 }, { "epoch": 0.2141421010614707, "grad_norm": 0.07425292581319809, "learning_rate": 3.9438657407407406e-05, "loss": 0.0171, "step": 28970 }, { "epoch": 0.21421601963277254, "grad_norm": 0.08154287934303284, "learning_rate": 3.94349477682811e-05, "loss": 0.0204, "step": 28980 }, { "epoch": 0.21428993820407438, "grad_norm": 0.09038446098566055, "learning_rate": 3.94312381291548e-05, "loss": 0.0187, "step": 28990 }, { "epoch": 0.21436385677537625, "grad_norm": 0.09763018041849136, "learning_rate": 3.9427528490028494e-05, "loss": 0.0195, "step": 29000 }, { "epoch": 0.2144377753466781, "grad_norm": 0.07743502408266068, "learning_rate": 3.942381885090219e-05, "loss": 0.0194, "step": 29010 }, { "epoch": 0.21451169391797995, "grad_norm": 0.08858030289411545, "learning_rate": 3.942010921177588e-05, "loss": 0.0191, "step": 29020 }, { "epoch": 0.21458561248928182, "grad_norm": 0.07084911316633224, "learning_rate": 3.9416399572649575e-05, "loss": 0.0193, "step": 29030 }, { "epoch": 0.21465953106058366, "grad_norm": 0.108488067984581, "learning_rate": 3.9412689933523264e-05, "loss": 0.0208, "step": 29040 }, { "epoch": 0.2147334496318855, "grad_norm": 0.08137663453817368, "learning_rate": 3.940898029439696e-05, "loss": 0.0194, "step": 29050 }, { "epoch": 0.21480736820318738, "grad_norm": 0.07932858914136887, "learning_rate": 3.940527065527066e-05, "loss": 0.02, "step": 29060 }, { "epoch": 0.21488128677448923, "grad_norm": 0.0741783156991005, "learning_rate": 3.940156101614435e-05, "loss": 0.02, "step": 29070 }, { "epoch": 0.21495520534579107, "grad_norm": 0.09059015661478043, "learning_rate": 3.939785137701805e-05, "loss": 0.0196, "step": 29080 }, { "epoch": 0.21502912391709292, "grad_norm": 0.10432152450084686, "learning_rate": 3.939414173789174e-05, "loss": 0.0184, "step": 29090 }, { "epoch": 0.2151030424883948, "grad_norm": 0.08571776747703552, "learning_rate": 3.939043209876543e-05, "loss": 0.0184, "step": 29100 }, { "epoch": 0.21517696105969664, "grad_norm": 0.09835029393434525, "learning_rate": 3.938672245963913e-05, "loss": 0.0186, "step": 29110 }, { "epoch": 0.21525087963099848, "grad_norm": 0.08376480638980865, "learning_rate": 3.938301282051282e-05, "loss": 0.0182, "step": 29120 }, { "epoch": 0.21532479820230035, "grad_norm": 0.06342848390340805, "learning_rate": 3.9379303181386514e-05, "loss": 0.0168, "step": 29130 }, { "epoch": 0.2153987167736022, "grad_norm": 0.08471592515707016, "learning_rate": 3.937559354226021e-05, "loss": 0.02, "step": 29140 }, { "epoch": 0.21547263534490405, "grad_norm": 0.0840645506978035, "learning_rate": 3.9371883903133906e-05, "loss": 0.0193, "step": 29150 }, { "epoch": 0.21554655391620592, "grad_norm": 0.11769651621580124, "learning_rate": 3.93681742640076e-05, "loss": 0.0203, "step": 29160 }, { "epoch": 0.21562047248750776, "grad_norm": 0.1026725247502327, "learning_rate": 3.936446462488129e-05, "loss": 0.0216, "step": 29170 }, { "epoch": 0.2156943910588096, "grad_norm": 0.10950805991888046, "learning_rate": 3.936075498575499e-05, "loss": 0.0175, "step": 29180 }, { "epoch": 0.21576830963011148, "grad_norm": 0.08458021283149719, "learning_rate": 3.9357045346628677e-05, "loss": 0.0195, "step": 29190 }, { "epoch": 0.21584222820141333, "grad_norm": 0.0908300057053566, "learning_rate": 3.935333570750237e-05, "loss": 0.0219, "step": 29200 }, { "epoch": 0.21591614677271517, "grad_norm": 0.10464771091938019, "learning_rate": 3.9349626068376075e-05, "loss": 0.0195, "step": 29210 }, { "epoch": 0.21599006534401702, "grad_norm": 0.07181484997272491, "learning_rate": 3.9345916429249764e-05, "loss": 0.0162, "step": 29220 }, { "epoch": 0.2160639839153189, "grad_norm": 0.09997867047786713, "learning_rate": 3.934220679012346e-05, "loss": 0.021, "step": 29230 }, { "epoch": 0.21613790248662074, "grad_norm": 0.07985897362232208, "learning_rate": 3.9338497150997156e-05, "loss": 0.0187, "step": 29240 }, { "epoch": 0.21621182105792258, "grad_norm": 0.06396341323852539, "learning_rate": 3.9334787511870846e-05, "loss": 0.0191, "step": 29250 }, { "epoch": 0.21628573962922445, "grad_norm": 0.0796850398182869, "learning_rate": 3.933107787274454e-05, "loss": 0.0202, "step": 29260 }, { "epoch": 0.2163596582005263, "grad_norm": 0.06869375705718994, "learning_rate": 3.932736823361823e-05, "loss": 0.0193, "step": 29270 }, { "epoch": 0.21643357677182815, "grad_norm": 0.08196059614419937, "learning_rate": 3.932365859449193e-05, "loss": 0.0177, "step": 29280 }, { "epoch": 0.21650749534313002, "grad_norm": 0.12254177033901215, "learning_rate": 3.931994895536563e-05, "loss": 0.019, "step": 29290 }, { "epoch": 0.21658141391443186, "grad_norm": 0.09668796509504318, "learning_rate": 3.931623931623932e-05, "loss": 0.0207, "step": 29300 }, { "epoch": 0.2166553324857337, "grad_norm": 0.11711964011192322, "learning_rate": 3.9312529677113015e-05, "loss": 0.0196, "step": 29310 }, { "epoch": 0.21672925105703558, "grad_norm": 0.09891097992658615, "learning_rate": 3.9308820037986704e-05, "loss": 0.0193, "step": 29320 }, { "epoch": 0.21680316962833743, "grad_norm": 0.06463112682104111, "learning_rate": 3.93051103988604e-05, "loss": 0.0175, "step": 29330 }, { "epoch": 0.21687708819963927, "grad_norm": 0.08490326255559921, "learning_rate": 3.9301400759734096e-05, "loss": 0.02, "step": 29340 }, { "epoch": 0.21695100677094112, "grad_norm": 0.09126602858304977, "learning_rate": 3.9297691120607785e-05, "loss": 0.021, "step": 29350 }, { "epoch": 0.217024925342243, "grad_norm": 0.08907146006822586, "learning_rate": 3.929398148148149e-05, "loss": 0.0173, "step": 29360 }, { "epoch": 0.21709884391354484, "grad_norm": 0.08719177544116974, "learning_rate": 3.929027184235518e-05, "loss": 0.0214, "step": 29370 }, { "epoch": 0.21717276248484668, "grad_norm": 0.08162589371204376, "learning_rate": 3.928656220322887e-05, "loss": 0.0213, "step": 29380 }, { "epoch": 0.21724668105614856, "grad_norm": 0.0870555192232132, "learning_rate": 3.928285256410257e-05, "loss": 0.0205, "step": 29390 }, { "epoch": 0.2173205996274504, "grad_norm": 0.08443516492843628, "learning_rate": 3.927914292497626e-05, "loss": 0.0177, "step": 29400 }, { "epoch": 0.21739451819875225, "grad_norm": 0.12415960431098938, "learning_rate": 3.9275433285849954e-05, "loss": 0.0205, "step": 29410 }, { "epoch": 0.21746843677005412, "grad_norm": 0.12497182935476303, "learning_rate": 3.927172364672364e-05, "loss": 0.0201, "step": 29420 }, { "epoch": 0.21754235534135596, "grad_norm": 0.10130871087312698, "learning_rate": 3.926801400759734e-05, "loss": 0.016, "step": 29430 }, { "epoch": 0.2176162739126578, "grad_norm": 0.08756314963102341, "learning_rate": 3.926430436847104e-05, "loss": 0.0166, "step": 29440 }, { "epoch": 0.21769019248395968, "grad_norm": 0.10453139245510101, "learning_rate": 3.926059472934473e-05, "loss": 0.019, "step": 29450 }, { "epoch": 0.21776411105526153, "grad_norm": 0.09802225977182388, "learning_rate": 3.925688509021843e-05, "loss": 0.0206, "step": 29460 }, { "epoch": 0.21783802962656337, "grad_norm": 0.07709541916847229, "learning_rate": 3.925317545109212e-05, "loss": 0.0152, "step": 29470 }, { "epoch": 0.21791194819786522, "grad_norm": 0.10449168086051941, "learning_rate": 3.924946581196581e-05, "loss": 0.0189, "step": 29480 }, { "epoch": 0.2179858667691671, "grad_norm": 0.09322134405374527, "learning_rate": 3.924575617283951e-05, "loss": 0.0207, "step": 29490 }, { "epoch": 0.21805978534046894, "grad_norm": 0.0893213301897049, "learning_rate": 3.92420465337132e-05, "loss": 0.0182, "step": 29500 }, { "epoch": 0.21813370391177078, "grad_norm": 0.06929013133049011, "learning_rate": 3.92383368945869e-05, "loss": 0.0194, "step": 29510 }, { "epoch": 0.21820762248307266, "grad_norm": 0.08194336295127869, "learning_rate": 3.9234627255460596e-05, "loss": 0.0211, "step": 29520 }, { "epoch": 0.2182815410543745, "grad_norm": 0.09584349393844604, "learning_rate": 3.9230917616334285e-05, "loss": 0.0187, "step": 29530 }, { "epoch": 0.21835545962567635, "grad_norm": 0.08808445930480957, "learning_rate": 3.922720797720798e-05, "loss": 0.018, "step": 29540 }, { "epoch": 0.21842937819697822, "grad_norm": 0.08436179161071777, "learning_rate": 3.922349833808167e-05, "loss": 0.0182, "step": 29550 }, { "epoch": 0.21850329676828006, "grad_norm": 0.09081083536148071, "learning_rate": 3.9219788698955366e-05, "loss": 0.0196, "step": 29560 }, { "epoch": 0.2185772153395819, "grad_norm": 0.06510711461305618, "learning_rate": 3.921607905982906e-05, "loss": 0.0198, "step": 29570 }, { "epoch": 0.21865113391088378, "grad_norm": 0.06192505732178688, "learning_rate": 3.921236942070275e-05, "loss": 0.0171, "step": 29580 }, { "epoch": 0.21872505248218563, "grad_norm": 0.10911913961172104, "learning_rate": 3.9208659781576454e-05, "loss": 0.02, "step": 29590 }, { "epoch": 0.21879897105348747, "grad_norm": 0.10316479951143265, "learning_rate": 3.9204950142450143e-05, "loss": 0.0207, "step": 29600 }, { "epoch": 0.21887288962478932, "grad_norm": 0.09082043915987015, "learning_rate": 3.920124050332384e-05, "loss": 0.0199, "step": 29610 }, { "epoch": 0.2189468081960912, "grad_norm": 0.07779832184314728, "learning_rate": 3.9197530864197535e-05, "loss": 0.0194, "step": 29620 }, { "epoch": 0.21902072676739304, "grad_norm": 0.09248018264770508, "learning_rate": 3.9193821225071225e-05, "loss": 0.0179, "step": 29630 }, { "epoch": 0.21909464533869488, "grad_norm": 0.08787126839160919, "learning_rate": 3.919011158594492e-05, "loss": 0.021, "step": 29640 }, { "epoch": 0.21916856390999676, "grad_norm": 0.10116524994373322, "learning_rate": 3.918640194681861e-05, "loss": 0.0186, "step": 29650 }, { "epoch": 0.2192424824812986, "grad_norm": 0.09683122485876083, "learning_rate": 3.918269230769231e-05, "loss": 0.0189, "step": 29660 }, { "epoch": 0.21931640105260045, "grad_norm": 0.09836520254611969, "learning_rate": 3.917898266856601e-05, "loss": 0.019, "step": 29670 }, { "epoch": 0.21939031962390232, "grad_norm": 0.080613873898983, "learning_rate": 3.91752730294397e-05, "loss": 0.0178, "step": 29680 }, { "epoch": 0.21946423819520416, "grad_norm": 0.11065308004617691, "learning_rate": 3.9171563390313394e-05, "loss": 0.0179, "step": 29690 }, { "epoch": 0.219538156766506, "grad_norm": 0.04144902527332306, "learning_rate": 3.916785375118709e-05, "loss": 0.0166, "step": 29700 }, { "epoch": 0.21961207533780788, "grad_norm": 0.08354011923074722, "learning_rate": 3.916414411206078e-05, "loss": 0.0195, "step": 29710 }, { "epoch": 0.21968599390910973, "grad_norm": 0.09685730189085007, "learning_rate": 3.9160434472934475e-05, "loss": 0.0199, "step": 29720 }, { "epoch": 0.21975991248041157, "grad_norm": 0.07402803003787994, "learning_rate": 3.9156724833808164e-05, "loss": 0.0168, "step": 29730 }, { "epoch": 0.21983383105171342, "grad_norm": 0.08626370131969452, "learning_rate": 3.915301519468187e-05, "loss": 0.0202, "step": 29740 }, { "epoch": 0.2199077496230153, "grad_norm": 0.12363871932029724, "learning_rate": 3.914930555555556e-05, "loss": 0.0193, "step": 29750 }, { "epoch": 0.21998166819431714, "grad_norm": 0.07647421211004257, "learning_rate": 3.914559591642925e-05, "loss": 0.0187, "step": 29760 }, { "epoch": 0.22005558676561898, "grad_norm": 0.10127396136522293, "learning_rate": 3.914188627730295e-05, "loss": 0.0185, "step": 29770 }, { "epoch": 0.22012950533692086, "grad_norm": 0.11334076523780823, "learning_rate": 3.913817663817664e-05, "loss": 0.0225, "step": 29780 }, { "epoch": 0.2202034239082227, "grad_norm": 0.08629689365625381, "learning_rate": 3.913446699905033e-05, "loss": 0.0193, "step": 29790 }, { "epoch": 0.22027734247952455, "grad_norm": 0.07195857167243958, "learning_rate": 3.913075735992403e-05, "loss": 0.0191, "step": 29800 }, { "epoch": 0.22035126105082642, "grad_norm": 0.09427592903375626, "learning_rate": 3.9127047720797725e-05, "loss": 0.0177, "step": 29810 }, { "epoch": 0.22042517962212826, "grad_norm": 0.14630918204784393, "learning_rate": 3.912333808167142e-05, "loss": 0.0199, "step": 29820 }, { "epoch": 0.2204990981934301, "grad_norm": 0.10568604618310928, "learning_rate": 3.911962844254511e-05, "loss": 0.018, "step": 29830 }, { "epoch": 0.22057301676473198, "grad_norm": 0.09192904829978943, "learning_rate": 3.9115918803418806e-05, "loss": 0.0215, "step": 29840 }, { "epoch": 0.22064693533603383, "grad_norm": 0.08686095476150513, "learning_rate": 3.91122091642925e-05, "loss": 0.0187, "step": 29850 }, { "epoch": 0.22072085390733567, "grad_norm": 0.09952393174171448, "learning_rate": 3.910849952516619e-05, "loss": 0.0211, "step": 29860 }, { "epoch": 0.22079477247863755, "grad_norm": 0.08013574033975601, "learning_rate": 3.910478988603989e-05, "loss": 0.0193, "step": 29870 }, { "epoch": 0.2208686910499394, "grad_norm": 0.0664672777056694, "learning_rate": 3.9101080246913576e-05, "loss": 0.0183, "step": 29880 }, { "epoch": 0.22094260962124124, "grad_norm": 0.07354767620563507, "learning_rate": 3.909737060778728e-05, "loss": 0.0181, "step": 29890 }, { "epoch": 0.22101652819254308, "grad_norm": 0.0886886715888977, "learning_rate": 3.9093660968660975e-05, "loss": 0.0184, "step": 29900 }, { "epoch": 0.22109044676384496, "grad_norm": 0.08277405798435211, "learning_rate": 3.9089951329534664e-05, "loss": 0.0204, "step": 29910 }, { "epoch": 0.2211643653351468, "grad_norm": 0.10522552579641342, "learning_rate": 3.908624169040836e-05, "loss": 0.0213, "step": 29920 }, { "epoch": 0.22123828390644865, "grad_norm": 0.08946707099676132, "learning_rate": 3.9082532051282056e-05, "loss": 0.0191, "step": 29930 }, { "epoch": 0.22131220247775052, "grad_norm": 0.08845418691635132, "learning_rate": 3.9078822412155745e-05, "loss": 0.02, "step": 29940 }, { "epoch": 0.22138612104905236, "grad_norm": 0.08584783226251602, "learning_rate": 3.907511277302944e-05, "loss": 0.0193, "step": 29950 }, { "epoch": 0.2214600396203542, "grad_norm": 0.0865851640701294, "learning_rate": 3.907140313390314e-05, "loss": 0.0191, "step": 29960 }, { "epoch": 0.22153395819165608, "grad_norm": 0.0629979595541954, "learning_rate": 3.906769349477683e-05, "loss": 0.0187, "step": 29970 }, { "epoch": 0.22160787676295793, "grad_norm": 0.05814515799283981, "learning_rate": 3.906398385565053e-05, "loss": 0.0204, "step": 29980 }, { "epoch": 0.22168179533425977, "grad_norm": 0.1256542056798935, "learning_rate": 3.906027421652422e-05, "loss": 0.0224, "step": 29990 }, { "epoch": 0.22175571390556165, "grad_norm": 0.1209261491894722, "learning_rate": 3.9056564577397914e-05, "loss": 0.0185, "step": 30000 }, { "epoch": 0.22175571390556165, "eval_f1": 0.5997543019336526, "eval_loss": 0.019051436334848404, "eval_precision": 0.47411606732788514, "eval_recall": 0.8159863391440243, "eval_runtime": 2664.1736, "eval_samples_per_second": 203.115, "eval_steps_per_second": 3.174, "step": 30000 }, { "epoch": 0.2218296324768635, "grad_norm": 0.08469045907258987, "learning_rate": 3.9052854938271604e-05, "loss": 0.0206, "step": 30010 }, { "epoch": 0.22190355104816534, "grad_norm": 0.09641801565885544, "learning_rate": 3.90491452991453e-05, "loss": 0.0168, "step": 30020 }, { "epoch": 0.22197746961946718, "grad_norm": 0.08884450048208237, "learning_rate": 3.9045435660018995e-05, "loss": 0.019, "step": 30030 }, { "epoch": 0.22205138819076906, "grad_norm": 0.09590624272823334, "learning_rate": 3.904172602089269e-05, "loss": 0.0207, "step": 30040 }, { "epoch": 0.2221253067620709, "grad_norm": 0.08627372980117798, "learning_rate": 3.903801638176639e-05, "loss": 0.0161, "step": 30050 }, { "epoch": 0.22219922533337275, "grad_norm": 0.10284058004617691, "learning_rate": 3.9034306742640077e-05, "loss": 0.0198, "step": 30060 }, { "epoch": 0.22227314390467462, "grad_norm": 0.13199660181999207, "learning_rate": 3.903059710351377e-05, "loss": 0.0199, "step": 30070 }, { "epoch": 0.22234706247597646, "grad_norm": 0.0878579244017601, "learning_rate": 3.902688746438747e-05, "loss": 0.0188, "step": 30080 }, { "epoch": 0.2224209810472783, "grad_norm": 0.07251982390880585, "learning_rate": 3.902317782526116e-05, "loss": 0.0198, "step": 30090 }, { "epoch": 0.22249489961858018, "grad_norm": 0.06770602613687515, "learning_rate": 3.9019468186134854e-05, "loss": 0.0186, "step": 30100 }, { "epoch": 0.22256881818988203, "grad_norm": 0.08190783858299255, "learning_rate": 3.901575854700854e-05, "loss": 0.0161, "step": 30110 }, { "epoch": 0.22264273676118387, "grad_norm": 0.07122600078582764, "learning_rate": 3.9012048907882246e-05, "loss": 0.0177, "step": 30120 }, { "epoch": 0.22271665533248575, "grad_norm": 0.09126278758049011, "learning_rate": 3.900833926875594e-05, "loss": 0.0187, "step": 30130 }, { "epoch": 0.2227905739037876, "grad_norm": 0.08449380099773407, "learning_rate": 3.900462962962963e-05, "loss": 0.0196, "step": 30140 }, { "epoch": 0.22286449247508944, "grad_norm": 0.07444407790899277, "learning_rate": 3.900091999050333e-05, "loss": 0.0191, "step": 30150 }, { "epoch": 0.22293841104639128, "grad_norm": 0.07632920145988464, "learning_rate": 3.899721035137702e-05, "loss": 0.0204, "step": 30160 }, { "epoch": 0.22301232961769316, "grad_norm": 0.08453027904033661, "learning_rate": 3.899350071225071e-05, "loss": 0.0181, "step": 30170 }, { "epoch": 0.223086248188995, "grad_norm": 0.09648805111646652, "learning_rate": 3.898979107312441e-05, "loss": 0.0194, "step": 30180 }, { "epoch": 0.22316016676029685, "grad_norm": 0.09008285403251648, "learning_rate": 3.8986081433998104e-05, "loss": 0.0184, "step": 30190 }, { "epoch": 0.22323408533159872, "grad_norm": 0.07204605638980865, "learning_rate": 3.89823717948718e-05, "loss": 0.0194, "step": 30200 }, { "epoch": 0.22330800390290056, "grad_norm": 0.08470254391431808, "learning_rate": 3.8978662155745496e-05, "loss": 0.0191, "step": 30210 }, { "epoch": 0.2233819224742024, "grad_norm": 0.1287703961133957, "learning_rate": 3.8974952516619185e-05, "loss": 0.0171, "step": 30220 }, { "epoch": 0.22345584104550428, "grad_norm": 0.11528854072093964, "learning_rate": 3.897124287749288e-05, "loss": 0.0209, "step": 30230 }, { "epoch": 0.22352975961680613, "grad_norm": 0.07534787058830261, "learning_rate": 3.896753323836657e-05, "loss": 0.0182, "step": 30240 }, { "epoch": 0.22360367818810797, "grad_norm": 0.0873965248465538, "learning_rate": 3.8963823599240266e-05, "loss": 0.0155, "step": 30250 }, { "epoch": 0.22367759675940985, "grad_norm": 0.11592572182416916, "learning_rate": 3.896011396011396e-05, "loss": 0.0183, "step": 30260 }, { "epoch": 0.2237515153307117, "grad_norm": 0.1082519143819809, "learning_rate": 3.895640432098766e-05, "loss": 0.0169, "step": 30270 }, { "epoch": 0.22382543390201354, "grad_norm": 0.0788358822464943, "learning_rate": 3.8952694681861354e-05, "loss": 0.0163, "step": 30280 }, { "epoch": 0.22389935247331538, "grad_norm": 0.09216403216123581, "learning_rate": 3.894898504273504e-05, "loss": 0.0197, "step": 30290 }, { "epoch": 0.22397327104461726, "grad_norm": 0.08517227321863174, "learning_rate": 3.894527540360874e-05, "loss": 0.0205, "step": 30300 }, { "epoch": 0.2240471896159191, "grad_norm": 0.13697083294391632, "learning_rate": 3.8941565764482435e-05, "loss": 0.0206, "step": 30310 }, { "epoch": 0.22412110818722095, "grad_norm": 0.07699858397245407, "learning_rate": 3.8937856125356124e-05, "loss": 0.0186, "step": 30320 }, { "epoch": 0.22419502675852282, "grad_norm": 0.15653513371944427, "learning_rate": 3.893414648622982e-05, "loss": 0.0198, "step": 30330 }, { "epoch": 0.22426894532982466, "grad_norm": 0.08664526790380478, "learning_rate": 3.8930436847103516e-05, "loss": 0.0208, "step": 30340 }, { "epoch": 0.2243428639011265, "grad_norm": 0.06121116131544113, "learning_rate": 3.892672720797721e-05, "loss": 0.0189, "step": 30350 }, { "epoch": 0.22441678247242838, "grad_norm": 0.09972760826349258, "learning_rate": 3.892301756885091e-05, "loss": 0.0181, "step": 30360 }, { "epoch": 0.22449070104373023, "grad_norm": 0.09164098650217056, "learning_rate": 3.89193079297246e-05, "loss": 0.0168, "step": 30370 }, { "epoch": 0.22456461961503207, "grad_norm": 0.1236223429441452, "learning_rate": 3.891559829059829e-05, "loss": 0.017, "step": 30380 }, { "epoch": 0.22463853818633395, "grad_norm": 0.1081567257642746, "learning_rate": 3.891188865147199e-05, "loss": 0.0185, "step": 30390 }, { "epoch": 0.2247124567576358, "grad_norm": 0.059913747012615204, "learning_rate": 3.890817901234568e-05, "loss": 0.0171, "step": 30400 }, { "epoch": 0.22478637532893764, "grad_norm": 0.07820994406938553, "learning_rate": 3.8904469373219374e-05, "loss": 0.0172, "step": 30410 }, { "epoch": 0.22486029390023948, "grad_norm": 0.08917850255966187, "learning_rate": 3.890075973409307e-05, "loss": 0.0169, "step": 30420 }, { "epoch": 0.22493421247154136, "grad_norm": 0.09755807369947433, "learning_rate": 3.8897050094966766e-05, "loss": 0.0196, "step": 30430 }, { "epoch": 0.2250081310428432, "grad_norm": 0.09309454262256622, "learning_rate": 3.889334045584046e-05, "loss": 0.0209, "step": 30440 }, { "epoch": 0.22508204961414505, "grad_norm": 0.09513840824365616, "learning_rate": 3.888963081671415e-05, "loss": 0.0218, "step": 30450 }, { "epoch": 0.22515596818544692, "grad_norm": 0.09850829094648361, "learning_rate": 3.888592117758785e-05, "loss": 0.0185, "step": 30460 }, { "epoch": 0.22522988675674877, "grad_norm": 0.0765216201543808, "learning_rate": 3.888221153846154e-05, "loss": 0.0163, "step": 30470 }, { "epoch": 0.2253038053280506, "grad_norm": 0.08078419417142868, "learning_rate": 3.887850189933523e-05, "loss": 0.0191, "step": 30480 }, { "epoch": 0.22537772389935248, "grad_norm": 0.09681311249732971, "learning_rate": 3.887479226020893e-05, "loss": 0.021, "step": 30490 }, { "epoch": 0.22545164247065433, "grad_norm": 0.10332570225000381, "learning_rate": 3.8871082621082625e-05, "loss": 0.023, "step": 30500 }, { "epoch": 0.22552556104195617, "grad_norm": 0.10160394012928009, "learning_rate": 3.886737298195632e-05, "loss": 0.0184, "step": 30510 }, { "epoch": 0.22559947961325805, "grad_norm": 0.09615443646907806, "learning_rate": 3.886366334283001e-05, "loss": 0.0208, "step": 30520 }, { "epoch": 0.2256733981845599, "grad_norm": 0.08312519639730453, "learning_rate": 3.8859953703703706e-05, "loss": 0.0201, "step": 30530 }, { "epoch": 0.22574731675586174, "grad_norm": 0.07552629709243774, "learning_rate": 3.88562440645774e-05, "loss": 0.022, "step": 30540 }, { "epoch": 0.22582123532716358, "grad_norm": 0.10100152343511581, "learning_rate": 3.885253442545109e-05, "loss": 0.02, "step": 30550 }, { "epoch": 0.22589515389846546, "grad_norm": 0.08780404180288315, "learning_rate": 3.884882478632479e-05, "loss": 0.0177, "step": 30560 }, { "epoch": 0.2259690724697673, "grad_norm": 0.09640546888113022, "learning_rate": 3.884511514719848e-05, "loss": 0.0195, "step": 30570 }, { "epoch": 0.22604299104106915, "grad_norm": 0.08945133537054062, "learning_rate": 3.884140550807218e-05, "loss": 0.0218, "step": 30580 }, { "epoch": 0.22611690961237102, "grad_norm": 0.1121789738535881, "learning_rate": 3.8837695868945875e-05, "loss": 0.0175, "step": 30590 }, { "epoch": 0.22619082818367287, "grad_norm": 0.1048922911286354, "learning_rate": 3.8833986229819564e-05, "loss": 0.0203, "step": 30600 }, { "epoch": 0.2262647467549747, "grad_norm": 0.1150166317820549, "learning_rate": 3.883027659069326e-05, "loss": 0.0216, "step": 30610 }, { "epoch": 0.22633866532627658, "grad_norm": 0.10024099797010422, "learning_rate": 3.8826566951566956e-05, "loss": 0.0188, "step": 30620 }, { "epoch": 0.22641258389757843, "grad_norm": 0.06918352097272873, "learning_rate": 3.8822857312440645e-05, "loss": 0.0209, "step": 30630 }, { "epoch": 0.22648650246888027, "grad_norm": 0.06771031767129898, "learning_rate": 3.881914767331434e-05, "loss": 0.0196, "step": 30640 }, { "epoch": 0.22656042104018215, "grad_norm": 0.12088067829608917, "learning_rate": 3.881543803418804e-05, "loss": 0.0182, "step": 30650 }, { "epoch": 0.226634339611484, "grad_norm": 0.07698526978492737, "learning_rate": 3.881172839506173e-05, "loss": 0.0217, "step": 30660 }, { "epoch": 0.22670825818278584, "grad_norm": 0.13076117634773254, "learning_rate": 3.880801875593543e-05, "loss": 0.0202, "step": 30670 }, { "epoch": 0.22678217675408768, "grad_norm": 0.0864979699254036, "learning_rate": 3.880430911680912e-05, "loss": 0.0209, "step": 30680 }, { "epoch": 0.22685609532538956, "grad_norm": 0.08185715973377228, "learning_rate": 3.8800599477682814e-05, "loss": 0.0211, "step": 30690 }, { "epoch": 0.2269300138966914, "grad_norm": 0.08243662863969803, "learning_rate": 3.87968898385565e-05, "loss": 0.0173, "step": 30700 }, { "epoch": 0.22700393246799325, "grad_norm": 0.07913575321435928, "learning_rate": 3.87931801994302e-05, "loss": 0.0193, "step": 30710 }, { "epoch": 0.22707785103929512, "grad_norm": 0.09552217274904251, "learning_rate": 3.8789470560303895e-05, "loss": 0.0203, "step": 30720 }, { "epoch": 0.22715176961059697, "grad_norm": 0.11742965131998062, "learning_rate": 3.878576092117759e-05, "loss": 0.0207, "step": 30730 }, { "epoch": 0.2272256881818988, "grad_norm": 0.06373470276594162, "learning_rate": 3.878205128205129e-05, "loss": 0.0186, "step": 30740 }, { "epoch": 0.22729960675320068, "grad_norm": 0.07583777606487274, "learning_rate": 3.8778341642924976e-05, "loss": 0.0174, "step": 30750 }, { "epoch": 0.22737352532450253, "grad_norm": 0.09138477593660355, "learning_rate": 3.877463200379867e-05, "loss": 0.0193, "step": 30760 }, { "epoch": 0.22744744389580437, "grad_norm": 0.07085064053535461, "learning_rate": 3.877092236467237e-05, "loss": 0.0192, "step": 30770 }, { "epoch": 0.22752136246710625, "grad_norm": 0.07788054645061493, "learning_rate": 3.876721272554606e-05, "loss": 0.0182, "step": 30780 }, { "epoch": 0.2275952810384081, "grad_norm": 0.0838763490319252, "learning_rate": 3.8763503086419753e-05, "loss": 0.0187, "step": 30790 }, { "epoch": 0.22766919960970994, "grad_norm": 0.1182074323296547, "learning_rate": 3.875979344729345e-05, "loss": 0.0206, "step": 30800 }, { "epoch": 0.2277431181810118, "grad_norm": 0.1088971197605133, "learning_rate": 3.8756083808167145e-05, "loss": 0.0219, "step": 30810 }, { "epoch": 0.22781703675231366, "grad_norm": 0.11365870386362076, "learning_rate": 3.875237416904084e-05, "loss": 0.0196, "step": 30820 }, { "epoch": 0.2278909553236155, "grad_norm": 0.06740875542163849, "learning_rate": 3.874866452991453e-05, "loss": 0.0171, "step": 30830 }, { "epoch": 0.22796487389491735, "grad_norm": 0.07138946652412415, "learning_rate": 3.8744954890788227e-05, "loss": 0.0211, "step": 30840 }, { "epoch": 0.22803879246621922, "grad_norm": 0.0920095294713974, "learning_rate": 3.874124525166192e-05, "loss": 0.0183, "step": 30850 }, { "epoch": 0.22811271103752107, "grad_norm": 0.13043396174907684, "learning_rate": 3.873753561253561e-05, "loss": 0.0222, "step": 30860 }, { "epoch": 0.2281866296088229, "grad_norm": 0.12495102733373642, "learning_rate": 3.873382597340931e-05, "loss": 0.018, "step": 30870 }, { "epoch": 0.22826054818012478, "grad_norm": 0.08440731465816498, "learning_rate": 3.8730116334283004e-05, "loss": 0.0178, "step": 30880 }, { "epoch": 0.22833446675142663, "grad_norm": 0.07918287813663483, "learning_rate": 3.87264066951567e-05, "loss": 0.02, "step": 30890 }, { "epoch": 0.22840838532272847, "grad_norm": 0.10504679381847382, "learning_rate": 3.8722697056030396e-05, "loss": 0.0189, "step": 30900 }, { "epoch": 0.22848230389403035, "grad_norm": 0.09025320410728455, "learning_rate": 3.8718987416904085e-05, "loss": 0.0191, "step": 30910 }, { "epoch": 0.2285562224653322, "grad_norm": 0.11715808510780334, "learning_rate": 3.871527777777778e-05, "loss": 0.0223, "step": 30920 }, { "epoch": 0.22863014103663404, "grad_norm": 0.07258326560258865, "learning_rate": 3.871156813865147e-05, "loss": 0.0192, "step": 30930 }, { "epoch": 0.2287040596079359, "grad_norm": 0.06756031513214111, "learning_rate": 3.8707858499525166e-05, "loss": 0.0202, "step": 30940 }, { "epoch": 0.22877797817923776, "grad_norm": 0.09260854125022888, "learning_rate": 3.870414886039886e-05, "loss": 0.0219, "step": 30950 }, { "epoch": 0.2288518967505396, "grad_norm": 0.0938676968216896, "learning_rate": 3.870043922127256e-05, "loss": 0.0182, "step": 30960 }, { "epoch": 0.22892581532184145, "grad_norm": 0.09336868673563004, "learning_rate": 3.8696729582146254e-05, "loss": 0.0191, "step": 30970 }, { "epoch": 0.22899973389314332, "grad_norm": 0.08895561844110489, "learning_rate": 3.869301994301994e-05, "loss": 0.0182, "step": 30980 }, { "epoch": 0.22907365246444517, "grad_norm": 0.076514832675457, "learning_rate": 3.868931030389364e-05, "loss": 0.0172, "step": 30990 }, { "epoch": 0.229147571035747, "grad_norm": 0.09007681161165237, "learning_rate": 3.8685600664767335e-05, "loss": 0.0202, "step": 31000 }, { "epoch": 0.22922148960704888, "grad_norm": 0.10296738147735596, "learning_rate": 3.8681891025641024e-05, "loss": 0.0191, "step": 31010 }, { "epoch": 0.22929540817835073, "grad_norm": 0.09268203377723694, "learning_rate": 3.867818138651472e-05, "loss": 0.0225, "step": 31020 }, { "epoch": 0.22936932674965257, "grad_norm": 0.08601827919483185, "learning_rate": 3.8674471747388416e-05, "loss": 0.0174, "step": 31030 }, { "epoch": 0.22944324532095445, "grad_norm": 0.13542363047599792, "learning_rate": 3.867076210826211e-05, "loss": 0.0215, "step": 31040 }, { "epoch": 0.2295171638922563, "grad_norm": 0.07593340426683426, "learning_rate": 3.866705246913581e-05, "loss": 0.0193, "step": 31050 }, { "epoch": 0.22959108246355814, "grad_norm": 0.1184159368276596, "learning_rate": 3.86633428300095e-05, "loss": 0.0195, "step": 31060 }, { "epoch": 0.22966500103486, "grad_norm": 0.10774824768304825, "learning_rate": 3.865963319088319e-05, "loss": 0.0208, "step": 31070 }, { "epoch": 0.22973891960616186, "grad_norm": 0.0968312919139862, "learning_rate": 3.865592355175689e-05, "loss": 0.0201, "step": 31080 }, { "epoch": 0.2298128381774637, "grad_norm": 0.11818359047174454, "learning_rate": 3.865221391263058e-05, "loss": 0.02, "step": 31090 }, { "epoch": 0.22988675674876555, "grad_norm": 0.054972726851701736, "learning_rate": 3.8648504273504274e-05, "loss": 0.0164, "step": 31100 }, { "epoch": 0.22996067532006742, "grad_norm": 0.05757006257772446, "learning_rate": 3.864479463437797e-05, "loss": 0.0182, "step": 31110 }, { "epoch": 0.23003459389136927, "grad_norm": 0.05938870832324028, "learning_rate": 3.8641084995251666e-05, "loss": 0.0202, "step": 31120 }, { "epoch": 0.2301085124626711, "grad_norm": 0.09021522849798203, "learning_rate": 3.863737535612536e-05, "loss": 0.0206, "step": 31130 }, { "epoch": 0.23018243103397298, "grad_norm": 0.08191125839948654, "learning_rate": 3.863366571699905e-05, "loss": 0.0175, "step": 31140 }, { "epoch": 0.23025634960527483, "grad_norm": 0.13434763252735138, "learning_rate": 3.862995607787275e-05, "loss": 0.0241, "step": 31150 }, { "epoch": 0.23033026817657667, "grad_norm": 0.09122100472450256, "learning_rate": 3.8626246438746436e-05, "loss": 0.0178, "step": 31160 }, { "epoch": 0.23040418674787855, "grad_norm": 0.08553260564804077, "learning_rate": 3.862253679962013e-05, "loss": 0.018, "step": 31170 }, { "epoch": 0.2304781053191804, "grad_norm": 0.06894198060035706, "learning_rate": 3.861882716049383e-05, "loss": 0.0196, "step": 31180 }, { "epoch": 0.23055202389048224, "grad_norm": 0.09503644704818726, "learning_rate": 3.8615117521367524e-05, "loss": 0.0184, "step": 31190 }, { "epoch": 0.2306259424617841, "grad_norm": 0.08598212897777557, "learning_rate": 3.861140788224122e-05, "loss": 0.0187, "step": 31200 }, { "epoch": 0.23069986103308596, "grad_norm": 0.08239752054214478, "learning_rate": 3.860769824311491e-05, "loss": 0.0208, "step": 31210 }, { "epoch": 0.2307737796043878, "grad_norm": 0.0904286578297615, "learning_rate": 3.8603988603988605e-05, "loss": 0.0194, "step": 31220 }, { "epoch": 0.23084769817568965, "grad_norm": 0.08765274286270142, "learning_rate": 3.86002789648623e-05, "loss": 0.0196, "step": 31230 }, { "epoch": 0.23092161674699152, "grad_norm": 0.0852227434515953, "learning_rate": 3.859656932573599e-05, "loss": 0.0213, "step": 31240 }, { "epoch": 0.23099553531829337, "grad_norm": 0.09749691188335419, "learning_rate": 3.859285968660969e-05, "loss": 0.0202, "step": 31250 }, { "epoch": 0.2310694538895952, "grad_norm": 0.06527355313301086, "learning_rate": 3.858915004748338e-05, "loss": 0.02, "step": 31260 }, { "epoch": 0.23114337246089708, "grad_norm": 0.11276469379663467, "learning_rate": 3.858544040835708e-05, "loss": 0.0173, "step": 31270 }, { "epoch": 0.23121729103219893, "grad_norm": 0.08993665128946304, "learning_rate": 3.8581730769230775e-05, "loss": 0.0184, "step": 31280 }, { "epoch": 0.23129120960350077, "grad_norm": 0.0920916274189949, "learning_rate": 3.8578021130104464e-05, "loss": 0.0184, "step": 31290 }, { "epoch": 0.23136512817480265, "grad_norm": 0.06178808957338333, "learning_rate": 3.857431149097816e-05, "loss": 0.0171, "step": 31300 }, { "epoch": 0.2314390467461045, "grad_norm": 0.07721130549907684, "learning_rate": 3.8570601851851856e-05, "loss": 0.0172, "step": 31310 }, { "epoch": 0.23151296531740634, "grad_norm": 0.0718604326248169, "learning_rate": 3.8566892212725545e-05, "loss": 0.0188, "step": 31320 }, { "epoch": 0.2315868838887082, "grad_norm": 0.09962927550077438, "learning_rate": 3.856318257359924e-05, "loss": 0.0212, "step": 31330 }, { "epoch": 0.23166080246001006, "grad_norm": 0.09302369505167007, "learning_rate": 3.855947293447294e-05, "loss": 0.0185, "step": 31340 }, { "epoch": 0.2317347210313119, "grad_norm": 0.07690336555242538, "learning_rate": 3.855576329534663e-05, "loss": 0.0191, "step": 31350 }, { "epoch": 0.23180863960261375, "grad_norm": 0.07962159067392349, "learning_rate": 3.855205365622033e-05, "loss": 0.0195, "step": 31360 }, { "epoch": 0.23188255817391562, "grad_norm": 0.09690086543560028, "learning_rate": 3.854834401709402e-05, "loss": 0.018, "step": 31370 }, { "epoch": 0.23195647674521747, "grad_norm": 0.0838049054145813, "learning_rate": 3.8544634377967714e-05, "loss": 0.0173, "step": 31380 }, { "epoch": 0.2320303953165193, "grad_norm": 0.07686971127986908, "learning_rate": 3.85409247388414e-05, "loss": 0.0175, "step": 31390 }, { "epoch": 0.23210431388782118, "grad_norm": 0.08736717700958252, "learning_rate": 3.85372150997151e-05, "loss": 0.0182, "step": 31400 }, { "epoch": 0.23217823245912303, "grad_norm": 0.0625508725643158, "learning_rate": 3.8533505460588795e-05, "loss": 0.0204, "step": 31410 }, { "epoch": 0.23225215103042487, "grad_norm": 0.09991180151700974, "learning_rate": 3.852979582146249e-05, "loss": 0.0179, "step": 31420 }, { "epoch": 0.23232606960172675, "grad_norm": 0.06481295078992844, "learning_rate": 3.852608618233619e-05, "loss": 0.0209, "step": 31430 }, { "epoch": 0.2323999881730286, "grad_norm": 0.07274527847766876, "learning_rate": 3.8522376543209876e-05, "loss": 0.018, "step": 31440 }, { "epoch": 0.23247390674433044, "grad_norm": 0.04653900861740112, "learning_rate": 3.851866690408357e-05, "loss": 0.0149, "step": 31450 }, { "epoch": 0.2325478253156323, "grad_norm": 0.0779101625084877, "learning_rate": 3.851495726495727e-05, "loss": 0.019, "step": 31460 }, { "epoch": 0.23262174388693416, "grad_norm": 0.0786028727889061, "learning_rate": 3.851124762583096e-05, "loss": 0.0176, "step": 31470 }, { "epoch": 0.232695662458236, "grad_norm": 0.07252313196659088, "learning_rate": 3.850753798670465e-05, "loss": 0.0193, "step": 31480 }, { "epoch": 0.23276958102953785, "grad_norm": 0.0671439990401268, "learning_rate": 3.850382834757835e-05, "loss": 0.0187, "step": 31490 }, { "epoch": 0.23284349960083972, "grad_norm": 0.08912762999534607, "learning_rate": 3.8500118708452045e-05, "loss": 0.0184, "step": 31500 }, { "epoch": 0.23291741817214157, "grad_norm": 0.06486102193593979, "learning_rate": 3.849640906932574e-05, "loss": 0.0177, "step": 31510 }, { "epoch": 0.2329913367434434, "grad_norm": 0.10911845415830612, "learning_rate": 3.849269943019943e-05, "loss": 0.02, "step": 31520 }, { "epoch": 0.23306525531474528, "grad_norm": 0.07711423933506012, "learning_rate": 3.8488989791073126e-05, "loss": 0.0193, "step": 31530 }, { "epoch": 0.23313917388604713, "grad_norm": 0.06304068863391876, "learning_rate": 3.848528015194682e-05, "loss": 0.0211, "step": 31540 }, { "epoch": 0.23321309245734897, "grad_norm": 0.11020921915769577, "learning_rate": 3.848157051282051e-05, "loss": 0.0173, "step": 31550 }, { "epoch": 0.23328701102865085, "grad_norm": 0.09332350641489029, "learning_rate": 3.847786087369421e-05, "loss": 0.0212, "step": 31560 }, { "epoch": 0.2333609295999527, "grad_norm": 0.07746771723031998, "learning_rate": 3.84741512345679e-05, "loss": 0.0208, "step": 31570 }, { "epoch": 0.23343484817125454, "grad_norm": 0.10996796935796738, "learning_rate": 3.84704415954416e-05, "loss": 0.0178, "step": 31580 }, { "epoch": 0.2335087667425564, "grad_norm": 0.07971123605966568, "learning_rate": 3.8466731956315295e-05, "loss": 0.0198, "step": 31590 }, { "epoch": 0.23358268531385826, "grad_norm": 0.08604178577661514, "learning_rate": 3.8463022317188984e-05, "loss": 0.0196, "step": 31600 }, { "epoch": 0.2336566038851601, "grad_norm": 0.11693210899829865, "learning_rate": 3.845931267806268e-05, "loss": 0.02, "step": 31610 }, { "epoch": 0.23373052245646195, "grad_norm": 0.09861855953931808, "learning_rate": 3.845560303893637e-05, "loss": 0.0204, "step": 31620 }, { "epoch": 0.23380444102776382, "grad_norm": 0.08212066441774368, "learning_rate": 3.8451893399810066e-05, "loss": 0.0166, "step": 31630 }, { "epoch": 0.23387835959906567, "grad_norm": 0.0658751055598259, "learning_rate": 3.844818376068376e-05, "loss": 0.0163, "step": 31640 }, { "epoch": 0.2339522781703675, "grad_norm": 0.06246474012732506, "learning_rate": 3.844447412155746e-05, "loss": 0.0189, "step": 31650 }, { "epoch": 0.23402619674166938, "grad_norm": 0.07317464053630829, "learning_rate": 3.8440764482431153e-05, "loss": 0.0178, "step": 31660 }, { "epoch": 0.23410011531297123, "grad_norm": 0.07486504316329956, "learning_rate": 3.843705484330484e-05, "loss": 0.0175, "step": 31670 }, { "epoch": 0.23417403388427308, "grad_norm": 0.057501647621393204, "learning_rate": 3.843334520417854e-05, "loss": 0.018, "step": 31680 }, { "epoch": 0.23424795245557495, "grad_norm": 0.10206136852502823, "learning_rate": 3.8429635565052235e-05, "loss": 0.0169, "step": 31690 }, { "epoch": 0.2343218710268768, "grad_norm": 0.09707526117563248, "learning_rate": 3.8425925925925924e-05, "loss": 0.0202, "step": 31700 }, { "epoch": 0.23439578959817864, "grad_norm": 0.08385404944419861, "learning_rate": 3.842221628679962e-05, "loss": 0.018, "step": 31710 }, { "epoch": 0.2344697081694805, "grad_norm": 0.08981026709079742, "learning_rate": 3.8418506647673316e-05, "loss": 0.018, "step": 31720 }, { "epoch": 0.23454362674078236, "grad_norm": 0.07878104597330093, "learning_rate": 3.841479700854701e-05, "loss": 0.0198, "step": 31730 }, { "epoch": 0.2346175453120842, "grad_norm": 0.10871163010597229, "learning_rate": 3.841108736942071e-05, "loss": 0.0199, "step": 31740 }, { "epoch": 0.23469146388338608, "grad_norm": 0.08035383373498917, "learning_rate": 3.84073777302944e-05, "loss": 0.0184, "step": 31750 }, { "epoch": 0.23476538245468792, "grad_norm": 0.09240926057100296, "learning_rate": 3.840366809116809e-05, "loss": 0.0191, "step": 31760 }, { "epoch": 0.23483930102598977, "grad_norm": 0.0863938182592392, "learning_rate": 3.839995845204179e-05, "loss": 0.0178, "step": 31770 }, { "epoch": 0.2349132195972916, "grad_norm": 0.08668408542871475, "learning_rate": 3.839624881291548e-05, "loss": 0.0186, "step": 31780 }, { "epoch": 0.23498713816859348, "grad_norm": 0.10141124576330185, "learning_rate": 3.8392539173789174e-05, "loss": 0.02, "step": 31790 }, { "epoch": 0.23506105673989533, "grad_norm": 0.07474733889102936, "learning_rate": 3.838882953466287e-05, "loss": 0.0179, "step": 31800 }, { "epoch": 0.23513497531119718, "grad_norm": 0.0803011879324913, "learning_rate": 3.8385119895536566e-05, "loss": 0.0183, "step": 31810 }, { "epoch": 0.23520889388249905, "grad_norm": 0.1086181104183197, "learning_rate": 3.838141025641026e-05, "loss": 0.0206, "step": 31820 }, { "epoch": 0.2352828124538009, "grad_norm": 0.0771515890955925, "learning_rate": 3.837770061728395e-05, "loss": 0.0189, "step": 31830 }, { "epoch": 0.23535673102510274, "grad_norm": 0.09756075590848923, "learning_rate": 3.837399097815765e-05, "loss": 0.0195, "step": 31840 }, { "epoch": 0.2354306495964046, "grad_norm": 0.06898177415132523, "learning_rate": 3.8370281339031336e-05, "loss": 0.0182, "step": 31850 }, { "epoch": 0.23550456816770646, "grad_norm": 0.08295425772666931, "learning_rate": 3.836657169990503e-05, "loss": 0.0191, "step": 31860 }, { "epoch": 0.2355784867390083, "grad_norm": 0.08858896791934967, "learning_rate": 3.836286206077873e-05, "loss": 0.0207, "step": 31870 }, { "epoch": 0.23565240531031018, "grad_norm": 0.06618060171604156, "learning_rate": 3.8359152421652424e-05, "loss": 0.0189, "step": 31880 }, { "epoch": 0.23572632388161202, "grad_norm": 0.0754452645778656, "learning_rate": 3.835544278252612e-05, "loss": 0.0175, "step": 31890 }, { "epoch": 0.23580024245291387, "grad_norm": 0.08633226156234741, "learning_rate": 3.835173314339981e-05, "loss": 0.0211, "step": 31900 }, { "epoch": 0.2358741610242157, "grad_norm": 0.09160919487476349, "learning_rate": 3.8348023504273505e-05, "loss": 0.0222, "step": 31910 }, { "epoch": 0.23594807959551758, "grad_norm": 0.08634735643863678, "learning_rate": 3.83443138651472e-05, "loss": 0.0163, "step": 31920 }, { "epoch": 0.23602199816681943, "grad_norm": 0.13907606899738312, "learning_rate": 3.834060422602089e-05, "loss": 0.0188, "step": 31930 }, { "epoch": 0.23609591673812128, "grad_norm": 0.07669810205698013, "learning_rate": 3.8336894586894586e-05, "loss": 0.0212, "step": 31940 }, { "epoch": 0.23616983530942315, "grad_norm": 0.101229727268219, "learning_rate": 3.833318494776828e-05, "loss": 0.0192, "step": 31950 }, { "epoch": 0.236243753880725, "grad_norm": 0.09813881665468216, "learning_rate": 3.832947530864198e-05, "loss": 0.0208, "step": 31960 }, { "epoch": 0.23631767245202684, "grad_norm": 0.08786879479885101, "learning_rate": 3.8325765669515674e-05, "loss": 0.0196, "step": 31970 }, { "epoch": 0.2363915910233287, "grad_norm": 0.06772688031196594, "learning_rate": 3.8322056030389363e-05, "loss": 0.018, "step": 31980 }, { "epoch": 0.23646550959463056, "grad_norm": 0.09436634927988052, "learning_rate": 3.831834639126306e-05, "loss": 0.0207, "step": 31990 }, { "epoch": 0.2365394281659324, "grad_norm": 0.09478554129600525, "learning_rate": 3.8314636752136755e-05, "loss": 0.0182, "step": 32000 }, { "epoch": 0.23661334673723428, "grad_norm": 0.09436893463134766, "learning_rate": 3.8310927113010445e-05, "loss": 0.0174, "step": 32010 }, { "epoch": 0.23668726530853612, "grad_norm": 0.12236055731773376, "learning_rate": 3.830721747388414e-05, "loss": 0.0205, "step": 32020 }, { "epoch": 0.23676118387983797, "grad_norm": 0.09569685161113739, "learning_rate": 3.8303507834757837e-05, "loss": 0.0187, "step": 32030 }, { "epoch": 0.2368351024511398, "grad_norm": 0.12519055604934692, "learning_rate": 3.829979819563153e-05, "loss": 0.0207, "step": 32040 }, { "epoch": 0.23690902102244168, "grad_norm": 0.0853719487786293, "learning_rate": 3.829608855650523e-05, "loss": 0.0163, "step": 32050 }, { "epoch": 0.23698293959374353, "grad_norm": 0.0898117944598198, "learning_rate": 3.829237891737892e-05, "loss": 0.0218, "step": 32060 }, { "epoch": 0.23705685816504538, "grad_norm": 0.10286859422922134, "learning_rate": 3.8288669278252614e-05, "loss": 0.0191, "step": 32070 }, { "epoch": 0.23713077673634725, "grad_norm": 0.07973692566156387, "learning_rate": 3.82849596391263e-05, "loss": 0.02, "step": 32080 }, { "epoch": 0.2372046953076491, "grad_norm": 0.0759081095457077, "learning_rate": 3.828125e-05, "loss": 0.018, "step": 32090 }, { "epoch": 0.23727861387895094, "grad_norm": 0.10089462995529175, "learning_rate": 3.82775403608737e-05, "loss": 0.0183, "step": 32100 }, { "epoch": 0.2373525324502528, "grad_norm": 0.06875733286142349, "learning_rate": 3.827383072174739e-05, "loss": 0.0188, "step": 32110 }, { "epoch": 0.23742645102155466, "grad_norm": 0.09233024716377258, "learning_rate": 3.827012108262109e-05, "loss": 0.0168, "step": 32120 }, { "epoch": 0.2375003695928565, "grad_norm": 0.11073601245880127, "learning_rate": 3.8266411443494776e-05, "loss": 0.0194, "step": 32130 }, { "epoch": 0.23757428816415838, "grad_norm": 0.05894783139228821, "learning_rate": 3.826270180436847e-05, "loss": 0.0183, "step": 32140 }, { "epoch": 0.23764820673546022, "grad_norm": 0.09136971086263657, "learning_rate": 3.825899216524217e-05, "loss": 0.0213, "step": 32150 }, { "epoch": 0.23772212530676207, "grad_norm": 0.09493359178304672, "learning_rate": 3.825528252611586e-05, "loss": 0.0207, "step": 32160 }, { "epoch": 0.2377960438780639, "grad_norm": 0.0764891654253006, "learning_rate": 3.825157288698955e-05, "loss": 0.0197, "step": 32170 }, { "epoch": 0.23786996244936578, "grad_norm": 0.06478419899940491, "learning_rate": 3.824786324786325e-05, "loss": 0.0208, "step": 32180 }, { "epoch": 0.23794388102066763, "grad_norm": 0.060552988201379776, "learning_rate": 3.8244153608736945e-05, "loss": 0.0186, "step": 32190 }, { "epoch": 0.23801779959196948, "grad_norm": 0.09175571799278259, "learning_rate": 3.824044396961064e-05, "loss": 0.0194, "step": 32200 }, { "epoch": 0.23809171816327135, "grad_norm": 0.07756716758012772, "learning_rate": 3.823673433048433e-05, "loss": 0.0198, "step": 32210 }, { "epoch": 0.2381656367345732, "grad_norm": 0.07804546505212784, "learning_rate": 3.8233024691358026e-05, "loss": 0.0207, "step": 32220 }, { "epoch": 0.23823955530587504, "grad_norm": 0.10484328866004944, "learning_rate": 3.822931505223172e-05, "loss": 0.0187, "step": 32230 }, { "epoch": 0.2383134738771769, "grad_norm": 0.10777238756418228, "learning_rate": 3.822560541310541e-05, "loss": 0.0222, "step": 32240 }, { "epoch": 0.23838739244847876, "grad_norm": 0.06873957812786102, "learning_rate": 3.8221895773979114e-05, "loss": 0.0215, "step": 32250 }, { "epoch": 0.2384613110197806, "grad_norm": 0.06929781287908554, "learning_rate": 3.82181861348528e-05, "loss": 0.0206, "step": 32260 }, { "epoch": 0.23853522959108248, "grad_norm": 0.09036051481962204, "learning_rate": 3.82144764957265e-05, "loss": 0.0181, "step": 32270 }, { "epoch": 0.23860914816238432, "grad_norm": 0.07851787656545639, "learning_rate": 3.8210766856600195e-05, "loss": 0.0195, "step": 32280 }, { "epoch": 0.23868306673368617, "grad_norm": 0.07176248729228973, "learning_rate": 3.8207057217473884e-05, "loss": 0.0206, "step": 32290 }, { "epoch": 0.238756985304988, "grad_norm": 0.0853743925690651, "learning_rate": 3.820334757834758e-05, "loss": 0.019, "step": 32300 }, { "epoch": 0.23883090387628989, "grad_norm": 0.12627804279327393, "learning_rate": 3.819963793922127e-05, "loss": 0.0216, "step": 32310 }, { "epoch": 0.23890482244759173, "grad_norm": 0.13927190005779266, "learning_rate": 3.8195928300094965e-05, "loss": 0.022, "step": 32320 }, { "epoch": 0.23897874101889358, "grad_norm": 0.08586980402469635, "learning_rate": 3.819221866096867e-05, "loss": 0.0193, "step": 32330 }, { "epoch": 0.23905265959019545, "grad_norm": 0.08638795465230942, "learning_rate": 3.818850902184236e-05, "loss": 0.0179, "step": 32340 }, { "epoch": 0.2391265781614973, "grad_norm": 0.07057658582925797, "learning_rate": 3.818479938271605e-05, "loss": 0.0152, "step": 32350 }, { "epoch": 0.23920049673279914, "grad_norm": 0.09457147121429443, "learning_rate": 3.818108974358974e-05, "loss": 0.019, "step": 32360 }, { "epoch": 0.239274415304101, "grad_norm": 0.11094425618648529, "learning_rate": 3.817738010446344e-05, "loss": 0.0208, "step": 32370 }, { "epoch": 0.23934833387540286, "grad_norm": 0.09708326309919357, "learning_rate": 3.8173670465337134e-05, "loss": 0.0198, "step": 32380 }, { "epoch": 0.2394222524467047, "grad_norm": 0.07583857327699661, "learning_rate": 3.8169960826210824e-05, "loss": 0.0192, "step": 32390 }, { "epoch": 0.23949617101800658, "grad_norm": 0.089440256357193, "learning_rate": 3.8166251187084526e-05, "loss": 0.0219, "step": 32400 }, { "epoch": 0.23957008958930842, "grad_norm": 0.0617837980389595, "learning_rate": 3.8162541547958215e-05, "loss": 0.0188, "step": 32410 }, { "epoch": 0.23964400816061027, "grad_norm": 0.09545888751745224, "learning_rate": 3.815883190883191e-05, "loss": 0.018, "step": 32420 }, { "epoch": 0.2397179267319121, "grad_norm": 0.10647038370370865, "learning_rate": 3.815512226970561e-05, "loss": 0.0196, "step": 32430 }, { "epoch": 0.23979184530321399, "grad_norm": 0.09234632551670074, "learning_rate": 3.81514126305793e-05, "loss": 0.0173, "step": 32440 }, { "epoch": 0.23986576387451583, "grad_norm": 0.1066628023982048, "learning_rate": 3.814770299145299e-05, "loss": 0.0194, "step": 32450 }, { "epoch": 0.23993968244581768, "grad_norm": 0.08322324603796005, "learning_rate": 3.814399335232669e-05, "loss": 0.019, "step": 32460 }, { "epoch": 0.24001360101711955, "grad_norm": 0.116621233522892, "learning_rate": 3.814028371320038e-05, "loss": 0.0187, "step": 32470 }, { "epoch": 0.2400875195884214, "grad_norm": 0.08321616798639297, "learning_rate": 3.813657407407408e-05, "loss": 0.019, "step": 32480 }, { "epoch": 0.24016143815972324, "grad_norm": 0.1350875049829483, "learning_rate": 3.813286443494777e-05, "loss": 0.0192, "step": 32490 }, { "epoch": 0.2402353567310251, "grad_norm": 0.12473758310079575, "learning_rate": 3.8129154795821466e-05, "loss": 0.0209, "step": 32500 }, { "epoch": 0.24030927530232696, "grad_norm": 0.07671623677015305, "learning_rate": 3.812544515669516e-05, "loss": 0.0209, "step": 32510 }, { "epoch": 0.2403831938736288, "grad_norm": 0.08903726935386658, "learning_rate": 3.812173551756885e-05, "loss": 0.0211, "step": 32520 }, { "epoch": 0.24045711244493068, "grad_norm": 0.0904008224606514, "learning_rate": 3.811802587844255e-05, "loss": 0.0196, "step": 32530 }, { "epoch": 0.24053103101623252, "grad_norm": 0.08499748259782791, "learning_rate": 3.8114316239316236e-05, "loss": 0.0218, "step": 32540 }, { "epoch": 0.24060494958753437, "grad_norm": 0.09576483070850372, "learning_rate": 3.811060660018994e-05, "loss": 0.0205, "step": 32550 }, { "epoch": 0.2406788681588362, "grad_norm": 0.07833150774240494, "learning_rate": 3.8106896961063635e-05, "loss": 0.0189, "step": 32560 }, { "epoch": 0.24075278673013809, "grad_norm": 0.08177255839109421, "learning_rate": 3.8103187321937324e-05, "loss": 0.0172, "step": 32570 }, { "epoch": 0.24082670530143993, "grad_norm": 0.11327052116394043, "learning_rate": 3.809947768281102e-05, "loss": 0.0219, "step": 32580 }, { "epoch": 0.24090062387274178, "grad_norm": 0.08393348008394241, "learning_rate": 3.809576804368471e-05, "loss": 0.0218, "step": 32590 }, { "epoch": 0.24097454244404365, "grad_norm": 0.09698115289211273, "learning_rate": 3.8092058404558405e-05, "loss": 0.0197, "step": 32600 }, { "epoch": 0.2410484610153455, "grad_norm": 0.09822794049978256, "learning_rate": 3.80883487654321e-05, "loss": 0.0219, "step": 32610 }, { "epoch": 0.24112237958664734, "grad_norm": 0.08049602806568146, "learning_rate": 3.808463912630579e-05, "loss": 0.0207, "step": 32620 }, { "epoch": 0.2411962981579492, "grad_norm": 0.08989317715167999, "learning_rate": 3.808092948717949e-05, "loss": 0.0228, "step": 32630 }, { "epoch": 0.24127021672925106, "grad_norm": 0.09174864739179611, "learning_rate": 3.807721984805318e-05, "loss": 0.0207, "step": 32640 }, { "epoch": 0.2413441353005529, "grad_norm": 0.09022696316242218, "learning_rate": 3.807351020892688e-05, "loss": 0.0215, "step": 32650 }, { "epoch": 0.24141805387185478, "grad_norm": 0.1705494076013565, "learning_rate": 3.8069800569800574e-05, "loss": 0.021, "step": 32660 }, { "epoch": 0.24149197244315662, "grad_norm": 0.07814808189868927, "learning_rate": 3.806609093067426e-05, "loss": 0.021, "step": 32670 }, { "epoch": 0.24156589101445847, "grad_norm": 0.09162690490484238, "learning_rate": 3.806238129154796e-05, "loss": 0.0193, "step": 32680 }, { "epoch": 0.24163980958576034, "grad_norm": 0.11757416278123856, "learning_rate": 3.8058671652421655e-05, "loss": 0.0188, "step": 32690 }, { "epoch": 0.24171372815706219, "grad_norm": 0.0703003853559494, "learning_rate": 3.8054962013295344e-05, "loss": 0.0179, "step": 32700 }, { "epoch": 0.24178764672836403, "grad_norm": 0.0890263170003891, "learning_rate": 3.805125237416905e-05, "loss": 0.0191, "step": 32710 }, { "epoch": 0.24186156529966588, "grad_norm": 0.09751693159341812, "learning_rate": 3.8047542735042736e-05, "loss": 0.017, "step": 32720 }, { "epoch": 0.24193548387096775, "grad_norm": 0.10251424461603165, "learning_rate": 3.804383309591643e-05, "loss": 0.0225, "step": 32730 }, { "epoch": 0.2420094024422696, "grad_norm": 0.09268451482057571, "learning_rate": 3.804012345679013e-05, "loss": 0.0175, "step": 32740 }, { "epoch": 0.24208332101357144, "grad_norm": 0.08104787021875381, "learning_rate": 3.803641381766382e-05, "loss": 0.0206, "step": 32750 }, { "epoch": 0.2421572395848733, "grad_norm": 0.11207576841115952, "learning_rate": 3.803270417853751e-05, "loss": 0.0215, "step": 32760 }, { "epoch": 0.24223115815617516, "grad_norm": 0.10712762176990509, "learning_rate": 3.80289945394112e-05, "loss": 0.0206, "step": 32770 }, { "epoch": 0.242305076727477, "grad_norm": 0.08218982070684433, "learning_rate": 3.8025284900284905e-05, "loss": 0.0187, "step": 32780 }, { "epoch": 0.24237899529877888, "grad_norm": 0.10749506950378418, "learning_rate": 3.80215752611586e-05, "loss": 0.0174, "step": 32790 }, { "epoch": 0.24245291387008072, "grad_norm": 0.11496833711862564, "learning_rate": 3.801786562203229e-05, "loss": 0.0211, "step": 32800 }, { "epoch": 0.24252683244138257, "grad_norm": 0.09944915026426315, "learning_rate": 3.8014155982905986e-05, "loss": 0.0211, "step": 32810 }, { "epoch": 0.24260075101268444, "grad_norm": 0.14544843137264252, "learning_rate": 3.8010446343779676e-05, "loss": 0.0189, "step": 32820 }, { "epoch": 0.24267466958398629, "grad_norm": 0.146175816655159, "learning_rate": 3.800673670465337e-05, "loss": 0.0194, "step": 32830 }, { "epoch": 0.24274858815528813, "grad_norm": 0.08695892989635468, "learning_rate": 3.800302706552707e-05, "loss": 0.0166, "step": 32840 }, { "epoch": 0.24282250672658998, "grad_norm": 0.10960700362920761, "learning_rate": 3.799931742640076e-05, "loss": 0.0188, "step": 32850 }, { "epoch": 0.24289642529789185, "grad_norm": 0.09043558686971664, "learning_rate": 3.799560778727446e-05, "loss": 0.0216, "step": 32860 }, { "epoch": 0.2429703438691937, "grad_norm": 0.1071183830499649, "learning_rate": 3.799189814814815e-05, "loss": 0.0188, "step": 32870 }, { "epoch": 0.24304426244049554, "grad_norm": 0.08563335239887238, "learning_rate": 3.7988188509021845e-05, "loss": 0.0203, "step": 32880 }, { "epoch": 0.2431181810117974, "grad_norm": 0.09415535628795624, "learning_rate": 3.798447886989554e-05, "loss": 0.0195, "step": 32890 }, { "epoch": 0.24319209958309926, "grad_norm": 0.10130775719881058, "learning_rate": 3.798076923076923e-05, "loss": 0.0217, "step": 32900 }, { "epoch": 0.2432660181544011, "grad_norm": 0.075839102268219, "learning_rate": 3.7977059591642926e-05, "loss": 0.0192, "step": 32910 }, { "epoch": 0.24333993672570298, "grad_norm": 0.0735127404332161, "learning_rate": 3.797334995251662e-05, "loss": 0.022, "step": 32920 }, { "epoch": 0.24341385529700482, "grad_norm": 0.07995545864105225, "learning_rate": 3.796964031339032e-05, "loss": 0.0184, "step": 32930 }, { "epoch": 0.24348777386830667, "grad_norm": 0.09814750403165817, "learning_rate": 3.7965930674264014e-05, "loss": 0.0188, "step": 32940 }, { "epoch": 0.24356169243960854, "grad_norm": 0.09759753197431564, "learning_rate": 3.79622210351377e-05, "loss": 0.0175, "step": 32950 }, { "epoch": 0.24363561101091039, "grad_norm": 0.0937381312251091, "learning_rate": 3.79585113960114e-05, "loss": 0.0185, "step": 32960 }, { "epoch": 0.24370952958221223, "grad_norm": 0.06919185072183609, "learning_rate": 3.7954801756885095e-05, "loss": 0.0165, "step": 32970 }, { "epoch": 0.24378344815351408, "grad_norm": 0.11352057009935379, "learning_rate": 3.7951092117758784e-05, "loss": 0.02, "step": 32980 }, { "epoch": 0.24385736672481595, "grad_norm": 0.06584945321083069, "learning_rate": 3.794738247863248e-05, "loss": 0.0183, "step": 32990 }, { "epoch": 0.2439312852961178, "grad_norm": 0.2477860003709793, "learning_rate": 3.794367283950617e-05, "loss": 0.0187, "step": 33000 }, { "epoch": 0.24400520386741964, "grad_norm": 0.09093081206083298, "learning_rate": 3.793996320037987e-05, "loss": 0.0202, "step": 33010 }, { "epoch": 0.2440791224387215, "grad_norm": 0.09048257768154144, "learning_rate": 3.793625356125357e-05, "loss": 0.0177, "step": 33020 }, { "epoch": 0.24415304101002336, "grad_norm": 0.11075396090745926, "learning_rate": 3.793254392212726e-05, "loss": 0.0197, "step": 33030 }, { "epoch": 0.2442269595813252, "grad_norm": 0.10718685388565063, "learning_rate": 3.792883428300095e-05, "loss": 0.0189, "step": 33040 }, { "epoch": 0.24430087815262708, "grad_norm": 0.08964429050683975, "learning_rate": 3.792512464387464e-05, "loss": 0.0177, "step": 33050 }, { "epoch": 0.24437479672392892, "grad_norm": 0.11210490763187408, "learning_rate": 3.792141500474834e-05, "loss": 0.0183, "step": 33060 }, { "epoch": 0.24444871529523077, "grad_norm": 0.09917151182889938, "learning_rate": 3.7917705365622034e-05, "loss": 0.0195, "step": 33070 }, { "epoch": 0.24452263386653264, "grad_norm": 0.07808961719274521, "learning_rate": 3.791399572649573e-05, "loss": 0.0188, "step": 33080 }, { "epoch": 0.24459655243783449, "grad_norm": 0.09703001379966736, "learning_rate": 3.7910286087369426e-05, "loss": 0.0205, "step": 33090 }, { "epoch": 0.24467047100913633, "grad_norm": 0.07967003434896469, "learning_rate": 3.7906576448243115e-05, "loss": 0.0174, "step": 33100 }, { "epoch": 0.24474438958043818, "grad_norm": 0.08490297943353653, "learning_rate": 3.790286680911681e-05, "loss": 0.019, "step": 33110 }, { "epoch": 0.24481830815174005, "grad_norm": 0.09356676787137985, "learning_rate": 3.789915716999051e-05, "loss": 0.0184, "step": 33120 }, { "epoch": 0.2448922267230419, "grad_norm": 0.0811527818441391, "learning_rate": 3.7895447530864196e-05, "loss": 0.022, "step": 33130 }, { "epoch": 0.24496614529434374, "grad_norm": 0.09434277564287186, "learning_rate": 3.789173789173789e-05, "loss": 0.0194, "step": 33140 }, { "epoch": 0.2450400638656456, "grad_norm": 0.09916486591100693, "learning_rate": 3.788802825261159e-05, "loss": 0.0216, "step": 33150 }, { "epoch": 0.24511398243694746, "grad_norm": 0.08763419836759567, "learning_rate": 3.7884318613485284e-05, "loss": 0.0202, "step": 33160 }, { "epoch": 0.2451879010082493, "grad_norm": 0.08583448827266693, "learning_rate": 3.788060897435898e-05, "loss": 0.0179, "step": 33170 }, { "epoch": 0.24526181957955118, "grad_norm": 0.09215591102838516, "learning_rate": 3.787689933523267e-05, "loss": 0.0187, "step": 33180 }, { "epoch": 0.24533573815085302, "grad_norm": 0.09262657165527344, "learning_rate": 3.7873189696106365e-05, "loss": 0.0192, "step": 33190 }, { "epoch": 0.24540965672215487, "grad_norm": 0.10066172480583191, "learning_rate": 3.786948005698006e-05, "loss": 0.0222, "step": 33200 }, { "epoch": 0.24548357529345674, "grad_norm": 0.0908813551068306, "learning_rate": 3.786577041785375e-05, "loss": 0.018, "step": 33210 }, { "epoch": 0.24555749386475859, "grad_norm": 0.11447655409574509, "learning_rate": 3.7862060778727447e-05, "loss": 0.02, "step": 33220 }, { "epoch": 0.24563141243606043, "grad_norm": 0.07889176160097122, "learning_rate": 3.785835113960114e-05, "loss": 0.0196, "step": 33230 }, { "epoch": 0.24570533100736228, "grad_norm": 0.06103529781103134, "learning_rate": 3.785464150047484e-05, "loss": 0.0175, "step": 33240 }, { "epoch": 0.24577924957866415, "grad_norm": 0.0895182192325592, "learning_rate": 3.7850931861348534e-05, "loss": 0.0212, "step": 33250 }, { "epoch": 0.245853168149966, "grad_norm": 0.065130814909935, "learning_rate": 3.7847222222222224e-05, "loss": 0.0193, "step": 33260 }, { "epoch": 0.24592708672126784, "grad_norm": 0.09803667664527893, "learning_rate": 3.784351258309592e-05, "loss": 0.0185, "step": 33270 }, { "epoch": 0.2460010052925697, "grad_norm": 0.08449755609035492, "learning_rate": 3.783980294396961e-05, "loss": 0.0218, "step": 33280 }, { "epoch": 0.24607492386387156, "grad_norm": 0.0648382306098938, "learning_rate": 3.7836093304843305e-05, "loss": 0.017, "step": 33290 }, { "epoch": 0.2461488424351734, "grad_norm": 0.2292952835559845, "learning_rate": 3.7832383665717e-05, "loss": 0.0192, "step": 33300 }, { "epoch": 0.24622276100647528, "grad_norm": 0.08093922585248947, "learning_rate": 3.78286740265907e-05, "loss": 0.0194, "step": 33310 }, { "epoch": 0.24629667957777712, "grad_norm": 0.09469078481197357, "learning_rate": 3.782496438746439e-05, "loss": 0.0202, "step": 33320 }, { "epoch": 0.24637059814907897, "grad_norm": 0.10700713098049164, "learning_rate": 3.782125474833808e-05, "loss": 0.0192, "step": 33330 }, { "epoch": 0.24644451672038084, "grad_norm": 0.10421323776245117, "learning_rate": 3.781754510921178e-05, "loss": 0.0202, "step": 33340 }, { "epoch": 0.24651843529168269, "grad_norm": 0.07413748651742935, "learning_rate": 3.7813835470085474e-05, "loss": 0.0211, "step": 33350 }, { "epoch": 0.24659235386298453, "grad_norm": 0.08727966248989105, "learning_rate": 3.781012583095916e-05, "loss": 0.0186, "step": 33360 }, { "epoch": 0.24666627243428638, "grad_norm": 0.07526741921901703, "learning_rate": 3.780641619183286e-05, "loss": 0.0191, "step": 33370 }, { "epoch": 0.24674019100558825, "grad_norm": 0.09076111763715744, "learning_rate": 3.7802706552706555e-05, "loss": 0.0204, "step": 33380 }, { "epoch": 0.2468141095768901, "grad_norm": 0.12775136530399323, "learning_rate": 3.779899691358025e-05, "loss": 0.0211, "step": 33390 }, { "epoch": 0.24688802814819194, "grad_norm": 0.13375797867774963, "learning_rate": 3.779528727445395e-05, "loss": 0.0203, "step": 33400 }, { "epoch": 0.2469619467194938, "grad_norm": 0.09040780365467072, "learning_rate": 3.7791577635327636e-05, "loss": 0.0216, "step": 33410 }, { "epoch": 0.24703586529079566, "grad_norm": 0.0828985720872879, "learning_rate": 3.778786799620133e-05, "loss": 0.0202, "step": 33420 }, { "epoch": 0.2471097838620975, "grad_norm": 0.09508246928453445, "learning_rate": 3.778415835707503e-05, "loss": 0.0186, "step": 33430 }, { "epoch": 0.24718370243339938, "grad_norm": 0.09850742667913437, "learning_rate": 3.778044871794872e-05, "loss": 0.0218, "step": 33440 }, { "epoch": 0.24725762100470122, "grad_norm": 0.0872756764292717, "learning_rate": 3.777673907882241e-05, "loss": 0.0152, "step": 33450 }, { "epoch": 0.24733153957600307, "grad_norm": 0.11343777179718018, "learning_rate": 3.777302943969611e-05, "loss": 0.0186, "step": 33460 }, { "epoch": 0.24740545814730494, "grad_norm": 0.10817458480596542, "learning_rate": 3.7769319800569805e-05, "loss": 0.0218, "step": 33470 }, { "epoch": 0.24747937671860679, "grad_norm": 0.10540654510259628, "learning_rate": 3.77656101614435e-05, "loss": 0.0204, "step": 33480 }, { "epoch": 0.24755329528990863, "grad_norm": 0.08479999005794525, "learning_rate": 3.776190052231719e-05, "loss": 0.0213, "step": 33490 }, { "epoch": 0.24762721386121048, "grad_norm": 0.06634625047445297, "learning_rate": 3.7758190883190886e-05, "loss": 0.0208, "step": 33500 }, { "epoch": 0.24770113243251235, "grad_norm": 0.10522954165935516, "learning_rate": 3.7754481244064575e-05, "loss": 0.021, "step": 33510 }, { "epoch": 0.2477750510038142, "grad_norm": 0.11834339052438736, "learning_rate": 3.775077160493827e-05, "loss": 0.0217, "step": 33520 }, { "epoch": 0.24784896957511604, "grad_norm": 0.09099873155355453, "learning_rate": 3.774706196581197e-05, "loss": 0.022, "step": 33530 }, { "epoch": 0.2479228881464179, "grad_norm": 0.11641920357942581, "learning_rate": 3.774335232668566e-05, "loss": 0.021, "step": 33540 }, { "epoch": 0.24799680671771976, "grad_norm": 0.10506580024957657, "learning_rate": 3.773964268755936e-05, "loss": 0.0198, "step": 33550 }, { "epoch": 0.2480707252890216, "grad_norm": 0.09904909878969193, "learning_rate": 3.773593304843305e-05, "loss": 0.0187, "step": 33560 }, { "epoch": 0.24814464386032348, "grad_norm": 0.08247719705104828, "learning_rate": 3.7732223409306744e-05, "loss": 0.0214, "step": 33570 }, { "epoch": 0.24821856243162532, "grad_norm": 0.09621842950582504, "learning_rate": 3.772851377018044e-05, "loss": 0.0198, "step": 33580 }, { "epoch": 0.24829248100292717, "grad_norm": 0.10325994342565536, "learning_rate": 3.772480413105413e-05, "loss": 0.0191, "step": 33590 }, { "epoch": 0.24836639957422904, "grad_norm": 0.06651351600885391, "learning_rate": 3.7721094491927826e-05, "loss": 0.0198, "step": 33600 }, { "epoch": 0.2484403181455309, "grad_norm": 0.10950721055269241, "learning_rate": 3.771738485280152e-05, "loss": 0.0203, "step": 33610 }, { "epoch": 0.24851423671683273, "grad_norm": 0.07975035905838013, "learning_rate": 3.771367521367522e-05, "loss": 0.0195, "step": 33620 }, { "epoch": 0.2485881552881346, "grad_norm": 0.14583900570869446, "learning_rate": 3.7709965574548913e-05, "loss": 0.0194, "step": 33630 }, { "epoch": 0.24866207385943645, "grad_norm": 0.07780491560697556, "learning_rate": 3.77062559354226e-05, "loss": 0.0198, "step": 33640 }, { "epoch": 0.2487359924307383, "grad_norm": 0.12104685604572296, "learning_rate": 3.77025462962963e-05, "loss": 0.0209, "step": 33650 }, { "epoch": 0.24880991100204014, "grad_norm": 0.13882067799568176, "learning_rate": 3.7698836657169995e-05, "loss": 0.0234, "step": 33660 }, { "epoch": 0.248883829573342, "grad_norm": 0.0761672854423523, "learning_rate": 3.7695127018043684e-05, "loss": 0.0202, "step": 33670 }, { "epoch": 0.24895774814464386, "grad_norm": 0.07395438104867935, "learning_rate": 3.769141737891738e-05, "loss": 0.0191, "step": 33680 }, { "epoch": 0.2490316667159457, "grad_norm": 0.09155049920082092, "learning_rate": 3.7687707739791076e-05, "loss": 0.0193, "step": 33690 }, { "epoch": 0.24910558528724758, "grad_norm": 0.10466806590557098, "learning_rate": 3.768399810066477e-05, "loss": 0.0198, "step": 33700 }, { "epoch": 0.24917950385854942, "grad_norm": 0.0634426698088646, "learning_rate": 3.768028846153847e-05, "loss": 0.0168, "step": 33710 }, { "epoch": 0.24925342242985127, "grad_norm": 0.08831311017274857, "learning_rate": 3.767657882241216e-05, "loss": 0.0199, "step": 33720 }, { "epoch": 0.24932734100115314, "grad_norm": 0.078910231590271, "learning_rate": 3.767286918328585e-05, "loss": 0.0183, "step": 33730 }, { "epoch": 0.249401259572455, "grad_norm": 0.07520467042922974, "learning_rate": 3.766915954415954e-05, "loss": 0.019, "step": 33740 }, { "epoch": 0.24947517814375683, "grad_norm": 0.10446647554636002, "learning_rate": 3.766544990503324e-05, "loss": 0.0225, "step": 33750 }, { "epoch": 0.2495490967150587, "grad_norm": 0.1083189845085144, "learning_rate": 3.7661740265906934e-05, "loss": 0.0193, "step": 33760 }, { "epoch": 0.24962301528636055, "grad_norm": 0.07150457799434662, "learning_rate": 3.765803062678063e-05, "loss": 0.0198, "step": 33770 }, { "epoch": 0.2496969338576624, "grad_norm": 0.08948767930269241, "learning_rate": 3.7654320987654326e-05, "loss": 0.0211, "step": 33780 }, { "epoch": 0.24977085242896424, "grad_norm": 0.08297252655029297, "learning_rate": 3.7650611348528015e-05, "loss": 0.0225, "step": 33790 }, { "epoch": 0.2498447710002661, "grad_norm": 0.07543346285820007, "learning_rate": 3.764690170940171e-05, "loss": 0.0198, "step": 33800 }, { "epoch": 0.24991868957156796, "grad_norm": 0.07644642144441605, "learning_rate": 3.764319207027541e-05, "loss": 0.019, "step": 33810 }, { "epoch": 0.2499926081428698, "grad_norm": 0.08885039389133453, "learning_rate": 3.7639482431149096e-05, "loss": 0.018, "step": 33820 }, { "epoch": 0.2500665267141717, "grad_norm": 0.12531474232673645, "learning_rate": 3.763577279202279e-05, "loss": 0.0194, "step": 33830 }, { "epoch": 0.2501404452854735, "grad_norm": 0.0952950045466423, "learning_rate": 3.763206315289649e-05, "loss": 0.0204, "step": 33840 }, { "epoch": 0.25021436385677537, "grad_norm": 0.12818032503128052, "learning_rate": 3.7628353513770184e-05, "loss": 0.0208, "step": 33850 }, { "epoch": 0.2502882824280772, "grad_norm": 0.08323697000741959, "learning_rate": 3.762464387464388e-05, "loss": 0.0207, "step": 33860 }, { "epoch": 0.25036220099937906, "grad_norm": 0.10147396475076675, "learning_rate": 3.762093423551757e-05, "loss": 0.0202, "step": 33870 }, { "epoch": 0.25043611957068096, "grad_norm": 0.07355233281850815, "learning_rate": 3.7617224596391265e-05, "loss": 0.0195, "step": 33880 }, { "epoch": 0.2505100381419828, "grad_norm": 0.12211878597736359, "learning_rate": 3.761351495726496e-05, "loss": 0.0194, "step": 33890 }, { "epoch": 0.25058395671328465, "grad_norm": 0.14431354403495789, "learning_rate": 3.760980531813865e-05, "loss": 0.0208, "step": 33900 }, { "epoch": 0.2506578752845865, "grad_norm": 0.08047827333211899, "learning_rate": 3.7606095679012346e-05, "loss": 0.0184, "step": 33910 }, { "epoch": 0.25073179385588834, "grad_norm": 0.07976054400205612, "learning_rate": 3.760238603988604e-05, "loss": 0.0191, "step": 33920 }, { "epoch": 0.2508057124271902, "grad_norm": 0.17716683447360992, "learning_rate": 3.759867640075974e-05, "loss": 0.0213, "step": 33930 }, { "epoch": 0.2508796309984921, "grad_norm": 0.12124445289373398, "learning_rate": 3.7594966761633434e-05, "loss": 0.0193, "step": 33940 }, { "epoch": 0.25095354956979393, "grad_norm": 0.10458637028932571, "learning_rate": 3.759125712250712e-05, "loss": 0.0221, "step": 33950 }, { "epoch": 0.2510274681410958, "grad_norm": 0.10960089415311813, "learning_rate": 3.758754748338082e-05, "loss": 0.0205, "step": 33960 }, { "epoch": 0.2511013867123976, "grad_norm": 0.10792728513479233, "learning_rate": 3.758383784425451e-05, "loss": 0.0202, "step": 33970 }, { "epoch": 0.25117530528369947, "grad_norm": 0.10539413243532181, "learning_rate": 3.7580128205128204e-05, "loss": 0.0201, "step": 33980 }, { "epoch": 0.2512492238550013, "grad_norm": 0.09134382009506226, "learning_rate": 3.75764185660019e-05, "loss": 0.019, "step": 33990 }, { "epoch": 0.25132314242630316, "grad_norm": 0.10139552503824234, "learning_rate": 3.7572708926875596e-05, "loss": 0.0223, "step": 34000 }, { "epoch": 0.25139706099760506, "grad_norm": 0.08448898047208786, "learning_rate": 3.756899928774929e-05, "loss": 0.0183, "step": 34010 }, { "epoch": 0.2514709795689069, "grad_norm": 0.11146911233663559, "learning_rate": 3.756528964862298e-05, "loss": 0.0189, "step": 34020 }, { "epoch": 0.25154489814020875, "grad_norm": 0.07452386617660522, "learning_rate": 3.756158000949668e-05, "loss": 0.019, "step": 34030 }, { "epoch": 0.2516188167115106, "grad_norm": 0.07857771217823029, "learning_rate": 3.7557870370370374e-05, "loss": 0.0162, "step": 34040 }, { "epoch": 0.25169273528281244, "grad_norm": 0.08723234385251999, "learning_rate": 3.755416073124406e-05, "loss": 0.0205, "step": 34050 }, { "epoch": 0.2517666538541143, "grad_norm": 0.07878848165273666, "learning_rate": 3.755045109211776e-05, "loss": 0.0174, "step": 34060 }, { "epoch": 0.2518405724254162, "grad_norm": 0.05665014311671257, "learning_rate": 3.7546741452991455e-05, "loss": 0.0188, "step": 34070 }, { "epoch": 0.25191449099671803, "grad_norm": 0.09765440225601196, "learning_rate": 3.754303181386515e-05, "loss": 0.0185, "step": 34080 }, { "epoch": 0.2519884095680199, "grad_norm": 0.09128037840127945, "learning_rate": 3.7539322174738847e-05, "loss": 0.0203, "step": 34090 }, { "epoch": 0.2520623281393217, "grad_norm": 0.06400024890899658, "learning_rate": 3.7535612535612536e-05, "loss": 0.0173, "step": 34100 }, { "epoch": 0.25213624671062357, "grad_norm": 0.08637039363384247, "learning_rate": 3.753190289648623e-05, "loss": 0.0203, "step": 34110 }, { "epoch": 0.2522101652819254, "grad_norm": 0.08356247842311859, "learning_rate": 3.752819325735993e-05, "loss": 0.0178, "step": 34120 }, { "epoch": 0.25228408385322726, "grad_norm": 0.09152962267398834, "learning_rate": 3.752448361823362e-05, "loss": 0.0185, "step": 34130 }, { "epoch": 0.25235800242452916, "grad_norm": 0.08049582690000534, "learning_rate": 3.752077397910731e-05, "loss": 0.0179, "step": 34140 }, { "epoch": 0.252431920995831, "grad_norm": 0.10080865025520325, "learning_rate": 3.751706433998101e-05, "loss": 0.0202, "step": 34150 }, { "epoch": 0.25250583956713285, "grad_norm": 0.12849904596805573, "learning_rate": 3.7513354700854705e-05, "loss": 0.0212, "step": 34160 }, { "epoch": 0.2525797581384347, "grad_norm": 0.3746621608734131, "learning_rate": 3.75096450617284e-05, "loss": 0.0201, "step": 34170 }, { "epoch": 0.25265367670973654, "grad_norm": 0.08858539909124374, "learning_rate": 3.750593542260209e-05, "loss": 0.0205, "step": 34180 }, { "epoch": 0.2527275952810384, "grad_norm": 0.08877479285001755, "learning_rate": 3.7502225783475786e-05, "loss": 0.0198, "step": 34190 }, { "epoch": 0.2528015138523403, "grad_norm": 0.14612101018428802, "learning_rate": 3.7498516144349475e-05, "loss": 0.0185, "step": 34200 }, { "epoch": 0.25287543242364213, "grad_norm": 0.09967604279518127, "learning_rate": 3.749480650522317e-05, "loss": 0.0198, "step": 34210 }, { "epoch": 0.252949350994944, "grad_norm": 0.06724333763122559, "learning_rate": 3.749109686609687e-05, "loss": 0.0215, "step": 34220 }, { "epoch": 0.2530232695662458, "grad_norm": 0.06407474726438522, "learning_rate": 3.748738722697056e-05, "loss": 0.019, "step": 34230 }, { "epoch": 0.25309718813754767, "grad_norm": 0.0668005645275116, "learning_rate": 3.748367758784426e-05, "loss": 0.0157, "step": 34240 }, { "epoch": 0.2531711067088495, "grad_norm": 0.43225792050361633, "learning_rate": 3.747996794871795e-05, "loss": 0.0213, "step": 34250 }, { "epoch": 0.25324502528015136, "grad_norm": 0.08636850863695145, "learning_rate": 3.7476258309591644e-05, "loss": 0.0211, "step": 34260 }, { "epoch": 0.25331894385145326, "grad_norm": 0.12886829674243927, "learning_rate": 3.747254867046534e-05, "loss": 0.0214, "step": 34270 }, { "epoch": 0.2533928624227551, "grad_norm": 0.07349354028701782, "learning_rate": 3.746883903133903e-05, "loss": 0.0177, "step": 34280 }, { "epoch": 0.25346678099405695, "grad_norm": 0.06146747246384621, "learning_rate": 3.7465129392212725e-05, "loss": 0.0184, "step": 34290 }, { "epoch": 0.2535406995653588, "grad_norm": 0.08462537825107574, "learning_rate": 3.746141975308642e-05, "loss": 0.019, "step": 34300 }, { "epoch": 0.25361461813666064, "grad_norm": 0.08384005725383759, "learning_rate": 3.745771011396012e-05, "loss": 0.0187, "step": 34310 }, { "epoch": 0.2536885367079625, "grad_norm": 0.0576070211827755, "learning_rate": 3.745400047483381e-05, "loss": 0.0197, "step": 34320 }, { "epoch": 0.2537624552792644, "grad_norm": 0.12527556717395782, "learning_rate": 3.74502908357075e-05, "loss": 0.0204, "step": 34330 }, { "epoch": 0.25383637385056623, "grad_norm": 0.07750841975212097, "learning_rate": 3.74465811965812e-05, "loss": 0.019, "step": 34340 }, { "epoch": 0.2539102924218681, "grad_norm": 0.0806896835565567, "learning_rate": 3.7442871557454894e-05, "loss": 0.0192, "step": 34350 }, { "epoch": 0.2539842109931699, "grad_norm": 0.08760758489370346, "learning_rate": 3.7439161918328583e-05, "loss": 0.0205, "step": 34360 }, { "epoch": 0.25405812956447177, "grad_norm": 0.1384943574666977, "learning_rate": 3.743545227920228e-05, "loss": 0.0213, "step": 34370 }, { "epoch": 0.2541320481357736, "grad_norm": 0.08599944412708282, "learning_rate": 3.7431742640075975e-05, "loss": 0.022, "step": 34380 }, { "epoch": 0.25420596670707546, "grad_norm": 0.0776156410574913, "learning_rate": 3.742803300094967e-05, "loss": 0.0189, "step": 34390 }, { "epoch": 0.25427988527837736, "grad_norm": 0.1002473384141922, "learning_rate": 3.742432336182337e-05, "loss": 0.0188, "step": 34400 }, { "epoch": 0.2543538038496792, "grad_norm": 0.0879029631614685, "learning_rate": 3.7420613722697057e-05, "loss": 0.0177, "step": 34410 }, { "epoch": 0.25442772242098105, "grad_norm": 0.09049376100301743, "learning_rate": 3.741690408357075e-05, "loss": 0.0179, "step": 34420 }, { "epoch": 0.2545016409922829, "grad_norm": 0.07568518072366714, "learning_rate": 3.741319444444444e-05, "loss": 0.0191, "step": 34430 }, { "epoch": 0.25457555956358474, "grad_norm": 0.09793508797883987, "learning_rate": 3.740948480531814e-05, "loss": 0.0186, "step": 34440 }, { "epoch": 0.2546494781348866, "grad_norm": 0.129734605550766, "learning_rate": 3.7405775166191834e-05, "loss": 0.0192, "step": 34450 }, { "epoch": 0.2547233967061885, "grad_norm": 0.1545630842447281, "learning_rate": 3.740206552706553e-05, "loss": 0.0217, "step": 34460 }, { "epoch": 0.25479731527749033, "grad_norm": 0.10320380330085754, "learning_rate": 3.7398355887939226e-05, "loss": 0.0201, "step": 34470 }, { "epoch": 0.2548712338487922, "grad_norm": 0.10976788401603699, "learning_rate": 3.7394646248812915e-05, "loss": 0.0191, "step": 34480 }, { "epoch": 0.254945152420094, "grad_norm": 0.08091693371534348, "learning_rate": 3.739093660968661e-05, "loss": 0.0174, "step": 34490 }, { "epoch": 0.25501907099139587, "grad_norm": 0.07782463729381561, "learning_rate": 3.738722697056031e-05, "loss": 0.0178, "step": 34500 }, { "epoch": 0.2550929895626977, "grad_norm": 0.12280046194791794, "learning_rate": 3.7383517331433996e-05, "loss": 0.0192, "step": 34510 }, { "epoch": 0.25516690813399956, "grad_norm": 0.08706457912921906, "learning_rate": 3.737980769230769e-05, "loss": 0.0178, "step": 34520 }, { "epoch": 0.25524082670530146, "grad_norm": 0.08564306795597076, "learning_rate": 3.737609805318139e-05, "loss": 0.0184, "step": 34530 }, { "epoch": 0.2553147452766033, "grad_norm": 0.08043599873781204, "learning_rate": 3.7372388414055084e-05, "loss": 0.0206, "step": 34540 }, { "epoch": 0.25538866384790515, "grad_norm": 0.10819025337696075, "learning_rate": 3.736867877492878e-05, "loss": 0.0223, "step": 34550 }, { "epoch": 0.255462582419207, "grad_norm": 0.08196381479501724, "learning_rate": 3.736496913580247e-05, "loss": 0.0186, "step": 34560 }, { "epoch": 0.25553650099050884, "grad_norm": 0.1249900832772255, "learning_rate": 3.7361259496676165e-05, "loss": 0.0173, "step": 34570 }, { "epoch": 0.2556104195618107, "grad_norm": 0.09837884455919266, "learning_rate": 3.735754985754986e-05, "loss": 0.0186, "step": 34580 }, { "epoch": 0.2556843381331126, "grad_norm": 0.0960315689444542, "learning_rate": 3.735384021842355e-05, "loss": 0.0194, "step": 34590 }, { "epoch": 0.25575825670441443, "grad_norm": 0.07749620825052261, "learning_rate": 3.7350130579297246e-05, "loss": 0.0192, "step": 34600 }, { "epoch": 0.2558321752757163, "grad_norm": 0.07777617126703262, "learning_rate": 3.734642094017094e-05, "loss": 0.0212, "step": 34610 }, { "epoch": 0.2559060938470181, "grad_norm": 0.09821510314941406, "learning_rate": 3.734271130104464e-05, "loss": 0.0189, "step": 34620 }, { "epoch": 0.25598001241831997, "grad_norm": 0.18012312054634094, "learning_rate": 3.7339001661918334e-05, "loss": 0.0181, "step": 34630 }, { "epoch": 0.2560539309896218, "grad_norm": 0.10718531906604767, "learning_rate": 3.733529202279202e-05, "loss": 0.0201, "step": 34640 }, { "epoch": 0.25612784956092366, "grad_norm": 0.09973935037851334, "learning_rate": 3.733158238366572e-05, "loss": 0.0186, "step": 34650 }, { "epoch": 0.25620176813222556, "grad_norm": 0.096929170191288, "learning_rate": 3.732787274453941e-05, "loss": 0.0215, "step": 34660 }, { "epoch": 0.2562756867035274, "grad_norm": 0.09448627382516861, "learning_rate": 3.7324163105413104e-05, "loss": 0.0194, "step": 34670 }, { "epoch": 0.25634960527482925, "grad_norm": 0.09486164897680283, "learning_rate": 3.73204534662868e-05, "loss": 0.0176, "step": 34680 }, { "epoch": 0.2564235238461311, "grad_norm": 0.09092456102371216, "learning_rate": 3.7316743827160496e-05, "loss": 0.0208, "step": 34690 }, { "epoch": 0.25649744241743294, "grad_norm": 0.11252086609601974, "learning_rate": 3.731303418803419e-05, "loss": 0.0204, "step": 34700 }, { "epoch": 0.2565713609887348, "grad_norm": 0.08080518245697021, "learning_rate": 3.730932454890788e-05, "loss": 0.0206, "step": 34710 }, { "epoch": 0.2566452795600367, "grad_norm": 0.0721350684762001, "learning_rate": 3.730561490978158e-05, "loss": 0.0185, "step": 34720 }, { "epoch": 0.25671919813133853, "grad_norm": 0.08283282071352005, "learning_rate": 3.730190527065527e-05, "loss": 0.0202, "step": 34730 }, { "epoch": 0.2567931167026404, "grad_norm": 0.09533637017011642, "learning_rate": 3.729819563152896e-05, "loss": 0.0199, "step": 34740 }, { "epoch": 0.2568670352739422, "grad_norm": 0.1281680017709732, "learning_rate": 3.729448599240266e-05, "loss": 0.0197, "step": 34750 }, { "epoch": 0.25694095384524407, "grad_norm": 0.15431781113147736, "learning_rate": 3.7290776353276354e-05, "loss": 0.0173, "step": 34760 }, { "epoch": 0.2570148724165459, "grad_norm": 0.09290771931409836, "learning_rate": 3.728706671415005e-05, "loss": 0.0191, "step": 34770 }, { "epoch": 0.25708879098784776, "grad_norm": 0.07939286530017853, "learning_rate": 3.7283357075023746e-05, "loss": 0.021, "step": 34780 }, { "epoch": 0.25716270955914966, "grad_norm": 0.07546821236610413, "learning_rate": 3.7279647435897436e-05, "loss": 0.0211, "step": 34790 }, { "epoch": 0.2572366281304515, "grad_norm": 0.11875446140766144, "learning_rate": 3.727593779677113e-05, "loss": 0.0229, "step": 34800 }, { "epoch": 0.25731054670175335, "grad_norm": 0.09297899901866913, "learning_rate": 3.727222815764483e-05, "loss": 0.0173, "step": 34810 }, { "epoch": 0.2573844652730552, "grad_norm": 0.06684008240699768, "learning_rate": 3.726851851851852e-05, "loss": 0.0175, "step": 34820 }, { "epoch": 0.25745838384435704, "grad_norm": 0.11314887553453445, "learning_rate": 3.726480887939221e-05, "loss": 0.02, "step": 34830 }, { "epoch": 0.2575323024156589, "grad_norm": 0.12812693417072296, "learning_rate": 3.726109924026591e-05, "loss": 0.0196, "step": 34840 }, { "epoch": 0.2576062209869608, "grad_norm": 0.10089819878339767, "learning_rate": 3.7257389601139605e-05, "loss": 0.0198, "step": 34850 }, { "epoch": 0.25768013955826263, "grad_norm": 0.09206525981426239, "learning_rate": 3.72536799620133e-05, "loss": 0.0186, "step": 34860 }, { "epoch": 0.2577540581295645, "grad_norm": 0.09052669256925583, "learning_rate": 3.724997032288699e-05, "loss": 0.0164, "step": 34870 }, { "epoch": 0.2578279767008663, "grad_norm": 0.08012855798006058, "learning_rate": 3.7246260683760686e-05, "loss": 0.0191, "step": 34880 }, { "epoch": 0.25790189527216817, "grad_norm": 0.10896433144807816, "learning_rate": 3.7242551044634375e-05, "loss": 0.0212, "step": 34890 }, { "epoch": 0.25797581384347, "grad_norm": 0.05664210394024849, "learning_rate": 3.723884140550807e-05, "loss": 0.0189, "step": 34900 }, { "epoch": 0.25804973241477186, "grad_norm": 0.07072228193283081, "learning_rate": 3.723513176638177e-05, "loss": 0.0203, "step": 34910 }, { "epoch": 0.25812365098607376, "grad_norm": 0.07742172479629517, "learning_rate": 3.723142212725546e-05, "loss": 0.018, "step": 34920 }, { "epoch": 0.2581975695573756, "grad_norm": 0.08487288653850555, "learning_rate": 3.722771248812916e-05, "loss": 0.0192, "step": 34930 }, { "epoch": 0.25827148812867745, "grad_norm": 0.0894191637635231, "learning_rate": 3.722400284900285e-05, "loss": 0.0183, "step": 34940 }, { "epoch": 0.2583454066999793, "grad_norm": 0.077961266040802, "learning_rate": 3.7220293209876544e-05, "loss": 0.0189, "step": 34950 }, { "epoch": 0.25841932527128114, "grad_norm": 0.08691947162151337, "learning_rate": 3.721658357075024e-05, "loss": 0.0183, "step": 34960 }, { "epoch": 0.258493243842583, "grad_norm": 0.06688518822193146, "learning_rate": 3.721287393162393e-05, "loss": 0.0218, "step": 34970 }, { "epoch": 0.2585671624138849, "grad_norm": 0.10814563930034637, "learning_rate": 3.7209164292497625e-05, "loss": 0.0201, "step": 34980 }, { "epoch": 0.25864108098518673, "grad_norm": 0.11186385154724121, "learning_rate": 3.720545465337133e-05, "loss": 0.0177, "step": 34990 }, { "epoch": 0.2587149995564886, "grad_norm": 0.07740023732185364, "learning_rate": 3.720174501424502e-05, "loss": 0.0181, "step": 35000 }, { "epoch": 0.2587889181277904, "grad_norm": 0.12069465219974518, "learning_rate": 3.719803537511871e-05, "loss": 0.0186, "step": 35010 }, { "epoch": 0.25886283669909227, "grad_norm": 0.10102164000272751, "learning_rate": 3.71943257359924e-05, "loss": 0.0192, "step": 35020 }, { "epoch": 0.2589367552703941, "grad_norm": 0.0714918002486229, "learning_rate": 3.71906160968661e-05, "loss": 0.0177, "step": 35030 }, { "epoch": 0.259010673841696, "grad_norm": 0.09676463156938553, "learning_rate": 3.7186906457739794e-05, "loss": 0.0206, "step": 35040 }, { "epoch": 0.25908459241299786, "grad_norm": 0.1151902824640274, "learning_rate": 3.718319681861348e-05, "loss": 0.0169, "step": 35050 }, { "epoch": 0.2591585109842997, "grad_norm": 0.09498842060565948, "learning_rate": 3.717948717948718e-05, "loss": 0.0182, "step": 35060 }, { "epoch": 0.25923242955560155, "grad_norm": 0.08307395130395889, "learning_rate": 3.7175777540360875e-05, "loss": 0.0182, "step": 35070 }, { "epoch": 0.2593063481269034, "grad_norm": 0.09639619290828705, "learning_rate": 3.717206790123457e-05, "loss": 0.0199, "step": 35080 }, { "epoch": 0.25938026669820524, "grad_norm": 0.056522320955991745, "learning_rate": 3.716835826210827e-05, "loss": 0.017, "step": 35090 }, { "epoch": 0.2594541852695071, "grad_norm": 0.09537209570407867, "learning_rate": 3.7164648622981956e-05, "loss": 0.0183, "step": 35100 }, { "epoch": 0.259528103840809, "grad_norm": 0.12425485253334045, "learning_rate": 3.716093898385565e-05, "loss": 0.0199, "step": 35110 }, { "epoch": 0.25960202241211083, "grad_norm": 0.08875080943107605, "learning_rate": 3.715722934472934e-05, "loss": 0.019, "step": 35120 }, { "epoch": 0.2596759409834127, "grad_norm": 0.10081818699836731, "learning_rate": 3.715351970560304e-05, "loss": 0.0183, "step": 35130 }, { "epoch": 0.2597498595547145, "grad_norm": 0.10890643298625946, "learning_rate": 3.714981006647674e-05, "loss": 0.0193, "step": 35140 }, { "epoch": 0.25982377812601637, "grad_norm": 0.09250643849372864, "learning_rate": 3.714610042735043e-05, "loss": 0.0191, "step": 35150 }, { "epoch": 0.2598976966973182, "grad_norm": 0.08782697468996048, "learning_rate": 3.7142390788224125e-05, "loss": 0.0179, "step": 35160 }, { "epoch": 0.2599716152686201, "grad_norm": 0.09054393321275711, "learning_rate": 3.7138681149097814e-05, "loss": 0.0195, "step": 35170 }, { "epoch": 0.26004553383992196, "grad_norm": 0.08586835116147995, "learning_rate": 3.713497150997151e-05, "loss": 0.018, "step": 35180 }, { "epoch": 0.2601194524112238, "grad_norm": 0.07333735376596451, "learning_rate": 3.7131261870845206e-05, "loss": 0.0209, "step": 35190 }, { "epoch": 0.26019337098252565, "grad_norm": 0.08953770250082016, "learning_rate": 3.7127552231718896e-05, "loss": 0.0163, "step": 35200 }, { "epoch": 0.2602672895538275, "grad_norm": 0.09727450460195541, "learning_rate": 3.712384259259259e-05, "loss": 0.0164, "step": 35210 }, { "epoch": 0.26034120812512934, "grad_norm": 0.1030101552605629, "learning_rate": 3.7120132953466294e-05, "loss": 0.0181, "step": 35220 }, { "epoch": 0.2604151266964312, "grad_norm": 0.11227700859308243, "learning_rate": 3.7116423314339984e-05, "loss": 0.0218, "step": 35230 }, { "epoch": 0.2604890452677331, "grad_norm": 0.09380971640348434, "learning_rate": 3.711271367521368e-05, "loss": 0.0196, "step": 35240 }, { "epoch": 0.26056296383903493, "grad_norm": 0.07920172065496445, "learning_rate": 3.710900403608737e-05, "loss": 0.0183, "step": 35250 }, { "epoch": 0.2606368824103368, "grad_norm": 0.10708235204219818, "learning_rate": 3.7105294396961065e-05, "loss": 0.0195, "step": 35260 }, { "epoch": 0.2607108009816386, "grad_norm": 0.12499354779720306, "learning_rate": 3.710158475783476e-05, "loss": 0.0209, "step": 35270 }, { "epoch": 0.26078471955294047, "grad_norm": 0.0819254145026207, "learning_rate": 3.709787511870845e-05, "loss": 0.0191, "step": 35280 }, { "epoch": 0.2608586381242423, "grad_norm": 0.12078723311424255, "learning_rate": 3.709416547958215e-05, "loss": 0.0205, "step": 35290 }, { "epoch": 0.2609325566955442, "grad_norm": 0.09493082016706467, "learning_rate": 3.709045584045584e-05, "loss": 0.0202, "step": 35300 }, { "epoch": 0.26100647526684606, "grad_norm": 0.07896389067173004, "learning_rate": 3.708674620132954e-05, "loss": 0.0177, "step": 35310 }, { "epoch": 0.2610803938381479, "grad_norm": 0.10148924589157104, "learning_rate": 3.7083036562203234e-05, "loss": 0.0205, "step": 35320 }, { "epoch": 0.26115431240944975, "grad_norm": 0.08146346360445023, "learning_rate": 3.707932692307692e-05, "loss": 0.0219, "step": 35330 }, { "epoch": 0.2612282309807516, "grad_norm": 0.0864454135298729, "learning_rate": 3.707561728395062e-05, "loss": 0.0219, "step": 35340 }, { "epoch": 0.26130214955205344, "grad_norm": 0.08265972882509232, "learning_rate": 3.707190764482431e-05, "loss": 0.0205, "step": 35350 }, { "epoch": 0.2613760681233553, "grad_norm": 0.0848066657781601, "learning_rate": 3.7068198005698004e-05, "loss": 0.0202, "step": 35360 }, { "epoch": 0.2614499866946572, "grad_norm": 0.09225792437791824, "learning_rate": 3.706448836657171e-05, "loss": 0.0227, "step": 35370 }, { "epoch": 0.26152390526595903, "grad_norm": 0.0770459994673729, "learning_rate": 3.7060778727445396e-05, "loss": 0.0186, "step": 35380 }, { "epoch": 0.2615978238372609, "grad_norm": 0.07575425505638123, "learning_rate": 3.705706908831909e-05, "loss": 0.0169, "step": 35390 }, { "epoch": 0.2616717424085627, "grad_norm": 0.07981394976377487, "learning_rate": 3.705335944919278e-05, "loss": 0.0178, "step": 35400 }, { "epoch": 0.26174566097986457, "grad_norm": 0.10660523921251297, "learning_rate": 3.704964981006648e-05, "loss": 0.0179, "step": 35410 }, { "epoch": 0.2618195795511664, "grad_norm": 0.11321929097175598, "learning_rate": 3.704594017094017e-05, "loss": 0.0192, "step": 35420 }, { "epoch": 0.2618934981224683, "grad_norm": 0.10271138697862625, "learning_rate": 3.704223053181386e-05, "loss": 0.0183, "step": 35430 }, { "epoch": 0.26196741669377016, "grad_norm": 0.08852849900722504, "learning_rate": 3.703852089268756e-05, "loss": 0.0188, "step": 35440 }, { "epoch": 0.262041335265072, "grad_norm": 0.10573693364858627, "learning_rate": 3.703481125356126e-05, "loss": 0.0194, "step": 35450 }, { "epoch": 0.26211525383637385, "grad_norm": 0.09053755551576614, "learning_rate": 3.703110161443495e-05, "loss": 0.0186, "step": 35460 }, { "epoch": 0.2621891724076757, "grad_norm": 0.09393644332885742, "learning_rate": 3.7027391975308646e-05, "loss": 0.0188, "step": 35470 }, { "epoch": 0.26226309097897754, "grad_norm": 0.0807633176445961, "learning_rate": 3.7023682336182335e-05, "loss": 0.0214, "step": 35480 }, { "epoch": 0.2623370095502794, "grad_norm": 0.0962679386138916, "learning_rate": 3.701997269705603e-05, "loss": 0.019, "step": 35490 }, { "epoch": 0.2624109281215813, "grad_norm": 0.08685987442731857, "learning_rate": 3.701626305792973e-05, "loss": 0.0189, "step": 35500 }, { "epoch": 0.26248484669288313, "grad_norm": 0.08247281610965729, "learning_rate": 3.7012553418803416e-05, "loss": 0.0174, "step": 35510 }, { "epoch": 0.262558765264185, "grad_norm": 0.09114781022071838, "learning_rate": 3.700884377967712e-05, "loss": 0.0195, "step": 35520 }, { "epoch": 0.2626326838354868, "grad_norm": 0.09900009632110596, "learning_rate": 3.700513414055081e-05, "loss": 0.0218, "step": 35530 }, { "epoch": 0.26270660240678867, "grad_norm": 0.08584482222795486, "learning_rate": 3.7001424501424504e-05, "loss": 0.0194, "step": 35540 }, { "epoch": 0.2627805209780905, "grad_norm": 0.1007232666015625, "learning_rate": 3.69977148622982e-05, "loss": 0.0177, "step": 35550 }, { "epoch": 0.2628544395493924, "grad_norm": 0.08348452299833298, "learning_rate": 3.699400522317189e-05, "loss": 0.0199, "step": 35560 }, { "epoch": 0.26292835812069426, "grad_norm": 0.08889731019735336, "learning_rate": 3.6990295584045585e-05, "loss": 0.0191, "step": 35570 }, { "epoch": 0.2630022766919961, "grad_norm": 0.07925963401794434, "learning_rate": 3.6986585944919275e-05, "loss": 0.0189, "step": 35580 }, { "epoch": 0.26307619526329795, "grad_norm": 0.06700103729963303, "learning_rate": 3.698287630579297e-05, "loss": 0.0235, "step": 35590 }, { "epoch": 0.2631501138345998, "grad_norm": 0.09109890460968018, "learning_rate": 3.697916666666667e-05, "loss": 0.0164, "step": 35600 }, { "epoch": 0.26322403240590164, "grad_norm": 0.07469119131565094, "learning_rate": 3.697545702754036e-05, "loss": 0.0208, "step": 35610 }, { "epoch": 0.2632979509772035, "grad_norm": 0.09684962779283524, "learning_rate": 3.697174738841406e-05, "loss": 0.0178, "step": 35620 }, { "epoch": 0.2633718695485054, "grad_norm": 0.09081660211086273, "learning_rate": 3.696803774928775e-05, "loss": 0.0171, "step": 35630 }, { "epoch": 0.26344578811980723, "grad_norm": 0.07729621231555939, "learning_rate": 3.6964328110161444e-05, "loss": 0.0171, "step": 35640 }, { "epoch": 0.2635197066911091, "grad_norm": 0.07513635605573654, "learning_rate": 3.696061847103514e-05, "loss": 0.0215, "step": 35650 }, { "epoch": 0.2635936252624109, "grad_norm": 0.07328686863183975, "learning_rate": 3.695690883190883e-05, "loss": 0.0183, "step": 35660 }, { "epoch": 0.26366754383371277, "grad_norm": 0.08256453275680542, "learning_rate": 3.695319919278253e-05, "loss": 0.0171, "step": 35670 }, { "epoch": 0.2637414624050146, "grad_norm": 0.08420126140117645, "learning_rate": 3.694948955365623e-05, "loss": 0.02, "step": 35680 }, { "epoch": 0.2638153809763165, "grad_norm": 0.09419424086809158, "learning_rate": 3.694577991452992e-05, "loss": 0.0203, "step": 35690 }, { "epoch": 0.26388929954761836, "grad_norm": 0.07843416184186935, "learning_rate": 3.694207027540361e-05, "loss": 0.0207, "step": 35700 }, { "epoch": 0.2639632181189202, "grad_norm": 0.06925562769174576, "learning_rate": 3.69383606362773e-05, "loss": 0.0204, "step": 35710 }, { "epoch": 0.26403713669022205, "grad_norm": 0.06749237328767776, "learning_rate": 3.6934650997151e-05, "loss": 0.0186, "step": 35720 }, { "epoch": 0.2641110552615239, "grad_norm": 0.0696081593632698, "learning_rate": 3.6930941358024694e-05, "loss": 0.0192, "step": 35730 }, { "epoch": 0.26418497383282574, "grad_norm": 0.0760570541024208, "learning_rate": 3.692723171889838e-05, "loss": 0.0186, "step": 35740 }, { "epoch": 0.2642588924041276, "grad_norm": 0.10477142781019211, "learning_rate": 3.6923522079772086e-05, "loss": 0.0184, "step": 35750 }, { "epoch": 0.2643328109754295, "grad_norm": 0.103655606508255, "learning_rate": 3.6919812440645775e-05, "loss": 0.0204, "step": 35760 }, { "epoch": 0.26440672954673133, "grad_norm": 0.09836665540933609, "learning_rate": 3.691610280151947e-05, "loss": 0.0199, "step": 35770 }, { "epoch": 0.2644806481180332, "grad_norm": 0.07217530161142349, "learning_rate": 3.691239316239317e-05, "loss": 0.021, "step": 35780 }, { "epoch": 0.264554566689335, "grad_norm": 0.07730984687805176, "learning_rate": 3.6908683523266856e-05, "loss": 0.0208, "step": 35790 }, { "epoch": 0.26462848526063687, "grad_norm": 0.09929768741130829, "learning_rate": 3.690497388414055e-05, "loss": 0.019, "step": 35800 }, { "epoch": 0.2647024038319387, "grad_norm": 0.09074138849973679, "learning_rate": 3.690126424501424e-05, "loss": 0.0196, "step": 35810 }, { "epoch": 0.2647763224032406, "grad_norm": 0.10163474828004837, "learning_rate": 3.6897554605887944e-05, "loss": 0.0182, "step": 35820 }, { "epoch": 0.26485024097454246, "grad_norm": 0.08178673684597015, "learning_rate": 3.689384496676164e-05, "loss": 0.0172, "step": 35830 }, { "epoch": 0.2649241595458443, "grad_norm": 0.09935182332992554, "learning_rate": 3.689013532763533e-05, "loss": 0.019, "step": 35840 }, { "epoch": 0.26499807811714615, "grad_norm": 0.08761539310216904, "learning_rate": 3.6886425688509025e-05, "loss": 0.0227, "step": 35850 }, { "epoch": 0.265071996688448, "grad_norm": 0.0659652128815651, "learning_rate": 3.6882716049382714e-05, "loss": 0.02, "step": 35860 }, { "epoch": 0.26514591525974984, "grad_norm": 0.073598213493824, "learning_rate": 3.687900641025641e-05, "loss": 0.0177, "step": 35870 }, { "epoch": 0.2652198338310517, "grad_norm": 0.104554682970047, "learning_rate": 3.6875296771130106e-05, "loss": 0.0196, "step": 35880 }, { "epoch": 0.2652937524023536, "grad_norm": 0.06133726239204407, "learning_rate": 3.6871587132003795e-05, "loss": 0.0168, "step": 35890 }, { "epoch": 0.26536767097365543, "grad_norm": 0.10068414360284805, "learning_rate": 3.68678774928775e-05, "loss": 0.0225, "step": 35900 }, { "epoch": 0.2654415895449573, "grad_norm": 0.08246283233165741, "learning_rate": 3.6864167853751194e-05, "loss": 0.018, "step": 35910 }, { "epoch": 0.2655155081162591, "grad_norm": 0.06949212402105331, "learning_rate": 3.686045821462488e-05, "loss": 0.0182, "step": 35920 }, { "epoch": 0.26558942668756097, "grad_norm": 0.09852777421474457, "learning_rate": 3.685674857549858e-05, "loss": 0.0187, "step": 35930 }, { "epoch": 0.2656633452588628, "grad_norm": 0.0703735500574112, "learning_rate": 3.685303893637227e-05, "loss": 0.02, "step": 35940 }, { "epoch": 0.2657372638301647, "grad_norm": 0.11955846846103668, "learning_rate": 3.6849329297245964e-05, "loss": 0.0206, "step": 35950 }, { "epoch": 0.26581118240146656, "grad_norm": 0.08075056225061417, "learning_rate": 3.684561965811966e-05, "loss": 0.0187, "step": 35960 }, { "epoch": 0.2658851009727684, "grad_norm": 0.08222194015979767, "learning_rate": 3.6841910018993356e-05, "loss": 0.0175, "step": 35970 }, { "epoch": 0.26595901954407025, "grad_norm": 0.10614724457263947, "learning_rate": 3.683820037986705e-05, "loss": 0.0207, "step": 35980 }, { "epoch": 0.2660329381153721, "grad_norm": 0.09781161695718765, "learning_rate": 3.683449074074074e-05, "loss": 0.0203, "step": 35990 }, { "epoch": 0.26610685668667394, "grad_norm": 0.08541706204414368, "learning_rate": 3.683078110161444e-05, "loss": 0.0184, "step": 36000 }, { "epoch": 0.2661807752579758, "grad_norm": 0.09494499117136002, "learning_rate": 3.6827071462488133e-05, "loss": 0.0229, "step": 36010 }, { "epoch": 0.2662546938292777, "grad_norm": 0.08918741345405579, "learning_rate": 3.682336182336182e-05, "loss": 0.0221, "step": 36020 }, { "epoch": 0.26632861240057953, "grad_norm": 0.07742021977901459, "learning_rate": 3.681965218423552e-05, "loss": 0.0208, "step": 36030 }, { "epoch": 0.2664025309718814, "grad_norm": 0.09315332770347595, "learning_rate": 3.681594254510921e-05, "loss": 0.0179, "step": 36040 }, { "epoch": 0.2664764495431832, "grad_norm": 0.10973034054040909, "learning_rate": 3.681223290598291e-05, "loss": 0.0163, "step": 36050 }, { "epoch": 0.26655036811448507, "grad_norm": 0.157131165266037, "learning_rate": 3.6808523266856606e-05, "loss": 0.0193, "step": 36060 }, { "epoch": 0.2666242866857869, "grad_norm": 0.11281372606754303, "learning_rate": 3.6804813627730296e-05, "loss": 0.0201, "step": 36070 }, { "epoch": 0.2666982052570888, "grad_norm": 0.08388475328683853, "learning_rate": 3.680110398860399e-05, "loss": 0.0187, "step": 36080 }, { "epoch": 0.26677212382839066, "grad_norm": 0.11188846081495285, "learning_rate": 3.679739434947768e-05, "loss": 0.0178, "step": 36090 }, { "epoch": 0.2668460423996925, "grad_norm": 0.07050701230764389, "learning_rate": 3.679368471035138e-05, "loss": 0.0155, "step": 36100 }, { "epoch": 0.26691996097099435, "grad_norm": 0.09294600039720535, "learning_rate": 3.678997507122507e-05, "loss": 0.0162, "step": 36110 }, { "epoch": 0.2669938795422962, "grad_norm": 0.1007222980260849, "learning_rate": 3.678626543209877e-05, "loss": 0.019, "step": 36120 }, { "epoch": 0.26706779811359804, "grad_norm": 0.06697934865951538, "learning_rate": 3.6782555792972465e-05, "loss": 0.0204, "step": 36130 }, { "epoch": 0.2671417166848999, "grad_norm": 0.07020814716815948, "learning_rate": 3.677884615384616e-05, "loss": 0.0203, "step": 36140 }, { "epoch": 0.2672156352562018, "grad_norm": 0.0640861839056015, "learning_rate": 3.677513651471985e-05, "loss": 0.0198, "step": 36150 }, { "epoch": 0.26728955382750363, "grad_norm": 0.08898360282182693, "learning_rate": 3.6771426875593546e-05, "loss": 0.0193, "step": 36160 }, { "epoch": 0.2673634723988055, "grad_norm": 0.06888226419687271, "learning_rate": 3.6767717236467235e-05, "loss": 0.0182, "step": 36170 }, { "epoch": 0.2674373909701073, "grad_norm": 0.09916161000728607, "learning_rate": 3.676400759734093e-05, "loss": 0.0194, "step": 36180 }, { "epoch": 0.26751130954140917, "grad_norm": 0.08564165979623795, "learning_rate": 3.676029795821463e-05, "loss": 0.0211, "step": 36190 }, { "epoch": 0.267585228112711, "grad_norm": 0.06861856579780579, "learning_rate": 3.675658831908832e-05, "loss": 0.0203, "step": 36200 }, { "epoch": 0.2676591466840129, "grad_norm": 0.06755343079566956, "learning_rate": 3.675287867996202e-05, "loss": 0.02, "step": 36210 }, { "epoch": 0.26773306525531476, "grad_norm": 0.0989122986793518, "learning_rate": 3.674916904083571e-05, "loss": 0.0172, "step": 36220 }, { "epoch": 0.2678069838266166, "grad_norm": 0.09048120677471161, "learning_rate": 3.6745459401709404e-05, "loss": 0.0199, "step": 36230 }, { "epoch": 0.26788090239791845, "grad_norm": 0.07613878697156906, "learning_rate": 3.67417497625831e-05, "loss": 0.0189, "step": 36240 }, { "epoch": 0.2679548209692203, "grad_norm": 0.08191327750682831, "learning_rate": 3.673804012345679e-05, "loss": 0.0185, "step": 36250 }, { "epoch": 0.26802873954052214, "grad_norm": 0.06279822438955307, "learning_rate": 3.6734330484330485e-05, "loss": 0.0169, "step": 36260 }, { "epoch": 0.268102658111824, "grad_norm": 0.07203217595815659, "learning_rate": 3.673062084520418e-05, "loss": 0.0191, "step": 36270 }, { "epoch": 0.2681765766831259, "grad_norm": 0.10490775853395462, "learning_rate": 3.672691120607788e-05, "loss": 0.0186, "step": 36280 }, { "epoch": 0.26825049525442773, "grad_norm": 0.1026383489370346, "learning_rate": 3.672320156695157e-05, "loss": 0.0197, "step": 36290 }, { "epoch": 0.2683244138257296, "grad_norm": 0.07934411615133286, "learning_rate": 3.671949192782526e-05, "loss": 0.0179, "step": 36300 }, { "epoch": 0.2683983323970314, "grad_norm": 0.07977404445409775, "learning_rate": 3.671578228869896e-05, "loss": 0.0181, "step": 36310 }, { "epoch": 0.26847225096833327, "grad_norm": 0.10161271691322327, "learning_rate": 3.671207264957265e-05, "loss": 0.0204, "step": 36320 }, { "epoch": 0.2685461695396351, "grad_norm": 0.07428612560033798, "learning_rate": 3.670836301044634e-05, "loss": 0.0162, "step": 36330 }, { "epoch": 0.268620088110937, "grad_norm": 0.05228397622704506, "learning_rate": 3.670465337132004e-05, "loss": 0.0192, "step": 36340 }, { "epoch": 0.26869400668223886, "grad_norm": 0.0897165909409523, "learning_rate": 3.6700943732193735e-05, "loss": 0.0188, "step": 36350 }, { "epoch": 0.2687679252535407, "grad_norm": 0.09100687503814697, "learning_rate": 3.669723409306743e-05, "loss": 0.0192, "step": 36360 }, { "epoch": 0.26884184382484255, "grad_norm": 0.12539775669574738, "learning_rate": 3.669352445394113e-05, "loss": 0.0173, "step": 36370 }, { "epoch": 0.2689157623961444, "grad_norm": 0.10039008408784866, "learning_rate": 3.6689814814814816e-05, "loss": 0.0211, "step": 36380 }, { "epoch": 0.26898968096744624, "grad_norm": 0.09370112419128418, "learning_rate": 3.668610517568851e-05, "loss": 0.0172, "step": 36390 }, { "epoch": 0.2690635995387481, "grad_norm": 0.11496493220329285, "learning_rate": 3.66823955365622e-05, "loss": 0.0168, "step": 36400 }, { "epoch": 0.26913751811005, "grad_norm": 0.07878755778074265, "learning_rate": 3.66786858974359e-05, "loss": 0.0183, "step": 36410 }, { "epoch": 0.26921143668135183, "grad_norm": 0.09112108498811722, "learning_rate": 3.6674976258309594e-05, "loss": 0.0192, "step": 36420 }, { "epoch": 0.2692853552526537, "grad_norm": 0.10041724890470505, "learning_rate": 3.667126661918329e-05, "loss": 0.0195, "step": 36430 }, { "epoch": 0.2693592738239555, "grad_norm": 0.10924817621707916, "learning_rate": 3.6667556980056985e-05, "loss": 0.0206, "step": 36440 }, { "epoch": 0.26943319239525737, "grad_norm": 0.09754710644483566, "learning_rate": 3.6663847340930675e-05, "loss": 0.0183, "step": 36450 }, { "epoch": 0.2695071109665592, "grad_norm": 0.09620609879493713, "learning_rate": 3.666013770180437e-05, "loss": 0.0193, "step": 36460 }, { "epoch": 0.2695810295378611, "grad_norm": 0.09440019726753235, "learning_rate": 3.6656428062678067e-05, "loss": 0.0175, "step": 36470 }, { "epoch": 0.26965494810916296, "grad_norm": 0.07512184977531433, "learning_rate": 3.6652718423551756e-05, "loss": 0.0198, "step": 36480 }, { "epoch": 0.2697288666804648, "grad_norm": 0.08837522566318512, "learning_rate": 3.664900878442545e-05, "loss": 0.0201, "step": 36490 }, { "epoch": 0.26980278525176665, "grad_norm": 0.16763469576835632, "learning_rate": 3.664529914529915e-05, "loss": 0.0219, "step": 36500 }, { "epoch": 0.2698767038230685, "grad_norm": 0.1020568385720253, "learning_rate": 3.6641589506172844e-05, "loss": 0.0194, "step": 36510 }, { "epoch": 0.26995062239437034, "grad_norm": 0.11948026716709137, "learning_rate": 3.663787986704654e-05, "loss": 0.0189, "step": 36520 }, { "epoch": 0.2700245409656722, "grad_norm": 0.07943316549062729, "learning_rate": 3.663417022792023e-05, "loss": 0.0203, "step": 36530 }, { "epoch": 0.2700984595369741, "grad_norm": 0.2049197405576706, "learning_rate": 3.6630460588793925e-05, "loss": 0.0192, "step": 36540 }, { "epoch": 0.27017237810827593, "grad_norm": 0.09560728073120117, "learning_rate": 3.6626750949667614e-05, "loss": 0.0193, "step": 36550 }, { "epoch": 0.2702462966795778, "grad_norm": 0.09262557327747345, "learning_rate": 3.662304131054131e-05, "loss": 0.02, "step": 36560 }, { "epoch": 0.2703202152508796, "grad_norm": 0.13921691477298737, "learning_rate": 3.6619331671415006e-05, "loss": 0.0182, "step": 36570 }, { "epoch": 0.27039413382218147, "grad_norm": 0.06821108609437943, "learning_rate": 3.66156220322887e-05, "loss": 0.0196, "step": 36580 }, { "epoch": 0.2704680523934833, "grad_norm": 0.08378107100725174, "learning_rate": 3.66119123931624e-05, "loss": 0.0197, "step": 36590 }, { "epoch": 0.2705419709647852, "grad_norm": 0.09226445853710175, "learning_rate": 3.6608202754036094e-05, "loss": 0.0187, "step": 36600 }, { "epoch": 0.27061588953608706, "grad_norm": 0.07749415934085846, "learning_rate": 3.660449311490978e-05, "loss": 0.0191, "step": 36610 }, { "epoch": 0.2706898081073889, "grad_norm": 0.06596098095178604, "learning_rate": 3.660078347578348e-05, "loss": 0.0179, "step": 36620 }, { "epoch": 0.27076372667869075, "grad_norm": 0.12515129148960114, "learning_rate": 3.659707383665717e-05, "loss": 0.0242, "step": 36630 }, { "epoch": 0.2708376452499926, "grad_norm": 0.12110026925802231, "learning_rate": 3.6593364197530864e-05, "loss": 0.0204, "step": 36640 }, { "epoch": 0.27091156382129444, "grad_norm": 0.0709238350391388, "learning_rate": 3.658965455840456e-05, "loss": 0.0197, "step": 36650 }, { "epoch": 0.2709854823925963, "grad_norm": 0.07414538413286209, "learning_rate": 3.6585944919278256e-05, "loss": 0.0213, "step": 36660 }, { "epoch": 0.2710594009638982, "grad_norm": 0.07066690921783447, "learning_rate": 3.658223528015195e-05, "loss": 0.0189, "step": 36670 }, { "epoch": 0.27113331953520003, "grad_norm": 0.08719604462385178, "learning_rate": 3.657852564102564e-05, "loss": 0.0179, "step": 36680 }, { "epoch": 0.2712072381065019, "grad_norm": 0.08157003670930862, "learning_rate": 3.657481600189934e-05, "loss": 0.017, "step": 36690 }, { "epoch": 0.2712811566778037, "grad_norm": 0.07565820217132568, "learning_rate": 3.657110636277303e-05, "loss": 0.0172, "step": 36700 }, { "epoch": 0.27135507524910557, "grad_norm": 0.13831768929958344, "learning_rate": 3.656739672364672e-05, "loss": 0.0219, "step": 36710 }, { "epoch": 0.2714289938204074, "grad_norm": 0.09268336743116379, "learning_rate": 3.656368708452042e-05, "loss": 0.0169, "step": 36720 }, { "epoch": 0.2715029123917093, "grad_norm": 0.08384677022695541, "learning_rate": 3.6559977445394114e-05, "loss": 0.0228, "step": 36730 }, { "epoch": 0.27157683096301116, "grad_norm": 0.09165684878826141, "learning_rate": 3.655626780626781e-05, "loss": 0.0208, "step": 36740 }, { "epoch": 0.271650749534313, "grad_norm": 0.07229489833116531, "learning_rate": 3.6552558167141506e-05, "loss": 0.0211, "step": 36750 }, { "epoch": 0.27172466810561485, "grad_norm": 0.08151846379041672, "learning_rate": 3.6548848528015195e-05, "loss": 0.0168, "step": 36760 }, { "epoch": 0.2717985866769167, "grad_norm": 0.0799284502863884, "learning_rate": 3.654513888888889e-05, "loss": 0.0174, "step": 36770 }, { "epoch": 0.27187250524821854, "grad_norm": 0.11195094883441925, "learning_rate": 3.654142924976258e-05, "loss": 0.018, "step": 36780 }, { "epoch": 0.2719464238195204, "grad_norm": 0.06137610971927643, "learning_rate": 3.6537719610636277e-05, "loss": 0.0194, "step": 36790 }, { "epoch": 0.2720203423908223, "grad_norm": 0.10252392292022705, "learning_rate": 3.653400997150997e-05, "loss": 0.0169, "step": 36800 }, { "epoch": 0.27209426096212413, "grad_norm": 0.0966234877705574, "learning_rate": 3.653030033238367e-05, "loss": 0.0178, "step": 36810 }, { "epoch": 0.272168179533426, "grad_norm": 0.06209605187177658, "learning_rate": 3.6526590693257364e-05, "loss": 0.0201, "step": 36820 }, { "epoch": 0.2722420981047278, "grad_norm": 0.0901181623339653, "learning_rate": 3.652288105413106e-05, "loss": 0.0178, "step": 36830 }, { "epoch": 0.27231601667602967, "grad_norm": 0.11750251799821854, "learning_rate": 3.651917141500475e-05, "loss": 0.0179, "step": 36840 }, { "epoch": 0.2723899352473315, "grad_norm": 0.10816629230976105, "learning_rate": 3.6515461775878446e-05, "loss": 0.022, "step": 36850 }, { "epoch": 0.2724638538186334, "grad_norm": 0.06723657250404358, "learning_rate": 3.6511752136752135e-05, "loss": 0.0176, "step": 36860 }, { "epoch": 0.27253777238993526, "grad_norm": 0.10735520720481873, "learning_rate": 3.650804249762583e-05, "loss": 0.0199, "step": 36870 }, { "epoch": 0.2726116909612371, "grad_norm": 0.0668816789984703, "learning_rate": 3.650433285849953e-05, "loss": 0.0207, "step": 36880 }, { "epoch": 0.27268560953253895, "grad_norm": 0.07750372588634491, "learning_rate": 3.650062321937322e-05, "loss": 0.0185, "step": 36890 }, { "epoch": 0.2727595281038408, "grad_norm": 0.08497249335050583, "learning_rate": 3.649691358024692e-05, "loss": 0.0189, "step": 36900 }, { "epoch": 0.27283344667514264, "grad_norm": 0.08565185219049454, "learning_rate": 3.649320394112061e-05, "loss": 0.0197, "step": 36910 }, { "epoch": 0.27290736524644454, "grad_norm": 0.07891131937503815, "learning_rate": 3.6489494301994304e-05, "loss": 0.0182, "step": 36920 }, { "epoch": 0.2729812838177464, "grad_norm": 0.07422411441802979, "learning_rate": 3.6485784662868e-05, "loss": 0.0177, "step": 36930 }, { "epoch": 0.27305520238904823, "grad_norm": 0.11000849306583405, "learning_rate": 3.648207502374169e-05, "loss": 0.0187, "step": 36940 }, { "epoch": 0.2731291209603501, "grad_norm": 0.08401288837194443, "learning_rate": 3.6478365384615385e-05, "loss": 0.0206, "step": 36950 }, { "epoch": 0.2732030395316519, "grad_norm": 0.12165091186761856, "learning_rate": 3.647465574548908e-05, "loss": 0.0198, "step": 36960 }, { "epoch": 0.27327695810295377, "grad_norm": 0.10981786251068115, "learning_rate": 3.647094610636278e-05, "loss": 0.0195, "step": 36970 }, { "epoch": 0.2733508766742556, "grad_norm": 0.09434209018945694, "learning_rate": 3.646723646723647e-05, "loss": 0.0182, "step": 36980 }, { "epoch": 0.2734247952455575, "grad_norm": 0.17333875596523285, "learning_rate": 3.646352682811016e-05, "loss": 0.0197, "step": 36990 }, { "epoch": 0.27349871381685936, "grad_norm": 0.09001073986291885, "learning_rate": 3.645981718898386e-05, "loss": 0.0201, "step": 37000 }, { "epoch": 0.2735726323881612, "grad_norm": 0.08942365646362305, "learning_rate": 3.645610754985755e-05, "loss": 0.0236, "step": 37010 }, { "epoch": 0.27364655095946305, "grad_norm": 0.08353909850120544, "learning_rate": 3.645239791073124e-05, "loss": 0.0175, "step": 37020 }, { "epoch": 0.2737204695307649, "grad_norm": 0.08675676584243774, "learning_rate": 3.644868827160494e-05, "loss": 0.0167, "step": 37030 }, { "epoch": 0.27379438810206674, "grad_norm": 0.12086187303066254, "learning_rate": 3.6444978632478635e-05, "loss": 0.022, "step": 37040 }, { "epoch": 0.27386830667336864, "grad_norm": 0.11000093817710876, "learning_rate": 3.644126899335233e-05, "loss": 0.022, "step": 37050 }, { "epoch": 0.2739422252446705, "grad_norm": 0.16909393668174744, "learning_rate": 3.643755935422603e-05, "loss": 0.0188, "step": 37060 }, { "epoch": 0.27401614381597234, "grad_norm": 0.07956766337156296, "learning_rate": 3.6433849715099716e-05, "loss": 0.0206, "step": 37070 }, { "epoch": 0.2740900623872742, "grad_norm": 0.10060791671276093, "learning_rate": 3.643014007597341e-05, "loss": 0.0184, "step": 37080 }, { "epoch": 0.274163980958576, "grad_norm": 0.10526014119386673, "learning_rate": 3.64264304368471e-05, "loss": 0.0192, "step": 37090 }, { "epoch": 0.27423789952987787, "grad_norm": 0.080055370926857, "learning_rate": 3.64227207977208e-05, "loss": 0.0191, "step": 37100 }, { "epoch": 0.2743118181011797, "grad_norm": 0.12234311550855637, "learning_rate": 3.641901115859449e-05, "loss": 0.0184, "step": 37110 }, { "epoch": 0.2743857366724816, "grad_norm": 0.08261439949274063, "learning_rate": 3.641530151946819e-05, "loss": 0.0171, "step": 37120 }, { "epoch": 0.27445965524378346, "grad_norm": 0.07762432843446732, "learning_rate": 3.6411591880341885e-05, "loss": 0.0177, "step": 37130 }, { "epoch": 0.2745335738150853, "grad_norm": 0.1242787316441536, "learning_rate": 3.6407882241215574e-05, "loss": 0.0215, "step": 37140 }, { "epoch": 0.27460749238638715, "grad_norm": 0.1280965805053711, "learning_rate": 3.640417260208927e-05, "loss": 0.0171, "step": 37150 }, { "epoch": 0.274681410957689, "grad_norm": 0.058584120124578476, "learning_rate": 3.6400462962962966e-05, "loss": 0.0214, "step": 37160 }, { "epoch": 0.27475532952899084, "grad_norm": 0.07477357238531113, "learning_rate": 3.6396753323836656e-05, "loss": 0.0196, "step": 37170 }, { "epoch": 0.27482924810029274, "grad_norm": 0.10592483729124069, "learning_rate": 3.639304368471035e-05, "loss": 0.0194, "step": 37180 }, { "epoch": 0.2749031666715946, "grad_norm": 0.09464053064584732, "learning_rate": 3.638933404558405e-05, "loss": 0.0217, "step": 37190 }, { "epoch": 0.27497708524289644, "grad_norm": 0.09078387916088104, "learning_rate": 3.6385624406457743e-05, "loss": 0.02, "step": 37200 }, { "epoch": 0.2750510038141983, "grad_norm": 0.1125599816441536, "learning_rate": 3.638191476733144e-05, "loss": 0.0182, "step": 37210 }, { "epoch": 0.2751249223855001, "grad_norm": 0.08146911859512329, "learning_rate": 3.637820512820513e-05, "loss": 0.0216, "step": 37220 }, { "epoch": 0.27519884095680197, "grad_norm": 0.07568617165088654, "learning_rate": 3.6374495489078825e-05, "loss": 0.0182, "step": 37230 }, { "epoch": 0.2752727595281038, "grad_norm": 0.08872109651565552, "learning_rate": 3.6370785849952514e-05, "loss": 0.0184, "step": 37240 }, { "epoch": 0.2753466780994057, "grad_norm": 0.07912348955869675, "learning_rate": 3.636707621082621e-05, "loss": 0.0188, "step": 37250 }, { "epoch": 0.27542059667070756, "grad_norm": 0.09789282828569412, "learning_rate": 3.6363366571699906e-05, "loss": 0.0206, "step": 37260 }, { "epoch": 0.2754945152420094, "grad_norm": 0.07566897571086884, "learning_rate": 3.63596569325736e-05, "loss": 0.0213, "step": 37270 }, { "epoch": 0.27556843381331125, "grad_norm": 0.09172476083040237, "learning_rate": 3.63559472934473e-05, "loss": 0.0178, "step": 37280 }, { "epoch": 0.2756423523846131, "grad_norm": 0.08475707471370697, "learning_rate": 3.6352237654320994e-05, "loss": 0.0189, "step": 37290 }, { "epoch": 0.27571627095591494, "grad_norm": 0.08507103472948074, "learning_rate": 3.634852801519468e-05, "loss": 0.0201, "step": 37300 }, { "epoch": 0.27579018952721684, "grad_norm": 0.10753806680440903, "learning_rate": 3.634481837606838e-05, "loss": 0.0186, "step": 37310 }, { "epoch": 0.2758641080985187, "grad_norm": 0.0725020319223404, "learning_rate": 3.634110873694207e-05, "loss": 0.0194, "step": 37320 }, { "epoch": 0.27593802666982054, "grad_norm": 0.0779128149151802, "learning_rate": 3.6337399097815764e-05, "loss": 0.0182, "step": 37330 }, { "epoch": 0.2760119452411224, "grad_norm": 0.07879126816987991, "learning_rate": 3.633368945868946e-05, "loss": 0.0192, "step": 37340 }, { "epoch": 0.2760858638124242, "grad_norm": 0.07782237976789474, "learning_rate": 3.6329979819563156e-05, "loss": 0.0184, "step": 37350 }, { "epoch": 0.27615978238372607, "grad_norm": 0.07219192385673523, "learning_rate": 3.632627018043685e-05, "loss": 0.019, "step": 37360 }, { "epoch": 0.2762337009550279, "grad_norm": 0.07820811122655869, "learning_rate": 3.632256054131054e-05, "loss": 0.0219, "step": 37370 }, { "epoch": 0.2763076195263298, "grad_norm": 0.08021660149097443, "learning_rate": 3.631885090218424e-05, "loss": 0.0175, "step": 37380 }, { "epoch": 0.27638153809763166, "grad_norm": 0.0879962146282196, "learning_rate": 3.631514126305793e-05, "loss": 0.0183, "step": 37390 }, { "epoch": 0.2764554566689335, "grad_norm": 0.09419143944978714, "learning_rate": 3.631143162393162e-05, "loss": 0.0177, "step": 37400 }, { "epoch": 0.27652937524023535, "grad_norm": 0.07984619587659836, "learning_rate": 3.630772198480532e-05, "loss": 0.0173, "step": 37410 }, { "epoch": 0.2766032938115372, "grad_norm": 0.09308286011219025, "learning_rate": 3.6304012345679014e-05, "loss": 0.0198, "step": 37420 }, { "epoch": 0.27667721238283904, "grad_norm": 0.09586650133132935, "learning_rate": 3.630030270655271e-05, "loss": 0.0163, "step": 37430 }, { "epoch": 0.27675113095414094, "grad_norm": 0.10913722962141037, "learning_rate": 3.6296593067426406e-05, "loss": 0.0216, "step": 37440 }, { "epoch": 0.2768250495254428, "grad_norm": 0.07150280475616455, "learning_rate": 3.6292883428300095e-05, "loss": 0.0197, "step": 37450 }, { "epoch": 0.27689896809674464, "grad_norm": 0.0947035625576973, "learning_rate": 3.628917378917379e-05, "loss": 0.0185, "step": 37460 }, { "epoch": 0.2769728866680465, "grad_norm": 0.07512058317661285, "learning_rate": 3.628546415004749e-05, "loss": 0.0193, "step": 37470 }, { "epoch": 0.2770468052393483, "grad_norm": 0.07108630985021591, "learning_rate": 3.6281754510921176e-05, "loss": 0.0188, "step": 37480 }, { "epoch": 0.27712072381065017, "grad_norm": 0.08340652287006378, "learning_rate": 3.627804487179487e-05, "loss": 0.0202, "step": 37490 }, { "epoch": 0.277194642381952, "grad_norm": 0.09074801206588745, "learning_rate": 3.627433523266857e-05, "loss": 0.0181, "step": 37500 }, { "epoch": 0.2772685609532539, "grad_norm": 0.09117832779884338, "learning_rate": 3.6270625593542264e-05, "loss": 0.018, "step": 37510 }, { "epoch": 0.27734247952455576, "grad_norm": 0.06303833425045013, "learning_rate": 3.626691595441596e-05, "loss": 0.019, "step": 37520 }, { "epoch": 0.2774163980958576, "grad_norm": 0.06354062259197235, "learning_rate": 3.626320631528965e-05, "loss": 0.0199, "step": 37530 }, { "epoch": 0.27749031666715945, "grad_norm": 0.07251658290624619, "learning_rate": 3.6259496676163345e-05, "loss": 0.0162, "step": 37540 }, { "epoch": 0.2775642352384613, "grad_norm": 0.09875177592039108, "learning_rate": 3.6255787037037035e-05, "loss": 0.0184, "step": 37550 }, { "epoch": 0.27763815380976314, "grad_norm": 0.09109731763601303, "learning_rate": 3.625207739791073e-05, "loss": 0.0171, "step": 37560 }, { "epoch": 0.27771207238106504, "grad_norm": 0.09304312616586685, "learning_rate": 3.6248367758784426e-05, "loss": 0.0169, "step": 37570 }, { "epoch": 0.2777859909523669, "grad_norm": 0.07635471224784851, "learning_rate": 3.624465811965812e-05, "loss": 0.0183, "step": 37580 }, { "epoch": 0.27785990952366874, "grad_norm": 0.10096585005521774, "learning_rate": 3.624094848053182e-05, "loss": 0.018, "step": 37590 }, { "epoch": 0.2779338280949706, "grad_norm": 0.09930157661437988, "learning_rate": 3.623723884140551e-05, "loss": 0.0194, "step": 37600 }, { "epoch": 0.2780077466662724, "grad_norm": 0.10406079143285751, "learning_rate": 3.6233529202279204e-05, "loss": 0.0177, "step": 37610 }, { "epoch": 0.27808166523757427, "grad_norm": 0.05847761034965515, "learning_rate": 3.62298195631529e-05, "loss": 0.0164, "step": 37620 }, { "epoch": 0.2781555838088761, "grad_norm": 0.07200673967599869, "learning_rate": 3.622610992402659e-05, "loss": 0.0189, "step": 37630 }, { "epoch": 0.278229502380178, "grad_norm": 0.0837259292602539, "learning_rate": 3.6222400284900285e-05, "loss": 0.0186, "step": 37640 }, { "epoch": 0.27830342095147986, "grad_norm": 0.07048111408948898, "learning_rate": 3.621869064577398e-05, "loss": 0.0176, "step": 37650 }, { "epoch": 0.2783773395227817, "grad_norm": 0.12101917713880539, "learning_rate": 3.6214981006647677e-05, "loss": 0.0199, "step": 37660 }, { "epoch": 0.27845125809408355, "grad_norm": 0.09494868665933609, "learning_rate": 3.621127136752137e-05, "loss": 0.0211, "step": 37670 }, { "epoch": 0.2785251766653854, "grad_norm": 0.10044016689062119, "learning_rate": 3.620756172839506e-05, "loss": 0.0208, "step": 37680 }, { "epoch": 0.27859909523668724, "grad_norm": 0.0923566222190857, "learning_rate": 3.620385208926876e-05, "loss": 0.0203, "step": 37690 }, { "epoch": 0.27867301380798915, "grad_norm": 0.08192206919193268, "learning_rate": 3.6200142450142454e-05, "loss": 0.0179, "step": 37700 }, { "epoch": 0.278746932379291, "grad_norm": 0.10374264419078827, "learning_rate": 3.619643281101614e-05, "loss": 0.0215, "step": 37710 }, { "epoch": 0.27882085095059284, "grad_norm": 0.07484060525894165, "learning_rate": 3.619272317188984e-05, "loss": 0.02, "step": 37720 }, { "epoch": 0.2788947695218947, "grad_norm": 0.09017164260149002, "learning_rate": 3.6189013532763535e-05, "loss": 0.0178, "step": 37730 }, { "epoch": 0.2789686880931965, "grad_norm": 0.08458402752876282, "learning_rate": 3.618530389363723e-05, "loss": 0.0195, "step": 37740 }, { "epoch": 0.27904260666449837, "grad_norm": 0.06826536357402802, "learning_rate": 3.618159425451093e-05, "loss": 0.0194, "step": 37750 }, { "epoch": 0.2791165252358002, "grad_norm": 0.08888979256153107, "learning_rate": 3.6177884615384616e-05, "loss": 0.0203, "step": 37760 }, { "epoch": 0.2791904438071021, "grad_norm": 0.11521486937999725, "learning_rate": 3.617417497625831e-05, "loss": 0.0218, "step": 37770 }, { "epoch": 0.27926436237840396, "grad_norm": 0.07831291854381561, "learning_rate": 3.6170465337132e-05, "loss": 0.0212, "step": 37780 }, { "epoch": 0.2793382809497058, "grad_norm": 0.10350217670202255, "learning_rate": 3.61667556980057e-05, "loss": 0.0196, "step": 37790 }, { "epoch": 0.27941219952100765, "grad_norm": 0.10121927410364151, "learning_rate": 3.616304605887939e-05, "loss": 0.0186, "step": 37800 }, { "epoch": 0.2794861180923095, "grad_norm": 0.08976984024047852, "learning_rate": 3.615933641975309e-05, "loss": 0.0199, "step": 37810 }, { "epoch": 0.27956003666361134, "grad_norm": 0.09638135880231857, "learning_rate": 3.6155626780626785e-05, "loss": 0.0204, "step": 37820 }, { "epoch": 0.27963395523491325, "grad_norm": 0.09168626368045807, "learning_rate": 3.6151917141500474e-05, "loss": 0.0197, "step": 37830 }, { "epoch": 0.2797078738062151, "grad_norm": 0.0909101665019989, "learning_rate": 3.614820750237417e-05, "loss": 0.0203, "step": 37840 }, { "epoch": 0.27978179237751694, "grad_norm": 0.10678108781576157, "learning_rate": 3.6144497863247866e-05, "loss": 0.0186, "step": 37850 }, { "epoch": 0.2798557109488188, "grad_norm": 0.09719112515449524, "learning_rate": 3.6140788224121555e-05, "loss": 0.0193, "step": 37860 }, { "epoch": 0.2799296295201206, "grad_norm": 0.11422860622406006, "learning_rate": 3.613707858499525e-05, "loss": 0.022, "step": 37870 }, { "epoch": 0.28000354809142247, "grad_norm": 0.0775851309299469, "learning_rate": 3.613336894586895e-05, "loss": 0.0205, "step": 37880 }, { "epoch": 0.2800774666627243, "grad_norm": 0.06096609681844711, "learning_rate": 3.612965930674264e-05, "loss": 0.0178, "step": 37890 }, { "epoch": 0.2801513852340262, "grad_norm": 0.07704003155231476, "learning_rate": 3.612594966761634e-05, "loss": 0.0174, "step": 37900 }, { "epoch": 0.28022530380532806, "grad_norm": 0.08237231522798538, "learning_rate": 3.612224002849003e-05, "loss": 0.0185, "step": 37910 }, { "epoch": 0.2802992223766299, "grad_norm": 0.0777268186211586, "learning_rate": 3.6118530389363724e-05, "loss": 0.02, "step": 37920 }, { "epoch": 0.28037314094793175, "grad_norm": 0.08269333094358444, "learning_rate": 3.611482075023742e-05, "loss": 0.0211, "step": 37930 }, { "epoch": 0.2804470595192336, "grad_norm": 0.08607961237430573, "learning_rate": 3.611111111111111e-05, "loss": 0.0173, "step": 37940 }, { "epoch": 0.28052097809053544, "grad_norm": 0.09240085631608963, "learning_rate": 3.6107401471984805e-05, "loss": 0.0189, "step": 37950 }, { "epoch": 0.28059489666183735, "grad_norm": 0.10778304189443588, "learning_rate": 3.61036918328585e-05, "loss": 0.0193, "step": 37960 }, { "epoch": 0.2806688152331392, "grad_norm": 0.07810915261507034, "learning_rate": 3.60999821937322e-05, "loss": 0.0186, "step": 37970 }, { "epoch": 0.28074273380444104, "grad_norm": 0.07399895042181015, "learning_rate": 3.609627255460589e-05, "loss": 0.0181, "step": 37980 }, { "epoch": 0.2808166523757429, "grad_norm": 0.0926373153924942, "learning_rate": 3.609256291547958e-05, "loss": 0.019, "step": 37990 }, { "epoch": 0.2808905709470447, "grad_norm": 0.05467765033245087, "learning_rate": 3.608885327635328e-05, "loss": 0.021, "step": 38000 }, { "epoch": 0.28096448951834657, "grad_norm": 0.09927170723676682, "learning_rate": 3.608514363722697e-05, "loss": 0.0197, "step": 38010 }, { "epoch": 0.2810384080896484, "grad_norm": 0.09716068208217621, "learning_rate": 3.6081433998100664e-05, "loss": 0.0188, "step": 38020 }, { "epoch": 0.2811123266609503, "grad_norm": 0.096413254737854, "learning_rate": 3.607772435897436e-05, "loss": 0.0194, "step": 38030 }, { "epoch": 0.28118624523225216, "grad_norm": 0.08352841436862946, "learning_rate": 3.6074014719848056e-05, "loss": 0.0194, "step": 38040 }, { "epoch": 0.281260163803554, "grad_norm": 0.07367278635501862, "learning_rate": 3.607030508072175e-05, "loss": 0.018, "step": 38050 }, { "epoch": 0.28133408237485585, "grad_norm": 0.09048157185316086, "learning_rate": 3.606659544159544e-05, "loss": 0.0173, "step": 38060 }, { "epoch": 0.2814080009461577, "grad_norm": 0.07373079657554626, "learning_rate": 3.606288580246914e-05, "loss": 0.0201, "step": 38070 }, { "epoch": 0.28148191951745954, "grad_norm": 0.07853297889232635, "learning_rate": 3.605917616334283e-05, "loss": 0.018, "step": 38080 }, { "epoch": 0.28155583808876145, "grad_norm": 0.10812287032604218, "learning_rate": 3.605546652421652e-05, "loss": 0.017, "step": 38090 }, { "epoch": 0.2816297566600633, "grad_norm": 0.08013275265693665, "learning_rate": 3.605175688509022e-05, "loss": 0.0178, "step": 38100 }, { "epoch": 0.28170367523136514, "grad_norm": 0.08659382909536362, "learning_rate": 3.6048047245963914e-05, "loss": 0.0186, "step": 38110 }, { "epoch": 0.281777593802667, "grad_norm": 0.08591907471418381, "learning_rate": 3.604433760683761e-05, "loss": 0.02, "step": 38120 }, { "epoch": 0.2818515123739688, "grad_norm": 0.07610286772251129, "learning_rate": 3.6040627967711306e-05, "loss": 0.0153, "step": 38130 }, { "epoch": 0.28192543094527067, "grad_norm": 0.07261490821838379, "learning_rate": 3.6036918328584995e-05, "loss": 0.019, "step": 38140 }, { "epoch": 0.2819993495165725, "grad_norm": 0.10294881463050842, "learning_rate": 3.603320868945869e-05, "loss": 0.0183, "step": 38150 }, { "epoch": 0.2820732680878744, "grad_norm": 0.09938614070415497, "learning_rate": 3.602949905033239e-05, "loss": 0.0205, "step": 38160 }, { "epoch": 0.28214718665917626, "grad_norm": 0.07362860441207886, "learning_rate": 3.6025789411206076e-05, "loss": 0.021, "step": 38170 }, { "epoch": 0.2822211052304781, "grad_norm": 0.056989945471286774, "learning_rate": 3.602207977207977e-05, "loss": 0.0196, "step": 38180 }, { "epoch": 0.28229502380177995, "grad_norm": 0.18107661604881287, "learning_rate": 3.601837013295347e-05, "loss": 0.0174, "step": 38190 }, { "epoch": 0.2823689423730818, "grad_norm": 0.08354820311069489, "learning_rate": 3.6014660493827164e-05, "loss": 0.0192, "step": 38200 }, { "epoch": 0.28244286094438364, "grad_norm": 0.10664748400449753, "learning_rate": 3.601095085470086e-05, "loss": 0.0208, "step": 38210 }, { "epoch": 0.28251677951568555, "grad_norm": 0.08411481976509094, "learning_rate": 3.600724121557455e-05, "loss": 0.0181, "step": 38220 }, { "epoch": 0.2825906980869874, "grad_norm": 0.0959780141711235, "learning_rate": 3.6003531576448245e-05, "loss": 0.0183, "step": 38230 }, { "epoch": 0.28266461665828924, "grad_norm": 0.05065147951245308, "learning_rate": 3.5999821937321934e-05, "loss": 0.0201, "step": 38240 }, { "epoch": 0.2827385352295911, "grad_norm": 0.07555904239416122, "learning_rate": 3.599611229819563e-05, "loss": 0.0182, "step": 38250 }, { "epoch": 0.2828124538008929, "grad_norm": 0.08089818060398102, "learning_rate": 3.599240265906933e-05, "loss": 0.02, "step": 38260 }, { "epoch": 0.28288637237219477, "grad_norm": 0.09162278473377228, "learning_rate": 3.598869301994302e-05, "loss": 0.0191, "step": 38270 }, { "epoch": 0.2829602909434966, "grad_norm": 0.08376139402389526, "learning_rate": 3.598498338081672e-05, "loss": 0.0163, "step": 38280 }, { "epoch": 0.2830342095147985, "grad_norm": 0.10960046201944351, "learning_rate": 3.598127374169041e-05, "loss": 0.0216, "step": 38290 }, { "epoch": 0.28310812808610036, "grad_norm": 0.08518026024103165, "learning_rate": 3.59775641025641e-05, "loss": 0.0205, "step": 38300 }, { "epoch": 0.2831820466574022, "grad_norm": 0.08602633327245712, "learning_rate": 3.59738544634378e-05, "loss": 0.0192, "step": 38310 }, { "epoch": 0.28325596522870405, "grad_norm": 0.091323621571064, "learning_rate": 3.597014482431149e-05, "loss": 0.0168, "step": 38320 }, { "epoch": 0.2833298838000059, "grad_norm": 0.11848784238100052, "learning_rate": 3.5966435185185184e-05, "loss": 0.0181, "step": 38330 }, { "epoch": 0.28340380237130774, "grad_norm": 0.09957283735275269, "learning_rate": 3.596272554605888e-05, "loss": 0.0183, "step": 38340 }, { "epoch": 0.28347772094260965, "grad_norm": 0.07813524454832077, "learning_rate": 3.5959015906932576e-05, "loss": 0.0196, "step": 38350 }, { "epoch": 0.2835516395139115, "grad_norm": 0.11081357300281525, "learning_rate": 3.595530626780627e-05, "loss": 0.0178, "step": 38360 }, { "epoch": 0.28362555808521334, "grad_norm": 0.08587471395730972, "learning_rate": 3.595159662867996e-05, "loss": 0.0214, "step": 38370 }, { "epoch": 0.2836994766565152, "grad_norm": 0.07502496242523193, "learning_rate": 3.594788698955366e-05, "loss": 0.0188, "step": 38380 }, { "epoch": 0.283773395227817, "grad_norm": 0.0899985060095787, "learning_rate": 3.5944177350427353e-05, "loss": 0.0185, "step": 38390 }, { "epoch": 0.28384731379911887, "grad_norm": 0.08980336785316467, "learning_rate": 3.594046771130104e-05, "loss": 0.0179, "step": 38400 }, { "epoch": 0.2839212323704207, "grad_norm": 0.10397088527679443, "learning_rate": 3.5936758072174745e-05, "loss": 0.0179, "step": 38410 }, { "epoch": 0.2839951509417226, "grad_norm": 0.13703933358192444, "learning_rate": 3.5933048433048435e-05, "loss": 0.0192, "step": 38420 }, { "epoch": 0.28406906951302446, "grad_norm": 0.07654924690723419, "learning_rate": 3.592933879392213e-05, "loss": 0.0186, "step": 38430 }, { "epoch": 0.2841429880843263, "grad_norm": 0.09625089168548584, "learning_rate": 3.5925629154795826e-05, "loss": 0.0202, "step": 38440 }, { "epoch": 0.28421690665562815, "grad_norm": 0.10750327259302139, "learning_rate": 3.5921919515669516e-05, "loss": 0.0191, "step": 38450 }, { "epoch": 0.28429082522693, "grad_norm": 0.08809314668178558, "learning_rate": 3.591820987654321e-05, "loss": 0.018, "step": 38460 }, { "epoch": 0.28436474379823184, "grad_norm": 0.09000466018915176, "learning_rate": 3.59145002374169e-05, "loss": 0.021, "step": 38470 }, { "epoch": 0.28443866236953375, "grad_norm": 0.11071033775806427, "learning_rate": 3.59107905982906e-05, "loss": 0.0194, "step": 38480 }, { "epoch": 0.2845125809408356, "grad_norm": 0.09824886918067932, "learning_rate": 3.59070809591643e-05, "loss": 0.0193, "step": 38490 }, { "epoch": 0.28458649951213744, "grad_norm": 0.15032939612865448, "learning_rate": 3.590337132003799e-05, "loss": 0.0181, "step": 38500 }, { "epoch": 0.2846604180834393, "grad_norm": 0.07095257192850113, "learning_rate": 3.5899661680911685e-05, "loss": 0.0154, "step": 38510 }, { "epoch": 0.2847343366547411, "grad_norm": 0.07965070009231567, "learning_rate": 3.5895952041785374e-05, "loss": 0.0197, "step": 38520 }, { "epoch": 0.284808255226043, "grad_norm": 0.09079215675592422, "learning_rate": 3.589224240265907e-05, "loss": 0.0179, "step": 38530 }, { "epoch": 0.2848821737973448, "grad_norm": 0.09377455711364746, "learning_rate": 3.5888532763532766e-05, "loss": 0.0217, "step": 38540 }, { "epoch": 0.2849560923686467, "grad_norm": 0.09389619529247284, "learning_rate": 3.5884823124406455e-05, "loss": 0.0196, "step": 38550 }, { "epoch": 0.28503001093994856, "grad_norm": 0.09551979601383209, "learning_rate": 3.588111348528016e-05, "loss": 0.0208, "step": 38560 }, { "epoch": 0.2851039295112504, "grad_norm": 0.07076023519039154, "learning_rate": 3.587740384615385e-05, "loss": 0.0184, "step": 38570 }, { "epoch": 0.28517784808255225, "grad_norm": 0.11054020375013351, "learning_rate": 3.587369420702754e-05, "loss": 0.0206, "step": 38580 }, { "epoch": 0.2852517666538541, "grad_norm": 0.07012484967708588, "learning_rate": 3.586998456790124e-05, "loss": 0.017, "step": 38590 }, { "epoch": 0.28532568522515595, "grad_norm": 0.06504105031490326, "learning_rate": 3.586627492877493e-05, "loss": 0.0154, "step": 38600 }, { "epoch": 0.28539960379645785, "grad_norm": 0.0863649994134903, "learning_rate": 3.5862565289648624e-05, "loss": 0.0178, "step": 38610 }, { "epoch": 0.2854735223677597, "grad_norm": 0.08585851639509201, "learning_rate": 3.585885565052232e-05, "loss": 0.0186, "step": 38620 }, { "epoch": 0.28554744093906154, "grad_norm": 0.09086504578590393, "learning_rate": 3.585514601139601e-05, "loss": 0.0173, "step": 38630 }, { "epoch": 0.2856213595103634, "grad_norm": 0.07614096999168396, "learning_rate": 3.585143637226971e-05, "loss": 0.0194, "step": 38640 }, { "epoch": 0.2856952780816652, "grad_norm": 0.07605312764644623, "learning_rate": 3.58477267331434e-05, "loss": 0.0199, "step": 38650 }, { "epoch": 0.2857691966529671, "grad_norm": 0.08855976164340973, "learning_rate": 3.58440170940171e-05, "loss": 0.0182, "step": 38660 }, { "epoch": 0.2858431152242689, "grad_norm": 0.0881510004401207, "learning_rate": 3.584030745489079e-05, "loss": 0.0214, "step": 38670 }, { "epoch": 0.2859170337955708, "grad_norm": 0.10402607917785645, "learning_rate": 3.583659781576448e-05, "loss": 0.0179, "step": 38680 }, { "epoch": 0.28599095236687266, "grad_norm": 0.07266613841056824, "learning_rate": 3.583288817663818e-05, "loss": 0.0204, "step": 38690 }, { "epoch": 0.2860648709381745, "grad_norm": 0.09164243936538696, "learning_rate": 3.582917853751187e-05, "loss": 0.0202, "step": 38700 }, { "epoch": 0.28613878950947635, "grad_norm": 0.0948050394654274, "learning_rate": 3.582546889838557e-05, "loss": 0.0193, "step": 38710 }, { "epoch": 0.2862127080807782, "grad_norm": 0.14507903158664703, "learning_rate": 3.5821759259259266e-05, "loss": 0.0188, "step": 38720 }, { "epoch": 0.28628662665208005, "grad_norm": 0.08098135888576508, "learning_rate": 3.5818049620132955e-05, "loss": 0.0196, "step": 38730 }, { "epoch": 0.28636054522338195, "grad_norm": 0.25409314036369324, "learning_rate": 3.581433998100665e-05, "loss": 0.018, "step": 38740 }, { "epoch": 0.2864344637946838, "grad_norm": 0.07396399229764938, "learning_rate": 3.581063034188034e-05, "loss": 0.0176, "step": 38750 }, { "epoch": 0.28650838236598564, "grad_norm": 0.07812829315662384, "learning_rate": 3.5806920702754036e-05, "loss": 0.0201, "step": 38760 }, { "epoch": 0.2865823009372875, "grad_norm": 0.05777015537023544, "learning_rate": 3.580321106362773e-05, "loss": 0.0177, "step": 38770 }, { "epoch": 0.2866562195085893, "grad_norm": 0.08417957276105881, "learning_rate": 3.579950142450142e-05, "loss": 0.0172, "step": 38780 }, { "epoch": 0.2867301380798912, "grad_norm": 0.07578160613775253, "learning_rate": 3.5795791785375124e-05, "loss": 0.0192, "step": 38790 }, { "epoch": 0.2868040566511931, "grad_norm": 0.10593099892139435, "learning_rate": 3.5792082146248814e-05, "loss": 0.0184, "step": 38800 }, { "epoch": 0.2868779752224949, "grad_norm": 0.05731048807501793, "learning_rate": 3.578837250712251e-05, "loss": 0.0173, "step": 38810 }, { "epoch": 0.28695189379379676, "grad_norm": 0.08876123279333115, "learning_rate": 3.5784662867996205e-05, "loss": 0.0189, "step": 38820 }, { "epoch": 0.2870258123650986, "grad_norm": 0.08627317100763321, "learning_rate": 3.5780953228869895e-05, "loss": 0.0198, "step": 38830 }, { "epoch": 0.28709973093640045, "grad_norm": 0.0902194231748581, "learning_rate": 3.577724358974359e-05, "loss": 0.0174, "step": 38840 }, { "epoch": 0.2871736495077023, "grad_norm": 0.08449438959360123, "learning_rate": 3.5773533950617287e-05, "loss": 0.0181, "step": 38850 }, { "epoch": 0.28724756807900415, "grad_norm": 0.10555487871170044, "learning_rate": 3.576982431149098e-05, "loss": 0.019, "step": 38860 }, { "epoch": 0.28732148665030605, "grad_norm": 0.10188709199428558, "learning_rate": 3.576611467236468e-05, "loss": 0.0212, "step": 38870 }, { "epoch": 0.2873954052216079, "grad_norm": 0.08163904398679733, "learning_rate": 3.576240503323837e-05, "loss": 0.0226, "step": 38880 }, { "epoch": 0.28746932379290974, "grad_norm": 0.05684854835271835, "learning_rate": 3.5758695394112064e-05, "loss": 0.0167, "step": 38890 }, { "epoch": 0.2875432423642116, "grad_norm": 0.09646262973546982, "learning_rate": 3.575498575498576e-05, "loss": 0.0214, "step": 38900 }, { "epoch": 0.2876171609355134, "grad_norm": 0.06920493394136429, "learning_rate": 3.575127611585945e-05, "loss": 0.0218, "step": 38910 }, { "epoch": 0.2876910795068153, "grad_norm": 0.0737135261297226, "learning_rate": 3.5747566476733145e-05, "loss": 0.0183, "step": 38920 }, { "epoch": 0.2877649980781172, "grad_norm": 0.09294036775827408, "learning_rate": 3.5743856837606834e-05, "loss": 0.0175, "step": 38930 }, { "epoch": 0.287838916649419, "grad_norm": 0.11958757042884827, "learning_rate": 3.574014719848054e-05, "loss": 0.0208, "step": 38940 }, { "epoch": 0.28791283522072086, "grad_norm": 0.10666726529598236, "learning_rate": 3.573643755935423e-05, "loss": 0.0227, "step": 38950 }, { "epoch": 0.2879867537920227, "grad_norm": 0.08652665466070175, "learning_rate": 3.573272792022792e-05, "loss": 0.0173, "step": 38960 }, { "epoch": 0.28806067236332455, "grad_norm": 0.07823482155799866, "learning_rate": 3.572901828110162e-05, "loss": 0.0174, "step": 38970 }, { "epoch": 0.2881345909346264, "grad_norm": 0.07927388697862625, "learning_rate": 3.572530864197531e-05, "loss": 0.0187, "step": 38980 }, { "epoch": 0.28820850950592825, "grad_norm": 0.06484010070562363, "learning_rate": 3.5721599002849e-05, "loss": 0.0182, "step": 38990 }, { "epoch": 0.28828242807723015, "grad_norm": 0.11370483785867691, "learning_rate": 3.57178893637227e-05, "loss": 0.0179, "step": 39000 }, { "epoch": 0.288356346648532, "grad_norm": 0.0888226181268692, "learning_rate": 3.5714179724596395e-05, "loss": 0.0217, "step": 39010 }, { "epoch": 0.28843026521983384, "grad_norm": 0.09708042442798615, "learning_rate": 3.571047008547009e-05, "loss": 0.0189, "step": 39020 }, { "epoch": 0.2885041837911357, "grad_norm": 0.12996414303779602, "learning_rate": 3.570676044634378e-05, "loss": 0.0166, "step": 39030 }, { "epoch": 0.2885781023624375, "grad_norm": 0.079742431640625, "learning_rate": 3.5703050807217476e-05, "loss": 0.0177, "step": 39040 }, { "epoch": 0.2886520209337394, "grad_norm": 0.07856535166501999, "learning_rate": 3.569934116809117e-05, "loss": 0.0163, "step": 39050 }, { "epoch": 0.2887259395050413, "grad_norm": 0.09074665606021881, "learning_rate": 3.569563152896486e-05, "loss": 0.0182, "step": 39060 }, { "epoch": 0.2887998580763431, "grad_norm": 0.0902465432882309, "learning_rate": 3.569192188983856e-05, "loss": 0.0192, "step": 39070 }, { "epoch": 0.28887377664764496, "grad_norm": 0.09278262406587601, "learning_rate": 3.568821225071225e-05, "loss": 0.0189, "step": 39080 }, { "epoch": 0.2889476952189468, "grad_norm": 0.09800709784030914, "learning_rate": 3.568450261158595e-05, "loss": 0.017, "step": 39090 }, { "epoch": 0.28902161379024865, "grad_norm": 0.10501433163881302, "learning_rate": 3.5680792972459645e-05, "loss": 0.018, "step": 39100 }, { "epoch": 0.2890955323615505, "grad_norm": 0.10222896188497543, "learning_rate": 3.5677083333333334e-05, "loss": 0.0192, "step": 39110 }, { "epoch": 0.28916945093285235, "grad_norm": 0.11003319174051285, "learning_rate": 3.567337369420703e-05, "loss": 0.0189, "step": 39120 }, { "epoch": 0.28924336950415425, "grad_norm": 0.08521962910890579, "learning_rate": 3.5669664055080726e-05, "loss": 0.0195, "step": 39130 }, { "epoch": 0.2893172880754561, "grad_norm": 0.05133388191461563, "learning_rate": 3.5665954415954415e-05, "loss": 0.0175, "step": 39140 }, { "epoch": 0.28939120664675794, "grad_norm": 0.10458425432443619, "learning_rate": 3.566224477682811e-05, "loss": 0.0223, "step": 39150 }, { "epoch": 0.2894651252180598, "grad_norm": 0.09683454781770706, "learning_rate": 3.565853513770181e-05, "loss": 0.0195, "step": 39160 }, { "epoch": 0.2895390437893616, "grad_norm": 0.08803705126047134, "learning_rate": 3.56548254985755e-05, "loss": 0.0181, "step": 39170 }, { "epoch": 0.2896129623606635, "grad_norm": 0.07024317234754562, "learning_rate": 3.56511158594492e-05, "loss": 0.019, "step": 39180 }, { "epoch": 0.2896868809319654, "grad_norm": 0.0780089944601059, "learning_rate": 3.564740622032289e-05, "loss": 0.0188, "step": 39190 }, { "epoch": 0.2897607995032672, "grad_norm": 0.09941524267196655, "learning_rate": 3.5643696581196584e-05, "loss": 0.0184, "step": 39200 }, { "epoch": 0.28983471807456906, "grad_norm": 0.06821256130933762, "learning_rate": 3.5639986942070274e-05, "loss": 0.02, "step": 39210 }, { "epoch": 0.2899086366458709, "grad_norm": 0.11383607983589172, "learning_rate": 3.563627730294397e-05, "loss": 0.0219, "step": 39220 }, { "epoch": 0.28998255521717275, "grad_norm": 0.0734940692782402, "learning_rate": 3.5632567663817666e-05, "loss": 0.0197, "step": 39230 }, { "epoch": 0.2900564737884746, "grad_norm": 0.06543325632810593, "learning_rate": 3.562885802469136e-05, "loss": 0.016, "step": 39240 }, { "epoch": 0.29013039235977645, "grad_norm": 0.08477847278118134, "learning_rate": 3.562514838556506e-05, "loss": 0.0199, "step": 39250 }, { "epoch": 0.29020431093107835, "grad_norm": 0.10399157553911209, "learning_rate": 3.562143874643875e-05, "loss": 0.0202, "step": 39260 }, { "epoch": 0.2902782295023802, "grad_norm": 0.10714534670114517, "learning_rate": 3.561772910731244e-05, "loss": 0.0215, "step": 39270 }, { "epoch": 0.29035214807368204, "grad_norm": 0.10957160592079163, "learning_rate": 3.561401946818614e-05, "loss": 0.0176, "step": 39280 }, { "epoch": 0.2904260666449839, "grad_norm": 0.1137077659368515, "learning_rate": 3.561030982905983e-05, "loss": 0.0177, "step": 39290 }, { "epoch": 0.2904999852162857, "grad_norm": 0.10111214220523834, "learning_rate": 3.5606600189933524e-05, "loss": 0.0197, "step": 39300 }, { "epoch": 0.2905739037875876, "grad_norm": 0.08012203127145767, "learning_rate": 3.560289055080722e-05, "loss": 0.0212, "step": 39310 }, { "epoch": 0.2906478223588895, "grad_norm": 0.07070279866456985, "learning_rate": 3.5599180911680916e-05, "loss": 0.0183, "step": 39320 }, { "epoch": 0.2907217409301913, "grad_norm": 0.06599403917789459, "learning_rate": 3.559547127255461e-05, "loss": 0.0185, "step": 39330 }, { "epoch": 0.29079565950149316, "grad_norm": 0.09379260987043381, "learning_rate": 3.55917616334283e-05, "loss": 0.0191, "step": 39340 }, { "epoch": 0.290869578072795, "grad_norm": 0.08500421047210693, "learning_rate": 3.5588051994302e-05, "loss": 0.0179, "step": 39350 }, { "epoch": 0.29094349664409686, "grad_norm": 0.0872545838356018, "learning_rate": 3.558434235517569e-05, "loss": 0.0195, "step": 39360 }, { "epoch": 0.2910174152153987, "grad_norm": 0.09130999445915222, "learning_rate": 3.558063271604938e-05, "loss": 0.0214, "step": 39370 }, { "epoch": 0.29109133378670055, "grad_norm": 0.09407318383455276, "learning_rate": 3.557692307692308e-05, "loss": 0.0177, "step": 39380 }, { "epoch": 0.29116525235800245, "grad_norm": 0.09333614259958267, "learning_rate": 3.5573213437796774e-05, "loss": 0.0204, "step": 39390 }, { "epoch": 0.2912391709293043, "grad_norm": 0.10681622475385666, "learning_rate": 3.556950379867047e-05, "loss": 0.0177, "step": 39400 }, { "epoch": 0.29131308950060614, "grad_norm": 0.09072420746088028, "learning_rate": 3.5565794159544166e-05, "loss": 0.017, "step": 39410 }, { "epoch": 0.291387008071908, "grad_norm": 0.07721372693777084, "learning_rate": 3.5562084520417855e-05, "loss": 0.0178, "step": 39420 }, { "epoch": 0.29146092664320983, "grad_norm": 0.06005462631583214, "learning_rate": 3.555837488129155e-05, "loss": 0.0176, "step": 39430 }, { "epoch": 0.2915348452145117, "grad_norm": 0.06264548003673553, "learning_rate": 3.555466524216524e-05, "loss": 0.0198, "step": 39440 }, { "epoch": 0.2916087637858136, "grad_norm": 0.10125889629125595, "learning_rate": 3.5550955603038936e-05, "loss": 0.0177, "step": 39450 }, { "epoch": 0.2916826823571154, "grad_norm": 0.11964855343103409, "learning_rate": 3.554724596391263e-05, "loss": 0.0229, "step": 39460 }, { "epoch": 0.29175660092841726, "grad_norm": 0.09017671644687653, "learning_rate": 3.554353632478633e-05, "loss": 0.019, "step": 39470 }, { "epoch": 0.2918305194997191, "grad_norm": 0.0720110610127449, "learning_rate": 3.5539826685660024e-05, "loss": 0.0185, "step": 39480 }, { "epoch": 0.29190443807102096, "grad_norm": 0.06961306184530258, "learning_rate": 3.553611704653371e-05, "loss": 0.0202, "step": 39490 }, { "epoch": 0.2919783566423228, "grad_norm": 0.07258889079093933, "learning_rate": 3.553240740740741e-05, "loss": 0.0218, "step": 39500 }, { "epoch": 0.29205227521362465, "grad_norm": 0.14533229172229767, "learning_rate": 3.5528697768281105e-05, "loss": 0.0195, "step": 39510 }, { "epoch": 0.29212619378492655, "grad_norm": 0.07298830896615982, "learning_rate": 3.5524988129154794e-05, "loss": 0.0178, "step": 39520 }, { "epoch": 0.2922001123562284, "grad_norm": 0.08939662575721741, "learning_rate": 3.552127849002849e-05, "loss": 0.0199, "step": 39530 }, { "epoch": 0.29227403092753024, "grad_norm": 0.07126587629318237, "learning_rate": 3.5517568850902186e-05, "loss": 0.0151, "step": 39540 }, { "epoch": 0.2923479494988321, "grad_norm": 0.11612686514854431, "learning_rate": 3.551385921177588e-05, "loss": 0.0199, "step": 39550 }, { "epoch": 0.29242186807013393, "grad_norm": 0.08554835617542267, "learning_rate": 3.551014957264958e-05, "loss": 0.0186, "step": 39560 }, { "epoch": 0.2924957866414358, "grad_norm": 0.0877324566245079, "learning_rate": 3.550643993352327e-05, "loss": 0.0206, "step": 39570 }, { "epoch": 0.2925697052127377, "grad_norm": 0.25311246514320374, "learning_rate": 3.5502730294396963e-05, "loss": 0.0203, "step": 39580 }, { "epoch": 0.2926436237840395, "grad_norm": 0.07756414264440536, "learning_rate": 3.549902065527066e-05, "loss": 0.0183, "step": 39590 }, { "epoch": 0.29271754235534136, "grad_norm": 0.09529725462198257, "learning_rate": 3.549531101614435e-05, "loss": 0.0202, "step": 39600 }, { "epoch": 0.2927914609266432, "grad_norm": 0.09326574951410294, "learning_rate": 3.5491601377018045e-05, "loss": 0.0172, "step": 39610 }, { "epoch": 0.29286537949794506, "grad_norm": 0.11572094261646271, "learning_rate": 3.548789173789174e-05, "loss": 0.0225, "step": 39620 }, { "epoch": 0.2929392980692469, "grad_norm": 0.08779244869947433, "learning_rate": 3.5484182098765436e-05, "loss": 0.0194, "step": 39630 }, { "epoch": 0.29301321664054875, "grad_norm": 0.08656366169452667, "learning_rate": 3.548047245963913e-05, "loss": 0.0183, "step": 39640 }, { "epoch": 0.29308713521185065, "grad_norm": 0.09075984358787537, "learning_rate": 3.547676282051282e-05, "loss": 0.0189, "step": 39650 }, { "epoch": 0.2931610537831525, "grad_norm": 0.15344542264938354, "learning_rate": 3.547305318138652e-05, "loss": 0.0208, "step": 39660 }, { "epoch": 0.29323497235445434, "grad_norm": 0.08538174629211426, "learning_rate": 3.546934354226021e-05, "loss": 0.0193, "step": 39670 }, { "epoch": 0.2933088909257562, "grad_norm": 0.10595715790987015, "learning_rate": 3.54656339031339e-05, "loss": 0.0213, "step": 39680 }, { "epoch": 0.29338280949705803, "grad_norm": 0.08453144133090973, "learning_rate": 3.54619242640076e-05, "loss": 0.0199, "step": 39690 }, { "epoch": 0.2934567280683599, "grad_norm": 0.08898500353097916, "learning_rate": 3.5458214624881295e-05, "loss": 0.0189, "step": 39700 }, { "epoch": 0.2935306466396618, "grad_norm": 0.10838479548692703, "learning_rate": 3.545450498575499e-05, "loss": 0.0179, "step": 39710 }, { "epoch": 0.2936045652109636, "grad_norm": 0.07953338325023651, "learning_rate": 3.545079534662868e-05, "loss": 0.02, "step": 39720 }, { "epoch": 0.29367848378226546, "grad_norm": 0.11616776883602142, "learning_rate": 3.5447085707502376e-05, "loss": 0.0224, "step": 39730 }, { "epoch": 0.2937524023535673, "grad_norm": 0.097626693546772, "learning_rate": 3.544337606837607e-05, "loss": 0.0175, "step": 39740 }, { "epoch": 0.29382632092486916, "grad_norm": 0.09147676825523376, "learning_rate": 3.543966642924976e-05, "loss": 0.0188, "step": 39750 }, { "epoch": 0.293900239496171, "grad_norm": 0.07421558350324631, "learning_rate": 3.543595679012346e-05, "loss": 0.0196, "step": 39760 }, { "epoch": 0.29397415806747285, "grad_norm": 0.08205095678567886, "learning_rate": 3.543224715099715e-05, "loss": 0.0192, "step": 39770 }, { "epoch": 0.29404807663877475, "grad_norm": 0.09343995898962021, "learning_rate": 3.542853751187085e-05, "loss": 0.0197, "step": 39780 }, { "epoch": 0.2941219952100766, "grad_norm": 0.1180085763335228, "learning_rate": 3.5424827872744545e-05, "loss": 0.0207, "step": 39790 }, { "epoch": 0.29419591378137844, "grad_norm": 0.06754688918590546, "learning_rate": 3.5421118233618234e-05, "loss": 0.0173, "step": 39800 }, { "epoch": 0.2942698323526803, "grad_norm": 0.0712592601776123, "learning_rate": 3.541740859449193e-05, "loss": 0.0201, "step": 39810 }, { "epoch": 0.29434375092398213, "grad_norm": 0.09996171295642853, "learning_rate": 3.5413698955365626e-05, "loss": 0.0204, "step": 39820 }, { "epoch": 0.294417669495284, "grad_norm": 0.08990880846977234, "learning_rate": 3.5409989316239315e-05, "loss": 0.0211, "step": 39830 }, { "epoch": 0.2944915880665859, "grad_norm": 0.07393600046634674, "learning_rate": 3.540627967711301e-05, "loss": 0.0226, "step": 39840 }, { "epoch": 0.2945655066378877, "grad_norm": 0.06920602917671204, "learning_rate": 3.540257003798671e-05, "loss": 0.0175, "step": 39850 }, { "epoch": 0.29463942520918956, "grad_norm": 0.08680330216884613, "learning_rate": 3.53988603988604e-05, "loss": 0.0181, "step": 39860 }, { "epoch": 0.2947133437804914, "grad_norm": 0.10392973572015762, "learning_rate": 3.53951507597341e-05, "loss": 0.0192, "step": 39870 }, { "epoch": 0.29478726235179326, "grad_norm": 0.07698529213666916, "learning_rate": 3.539144112060779e-05, "loss": 0.0189, "step": 39880 }, { "epoch": 0.2948611809230951, "grad_norm": 0.0859031230211258, "learning_rate": 3.5387731481481484e-05, "loss": 0.0168, "step": 39890 }, { "epoch": 0.29493509949439695, "grad_norm": 0.10237284749746323, "learning_rate": 3.5384021842355173e-05, "loss": 0.0186, "step": 39900 }, { "epoch": 0.29500901806569885, "grad_norm": 0.09435314685106277, "learning_rate": 3.538031220322887e-05, "loss": 0.0192, "step": 39910 }, { "epoch": 0.2950829366370007, "grad_norm": 0.06342162936925888, "learning_rate": 3.5376602564102565e-05, "loss": 0.0184, "step": 39920 }, { "epoch": 0.29515685520830254, "grad_norm": 0.12394603341817856, "learning_rate": 3.537289292497626e-05, "loss": 0.0212, "step": 39930 }, { "epoch": 0.2952307737796044, "grad_norm": 0.09578089416027069, "learning_rate": 3.536918328584996e-05, "loss": 0.0191, "step": 39940 }, { "epoch": 0.29530469235090623, "grad_norm": 0.10095982998609543, "learning_rate": 3.5365473646723646e-05, "loss": 0.0176, "step": 39950 }, { "epoch": 0.2953786109222081, "grad_norm": 0.09211903065443039, "learning_rate": 3.536176400759734e-05, "loss": 0.0172, "step": 39960 }, { "epoch": 0.29545252949351, "grad_norm": 0.21059109270572662, "learning_rate": 3.535805436847104e-05, "loss": 0.0201, "step": 39970 }, { "epoch": 0.2955264480648118, "grad_norm": 0.09203150123357773, "learning_rate": 3.535434472934473e-05, "loss": 0.0217, "step": 39980 }, { "epoch": 0.29560036663611367, "grad_norm": 0.08103719353675842, "learning_rate": 3.5350635090218424e-05, "loss": 0.019, "step": 39990 }, { "epoch": 0.2956742852074155, "grad_norm": 0.0887136161327362, "learning_rate": 3.534692545109212e-05, "loss": 0.0197, "step": 40000 }, { "epoch": 0.2956742852074155, "eval_f1": 0.6048634691567629, "eval_loss": 0.018523240461945534, "eval_precision": 0.47730154244425604, "eval_recall": 0.8254777377525961, "eval_runtime": 2662.2523, "eval_samples_per_second": 203.262, "eval_steps_per_second": 3.176, "step": 40000 }, { "epoch": 0.29574820377871736, "grad_norm": 0.08944419771432877, "learning_rate": 3.5343215811965815e-05, "loss": 0.0191, "step": 40010 }, { "epoch": 0.2958221223500192, "grad_norm": 0.10585649311542511, "learning_rate": 3.533950617283951e-05, "loss": 0.0191, "step": 40020 }, { "epoch": 0.29589604092132105, "grad_norm": 0.09504344314336777, "learning_rate": 3.53357965337132e-05, "loss": 0.0199, "step": 40030 }, { "epoch": 0.29596995949262295, "grad_norm": 0.0804176926612854, "learning_rate": 3.5332086894586897e-05, "loss": 0.0174, "step": 40040 }, { "epoch": 0.2960438780639248, "grad_norm": 0.062001194804906845, "learning_rate": 3.532837725546059e-05, "loss": 0.0169, "step": 40050 }, { "epoch": 0.29611779663522664, "grad_norm": 0.08773874491453171, "learning_rate": 3.532466761633428e-05, "loss": 0.0215, "step": 40060 }, { "epoch": 0.2961917152065285, "grad_norm": 0.07420551031827927, "learning_rate": 3.532095797720798e-05, "loss": 0.0175, "step": 40070 }, { "epoch": 0.29626563377783033, "grad_norm": 0.08795443922281265, "learning_rate": 3.5317248338081674e-05, "loss": 0.0173, "step": 40080 }, { "epoch": 0.2963395523491322, "grad_norm": 0.07256721705198288, "learning_rate": 3.531353869895537e-05, "loss": 0.0194, "step": 40090 }, { "epoch": 0.2964134709204341, "grad_norm": 0.06961095333099365, "learning_rate": 3.5309829059829066e-05, "loss": 0.0184, "step": 40100 }, { "epoch": 0.2964873894917359, "grad_norm": 0.08534622192382812, "learning_rate": 3.5306119420702755e-05, "loss": 0.0181, "step": 40110 }, { "epoch": 0.29656130806303777, "grad_norm": 0.11563605815172195, "learning_rate": 3.530240978157645e-05, "loss": 0.021, "step": 40120 }, { "epoch": 0.2966352266343396, "grad_norm": 0.08745235949754715, "learning_rate": 3.529870014245014e-05, "loss": 0.0191, "step": 40130 }, { "epoch": 0.29670914520564146, "grad_norm": 0.06488395482301712, "learning_rate": 3.5294990503323836e-05, "loss": 0.0207, "step": 40140 }, { "epoch": 0.2967830637769433, "grad_norm": 0.08929703384637833, "learning_rate": 3.529128086419753e-05, "loss": 0.0189, "step": 40150 }, { "epoch": 0.29685698234824515, "grad_norm": 0.08577121794223785, "learning_rate": 3.528757122507123e-05, "loss": 0.023, "step": 40160 }, { "epoch": 0.29693090091954705, "grad_norm": 0.08573178201913834, "learning_rate": 3.5283861585944924e-05, "loss": 0.0186, "step": 40170 }, { "epoch": 0.2970048194908489, "grad_norm": 0.11787261813879013, "learning_rate": 3.528015194681861e-05, "loss": 0.018, "step": 40180 }, { "epoch": 0.29707873806215074, "grad_norm": 0.08737242966890335, "learning_rate": 3.527644230769231e-05, "loss": 0.0198, "step": 40190 }, { "epoch": 0.2971526566334526, "grad_norm": 0.08136210590600967, "learning_rate": 3.5272732668566005e-05, "loss": 0.0177, "step": 40200 }, { "epoch": 0.29722657520475443, "grad_norm": 0.07731893658638, "learning_rate": 3.5269023029439694e-05, "loss": 0.0177, "step": 40210 }, { "epoch": 0.2973004937760563, "grad_norm": 0.08985748142004013, "learning_rate": 3.526531339031339e-05, "loss": 0.0175, "step": 40220 }, { "epoch": 0.2973744123473582, "grad_norm": 0.08974775671958923, "learning_rate": 3.5261603751187086e-05, "loss": 0.0202, "step": 40230 }, { "epoch": 0.29744833091866, "grad_norm": 0.09473055601119995, "learning_rate": 3.525789411206078e-05, "loss": 0.0198, "step": 40240 }, { "epoch": 0.29752224948996187, "grad_norm": 0.08488566428422928, "learning_rate": 3.525418447293448e-05, "loss": 0.0184, "step": 40250 }, { "epoch": 0.2975961680612637, "grad_norm": 0.10066484659910202, "learning_rate": 3.525047483380817e-05, "loss": 0.0201, "step": 40260 }, { "epoch": 0.29767008663256556, "grad_norm": 0.08238562941551208, "learning_rate": 3.524676519468186e-05, "loss": 0.0184, "step": 40270 }, { "epoch": 0.2977440052038674, "grad_norm": 0.08526982367038727, "learning_rate": 3.524305555555556e-05, "loss": 0.0199, "step": 40280 }, { "epoch": 0.29781792377516925, "grad_norm": 0.11167903244495392, "learning_rate": 3.523934591642925e-05, "loss": 0.0184, "step": 40290 }, { "epoch": 0.29789184234647115, "grad_norm": 0.10231362283229828, "learning_rate": 3.5235636277302944e-05, "loss": 0.0203, "step": 40300 }, { "epoch": 0.297965760917773, "grad_norm": 0.11291981488466263, "learning_rate": 3.523192663817664e-05, "loss": 0.0187, "step": 40310 }, { "epoch": 0.29803967948907484, "grad_norm": 0.08946817368268967, "learning_rate": 3.5228216999050336e-05, "loss": 0.0174, "step": 40320 }, { "epoch": 0.2981135980603767, "grad_norm": 0.08896126598119736, "learning_rate": 3.522450735992403e-05, "loss": 0.0186, "step": 40330 }, { "epoch": 0.29818751663167853, "grad_norm": 0.06863762438297272, "learning_rate": 3.522079772079772e-05, "loss": 0.0178, "step": 40340 }, { "epoch": 0.2982614352029804, "grad_norm": 0.09483463317155838, "learning_rate": 3.521708808167142e-05, "loss": 0.0197, "step": 40350 }, { "epoch": 0.2983353537742823, "grad_norm": 0.07545123249292374, "learning_rate": 3.5213378442545107e-05, "loss": 0.0196, "step": 40360 }, { "epoch": 0.2984092723455841, "grad_norm": 0.11172790080308914, "learning_rate": 3.52096688034188e-05, "loss": 0.0199, "step": 40370 }, { "epoch": 0.29848319091688597, "grad_norm": 0.08551350235939026, "learning_rate": 3.52059591642925e-05, "loss": 0.0191, "step": 40380 }, { "epoch": 0.2985571094881878, "grad_norm": 0.11399293690919876, "learning_rate": 3.5202249525166194e-05, "loss": 0.018, "step": 40390 }, { "epoch": 0.29863102805948966, "grad_norm": 0.06697198003530502, "learning_rate": 3.519853988603989e-05, "loss": 0.0198, "step": 40400 }, { "epoch": 0.2987049466307915, "grad_norm": 0.08352407813072205, "learning_rate": 3.519483024691358e-05, "loss": 0.0191, "step": 40410 }, { "epoch": 0.29877886520209335, "grad_norm": 0.09723468124866486, "learning_rate": 3.5191120607787276e-05, "loss": 0.0185, "step": 40420 }, { "epoch": 0.29885278377339525, "grad_norm": 0.09839003533124924, "learning_rate": 3.518741096866097e-05, "loss": 0.0181, "step": 40430 }, { "epoch": 0.2989267023446971, "grad_norm": 0.07423264533281326, "learning_rate": 3.518370132953466e-05, "loss": 0.0179, "step": 40440 }, { "epoch": 0.29900062091599894, "grad_norm": 0.10543884336948395, "learning_rate": 3.517999169040836e-05, "loss": 0.0207, "step": 40450 }, { "epoch": 0.2990745394873008, "grad_norm": 0.09270637482404709, "learning_rate": 3.517628205128205e-05, "loss": 0.0204, "step": 40460 }, { "epoch": 0.29914845805860263, "grad_norm": 0.10229405760765076, "learning_rate": 3.517257241215575e-05, "loss": 0.0167, "step": 40470 }, { "epoch": 0.2992223766299045, "grad_norm": 0.07626686990261078, "learning_rate": 3.5168862773029445e-05, "loss": 0.0211, "step": 40480 }, { "epoch": 0.2992962952012064, "grad_norm": 0.09048005193471909, "learning_rate": 3.5165153133903134e-05, "loss": 0.0197, "step": 40490 }, { "epoch": 0.2993702137725082, "grad_norm": 0.08271709829568863, "learning_rate": 3.516144349477683e-05, "loss": 0.0186, "step": 40500 }, { "epoch": 0.29944413234381007, "grad_norm": 0.1680927276611328, "learning_rate": 3.5157733855650526e-05, "loss": 0.0232, "step": 40510 }, { "epoch": 0.2995180509151119, "grad_norm": 0.0796268880367279, "learning_rate": 3.5154024216524215e-05, "loss": 0.0185, "step": 40520 }, { "epoch": 0.29959196948641376, "grad_norm": 0.21023017168045044, "learning_rate": 3.515031457739791e-05, "loss": 0.0168, "step": 40530 }, { "epoch": 0.2996658880577156, "grad_norm": 0.11400981992483139, "learning_rate": 3.514660493827161e-05, "loss": 0.0181, "step": 40540 }, { "epoch": 0.29973980662901745, "grad_norm": 0.08473803102970123, "learning_rate": 3.51428952991453e-05, "loss": 0.0187, "step": 40550 }, { "epoch": 0.29981372520031935, "grad_norm": 0.06288875639438629, "learning_rate": 3.5139185660019e-05, "loss": 0.0188, "step": 40560 }, { "epoch": 0.2998876437716212, "grad_norm": 0.09498181939125061, "learning_rate": 3.513547602089269e-05, "loss": 0.0177, "step": 40570 }, { "epoch": 0.29996156234292304, "grad_norm": 0.09277638047933578, "learning_rate": 3.5131766381766384e-05, "loss": 0.0173, "step": 40580 }, { "epoch": 0.3000354809142249, "grad_norm": 0.097083680331707, "learning_rate": 3.512805674264007e-05, "loss": 0.0187, "step": 40590 }, { "epoch": 0.30010939948552673, "grad_norm": 0.09916821122169495, "learning_rate": 3.512434710351377e-05, "loss": 0.0216, "step": 40600 }, { "epoch": 0.3001833180568286, "grad_norm": 0.08453313261270523, "learning_rate": 3.5120637464387465e-05, "loss": 0.0192, "step": 40610 }, { "epoch": 0.3002572366281305, "grad_norm": 0.07424086332321167, "learning_rate": 3.511692782526116e-05, "loss": 0.0171, "step": 40620 }, { "epoch": 0.3003311551994323, "grad_norm": 0.07494385540485382, "learning_rate": 3.511321818613486e-05, "loss": 0.0194, "step": 40630 }, { "epoch": 0.30040507377073417, "grad_norm": 0.10998072475194931, "learning_rate": 3.5109508547008546e-05, "loss": 0.0191, "step": 40640 }, { "epoch": 0.300478992342036, "grad_norm": 0.0937514454126358, "learning_rate": 3.510579890788224e-05, "loss": 0.0193, "step": 40650 }, { "epoch": 0.30055291091333786, "grad_norm": 0.09247662872076035, "learning_rate": 3.510208926875594e-05, "loss": 0.0184, "step": 40660 }, { "epoch": 0.3006268294846397, "grad_norm": 0.07527650147676468, "learning_rate": 3.509837962962963e-05, "loss": 0.0191, "step": 40670 }, { "epoch": 0.3007007480559416, "grad_norm": 0.11358445882797241, "learning_rate": 3.509466999050332e-05, "loss": 0.0185, "step": 40680 }, { "epoch": 0.30077466662724345, "grad_norm": 0.06219291314482689, "learning_rate": 3.509096035137702e-05, "loss": 0.0181, "step": 40690 }, { "epoch": 0.3008485851985453, "grad_norm": 0.08207332342863083, "learning_rate": 3.5087250712250715e-05, "loss": 0.0179, "step": 40700 }, { "epoch": 0.30092250376984714, "grad_norm": 0.06086430326104164, "learning_rate": 3.508354107312441e-05, "loss": 0.0174, "step": 40710 }, { "epoch": 0.300996422341149, "grad_norm": 0.0684448629617691, "learning_rate": 3.50798314339981e-05, "loss": 0.0188, "step": 40720 }, { "epoch": 0.30107034091245083, "grad_norm": 0.051553964614868164, "learning_rate": 3.5076121794871796e-05, "loss": 0.0173, "step": 40730 }, { "epoch": 0.3011442594837527, "grad_norm": 0.1044883280992508, "learning_rate": 3.507241215574549e-05, "loss": 0.0191, "step": 40740 }, { "epoch": 0.3012181780550546, "grad_norm": 0.10106072574853897, "learning_rate": 3.506870251661918e-05, "loss": 0.0201, "step": 40750 }, { "epoch": 0.3012920966263564, "grad_norm": 0.09131675213575363, "learning_rate": 3.506499287749288e-05, "loss": 0.0207, "step": 40760 }, { "epoch": 0.30136601519765827, "grad_norm": 0.08383143693208694, "learning_rate": 3.5061283238366573e-05, "loss": 0.0232, "step": 40770 }, { "epoch": 0.3014399337689601, "grad_norm": 0.08969314396381378, "learning_rate": 3.505757359924027e-05, "loss": 0.0203, "step": 40780 }, { "epoch": 0.30151385234026196, "grad_norm": 0.12498711049556732, "learning_rate": 3.5053863960113965e-05, "loss": 0.0195, "step": 40790 }, { "epoch": 0.3015877709115638, "grad_norm": 0.06831370294094086, "learning_rate": 3.5050154320987655e-05, "loss": 0.019, "step": 40800 }, { "epoch": 0.3016616894828657, "grad_norm": 0.08495339006185532, "learning_rate": 3.504644468186135e-05, "loss": 0.0203, "step": 40810 }, { "epoch": 0.30173560805416755, "grad_norm": 0.11111482977867126, "learning_rate": 3.504273504273504e-05, "loss": 0.0179, "step": 40820 }, { "epoch": 0.3018095266254694, "grad_norm": 0.06847722828388214, "learning_rate": 3.5039025403608736e-05, "loss": 0.02, "step": 40830 }, { "epoch": 0.30188344519677124, "grad_norm": 0.1520591825246811, "learning_rate": 3.503531576448243e-05, "loss": 0.0189, "step": 40840 }, { "epoch": 0.3019573637680731, "grad_norm": 0.13091471791267395, "learning_rate": 3.503160612535613e-05, "loss": 0.0189, "step": 40850 }, { "epoch": 0.30203128233937493, "grad_norm": 0.08369293808937073, "learning_rate": 3.5027896486229824e-05, "loss": 0.019, "step": 40860 }, { "epoch": 0.3021052009106768, "grad_norm": 0.09184248745441437, "learning_rate": 3.502418684710351e-05, "loss": 0.0194, "step": 40870 }, { "epoch": 0.3021791194819787, "grad_norm": 0.08807466179132462, "learning_rate": 3.502047720797721e-05, "loss": 0.0197, "step": 40880 }, { "epoch": 0.3022530380532805, "grad_norm": 0.07857280224561691, "learning_rate": 3.5016767568850905e-05, "loss": 0.0186, "step": 40890 }, { "epoch": 0.30232695662458237, "grad_norm": 0.09837421774864197, "learning_rate": 3.5013057929724594e-05, "loss": 0.0214, "step": 40900 }, { "epoch": 0.3024008751958842, "grad_norm": 0.08540481328964233, "learning_rate": 3.500934829059829e-05, "loss": 0.0192, "step": 40910 }, { "epoch": 0.30247479376718606, "grad_norm": 0.0964401438832283, "learning_rate": 3.5005638651471986e-05, "loss": 0.0191, "step": 40920 }, { "epoch": 0.3025487123384879, "grad_norm": 0.07555688172578812, "learning_rate": 3.500192901234568e-05, "loss": 0.0182, "step": 40930 }, { "epoch": 0.3026226309097898, "grad_norm": 0.07472950965166092, "learning_rate": 3.499821937321938e-05, "loss": 0.0152, "step": 40940 }, { "epoch": 0.30269654948109165, "grad_norm": 0.0759170651435852, "learning_rate": 3.499450973409307e-05, "loss": 0.0199, "step": 40950 }, { "epoch": 0.3027704680523935, "grad_norm": 0.13162405788898468, "learning_rate": 3.499080009496676e-05, "loss": 0.0205, "step": 40960 }, { "epoch": 0.30284438662369534, "grad_norm": 0.08498091250658035, "learning_rate": 3.498709045584046e-05, "loss": 0.0189, "step": 40970 }, { "epoch": 0.3029183051949972, "grad_norm": 0.06392598152160645, "learning_rate": 3.498338081671415e-05, "loss": 0.0179, "step": 40980 }, { "epoch": 0.30299222376629903, "grad_norm": 0.08923903107643127, "learning_rate": 3.4979671177587844e-05, "loss": 0.0195, "step": 40990 }, { "epoch": 0.3030661423376009, "grad_norm": 0.10305962711572647, "learning_rate": 3.497596153846154e-05, "loss": 0.0169, "step": 41000 }, { "epoch": 0.3031400609089028, "grad_norm": 0.07298894971609116, "learning_rate": 3.4972251899335236e-05, "loss": 0.0166, "step": 41010 }, { "epoch": 0.3032139794802046, "grad_norm": 0.07091324776411057, "learning_rate": 3.496854226020893e-05, "loss": 0.0177, "step": 41020 }, { "epoch": 0.30328789805150647, "grad_norm": 0.2106444388628006, "learning_rate": 3.496483262108262e-05, "loss": 0.0219, "step": 41030 }, { "epoch": 0.3033618166228083, "grad_norm": 0.06905341893434525, "learning_rate": 3.496112298195632e-05, "loss": 0.0164, "step": 41040 }, { "epoch": 0.30343573519411016, "grad_norm": 0.06448206305503845, "learning_rate": 3.4957413342830006e-05, "loss": 0.02, "step": 41050 }, { "epoch": 0.303509653765412, "grad_norm": 0.07077588140964508, "learning_rate": 3.49537037037037e-05, "loss": 0.0175, "step": 41060 }, { "epoch": 0.3035835723367139, "grad_norm": 0.09758295118808746, "learning_rate": 3.49499940645774e-05, "loss": 0.0209, "step": 41070 }, { "epoch": 0.30365749090801575, "grad_norm": 0.10992822051048279, "learning_rate": 3.4946284425451094e-05, "loss": 0.0178, "step": 41080 }, { "epoch": 0.3037314094793176, "grad_norm": 0.09202392399311066, "learning_rate": 3.494257478632479e-05, "loss": 0.0202, "step": 41090 }, { "epoch": 0.30380532805061944, "grad_norm": 0.10223499685525894, "learning_rate": 3.493886514719848e-05, "loss": 0.018, "step": 41100 }, { "epoch": 0.3038792466219213, "grad_norm": 0.08392112702131271, "learning_rate": 3.4935155508072175e-05, "loss": 0.0211, "step": 41110 }, { "epoch": 0.30395316519322313, "grad_norm": 0.10613808035850525, "learning_rate": 3.493144586894587e-05, "loss": 0.0208, "step": 41120 }, { "epoch": 0.304027083764525, "grad_norm": 0.07967979460954666, "learning_rate": 3.492773622981956e-05, "loss": 0.0183, "step": 41130 }, { "epoch": 0.3041010023358269, "grad_norm": 0.09585115313529968, "learning_rate": 3.4924026590693256e-05, "loss": 0.0173, "step": 41140 }, { "epoch": 0.3041749209071287, "grad_norm": 0.09077829867601395, "learning_rate": 3.492031695156696e-05, "loss": 0.0191, "step": 41150 }, { "epoch": 0.30424883947843057, "grad_norm": 0.07524742186069489, "learning_rate": 3.491660731244065e-05, "loss": 0.0161, "step": 41160 }, { "epoch": 0.3043227580497324, "grad_norm": 0.09042250365018845, "learning_rate": 3.4912897673314344e-05, "loss": 0.0212, "step": 41170 }, { "epoch": 0.30439667662103426, "grad_norm": 0.10119567066431046, "learning_rate": 3.4909188034188034e-05, "loss": 0.0219, "step": 41180 }, { "epoch": 0.3044705951923361, "grad_norm": 0.09681563824415207, "learning_rate": 3.490547839506173e-05, "loss": 0.0183, "step": 41190 }, { "epoch": 0.304544513763638, "grad_norm": 0.10629399120807648, "learning_rate": 3.4901768755935425e-05, "loss": 0.0167, "step": 41200 }, { "epoch": 0.30461843233493985, "grad_norm": 0.08098754286766052, "learning_rate": 3.4898059116809115e-05, "loss": 0.0213, "step": 41210 }, { "epoch": 0.3046923509062417, "grad_norm": 0.09643225371837616, "learning_rate": 3.489434947768281e-05, "loss": 0.0211, "step": 41220 }, { "epoch": 0.30476626947754354, "grad_norm": 0.09403149038553238, "learning_rate": 3.4890639838556507e-05, "loss": 0.0193, "step": 41230 }, { "epoch": 0.3048401880488454, "grad_norm": 0.06702283024787903, "learning_rate": 3.48869301994302e-05, "loss": 0.0171, "step": 41240 }, { "epoch": 0.30491410662014723, "grad_norm": 0.08411083370447159, "learning_rate": 3.48832205603039e-05, "loss": 0.021, "step": 41250 }, { "epoch": 0.3049880251914491, "grad_norm": 0.08013994246721268, "learning_rate": 3.487951092117759e-05, "loss": 0.0192, "step": 41260 }, { "epoch": 0.305061943762751, "grad_norm": 0.08897579461336136, "learning_rate": 3.4875801282051284e-05, "loss": 0.0197, "step": 41270 }, { "epoch": 0.3051358623340528, "grad_norm": 0.08514079451560974, "learning_rate": 3.487209164292497e-05, "loss": 0.0197, "step": 41280 }, { "epoch": 0.30520978090535467, "grad_norm": 0.09539547562599182, "learning_rate": 3.486838200379867e-05, "loss": 0.0178, "step": 41290 }, { "epoch": 0.3052836994766565, "grad_norm": 0.086562380194664, "learning_rate": 3.486467236467237e-05, "loss": 0.0165, "step": 41300 }, { "epoch": 0.30535761804795836, "grad_norm": 0.13276004791259766, "learning_rate": 3.486096272554606e-05, "loss": 0.0169, "step": 41310 }, { "epoch": 0.3054315366192602, "grad_norm": 0.11075304448604584, "learning_rate": 3.485725308641976e-05, "loss": 0.0191, "step": 41320 }, { "epoch": 0.3055054551905621, "grad_norm": 0.102408267557621, "learning_rate": 3.4853543447293446e-05, "loss": 0.0188, "step": 41330 }, { "epoch": 0.30557937376186395, "grad_norm": 0.11963898688554764, "learning_rate": 3.484983380816714e-05, "loss": 0.0199, "step": 41340 }, { "epoch": 0.3056532923331658, "grad_norm": 0.10946375876665115, "learning_rate": 3.484612416904084e-05, "loss": 0.0193, "step": 41350 }, { "epoch": 0.30572721090446764, "grad_norm": 0.10319077968597412, "learning_rate": 3.484241452991453e-05, "loss": 0.0178, "step": 41360 }, { "epoch": 0.3058011294757695, "grad_norm": 0.09708920121192932, "learning_rate": 3.483870489078822e-05, "loss": 0.0172, "step": 41370 }, { "epoch": 0.30587504804707133, "grad_norm": 0.06438388675451279, "learning_rate": 3.4834995251661926e-05, "loss": 0.0181, "step": 41380 }, { "epoch": 0.3059489666183732, "grad_norm": 0.09373542666435242, "learning_rate": 3.4831285612535615e-05, "loss": 0.019, "step": 41390 }, { "epoch": 0.3060228851896751, "grad_norm": 0.07884660363197327, "learning_rate": 3.482757597340931e-05, "loss": 0.0173, "step": 41400 }, { "epoch": 0.3060968037609769, "grad_norm": 0.10232368856668472, "learning_rate": 3.4823866334283e-05, "loss": 0.02, "step": 41410 }, { "epoch": 0.30617072233227877, "grad_norm": 0.08321083337068558, "learning_rate": 3.4820156695156696e-05, "loss": 0.0161, "step": 41420 }, { "epoch": 0.3062446409035806, "grad_norm": 0.09922542423009872, "learning_rate": 3.481644705603039e-05, "loss": 0.0178, "step": 41430 }, { "epoch": 0.30631855947488246, "grad_norm": 0.08980782330036163, "learning_rate": 3.481273741690408e-05, "loss": 0.0188, "step": 41440 }, { "epoch": 0.3063924780461843, "grad_norm": 0.06885354965925217, "learning_rate": 3.4809027777777784e-05, "loss": 0.0186, "step": 41450 }, { "epoch": 0.3064663966174862, "grad_norm": 0.11679676920175552, "learning_rate": 3.480531813865147e-05, "loss": 0.0178, "step": 41460 }, { "epoch": 0.30654031518878805, "grad_norm": 0.09281063824892044, "learning_rate": 3.480160849952517e-05, "loss": 0.0199, "step": 41470 }, { "epoch": 0.3066142337600899, "grad_norm": 0.06953068822622299, "learning_rate": 3.4797898860398865e-05, "loss": 0.0205, "step": 41480 }, { "epoch": 0.30668815233139174, "grad_norm": 0.09276734292507172, "learning_rate": 3.4794189221272554e-05, "loss": 0.0194, "step": 41490 }, { "epoch": 0.3067620709026936, "grad_norm": 0.08479952067136765, "learning_rate": 3.479047958214625e-05, "loss": 0.0178, "step": 41500 }, { "epoch": 0.30683598947399543, "grad_norm": 0.07465128600597382, "learning_rate": 3.478676994301994e-05, "loss": 0.0187, "step": 41510 }, { "epoch": 0.3069099080452973, "grad_norm": 0.11329672485589981, "learning_rate": 3.4783060303893635e-05, "loss": 0.0181, "step": 41520 }, { "epoch": 0.3069838266165992, "grad_norm": 0.06531966477632523, "learning_rate": 3.477935066476734e-05, "loss": 0.019, "step": 41530 }, { "epoch": 0.307057745187901, "grad_norm": 0.09387999027967453, "learning_rate": 3.477564102564103e-05, "loss": 0.0192, "step": 41540 }, { "epoch": 0.30713166375920287, "grad_norm": 0.06443177163600922, "learning_rate": 3.477193138651472e-05, "loss": 0.0194, "step": 41550 }, { "epoch": 0.3072055823305047, "grad_norm": 0.10215765982866287, "learning_rate": 3.476822174738841e-05, "loss": 0.0194, "step": 41560 }, { "epoch": 0.30727950090180656, "grad_norm": 0.09269960969686508, "learning_rate": 3.476451210826211e-05, "loss": 0.0199, "step": 41570 }, { "epoch": 0.3073534194731084, "grad_norm": 0.08733227849006653, "learning_rate": 3.4760802469135804e-05, "loss": 0.0182, "step": 41580 }, { "epoch": 0.3074273380444103, "grad_norm": 0.08594011515378952, "learning_rate": 3.4757092830009494e-05, "loss": 0.02, "step": 41590 }, { "epoch": 0.30750125661571215, "grad_norm": 0.07571780681610107, "learning_rate": 3.4753383190883196e-05, "loss": 0.0209, "step": 41600 }, { "epoch": 0.307575175187014, "grad_norm": 0.07805407047271729, "learning_rate": 3.474967355175689e-05, "loss": 0.0199, "step": 41610 }, { "epoch": 0.30764909375831584, "grad_norm": 0.07116065174341202, "learning_rate": 3.474596391263058e-05, "loss": 0.0166, "step": 41620 }, { "epoch": 0.3077230123296177, "grad_norm": 0.07087753713130951, "learning_rate": 3.474225427350428e-05, "loss": 0.0215, "step": 41630 }, { "epoch": 0.30779693090091953, "grad_norm": 0.09777181595563889, "learning_rate": 3.473854463437797e-05, "loss": 0.0177, "step": 41640 }, { "epoch": 0.3078708494722214, "grad_norm": 0.07996222376823425, "learning_rate": 3.473483499525166e-05, "loss": 0.0201, "step": 41650 }, { "epoch": 0.3079447680435233, "grad_norm": 0.08959174901247025, "learning_rate": 3.473112535612536e-05, "loss": 0.0205, "step": 41660 }, { "epoch": 0.3080186866148251, "grad_norm": 0.11086247861385345, "learning_rate": 3.472741571699905e-05, "loss": 0.0192, "step": 41670 }, { "epoch": 0.30809260518612697, "grad_norm": 0.08845746517181396, "learning_rate": 3.472370607787275e-05, "loss": 0.0188, "step": 41680 }, { "epoch": 0.3081665237574288, "grad_norm": 0.08229666203260422, "learning_rate": 3.471999643874644e-05, "loss": 0.0191, "step": 41690 }, { "epoch": 0.30824044232873066, "grad_norm": 0.10134036093950272, "learning_rate": 3.4716286799620136e-05, "loss": 0.0199, "step": 41700 }, { "epoch": 0.3083143609000325, "grad_norm": 0.09389904886484146, "learning_rate": 3.471257716049383e-05, "loss": 0.0169, "step": 41710 }, { "epoch": 0.3083882794713344, "grad_norm": 0.1407119482755661, "learning_rate": 3.470886752136752e-05, "loss": 0.0218, "step": 41720 }, { "epoch": 0.30846219804263625, "grad_norm": 0.08079833537340164, "learning_rate": 3.470515788224122e-05, "loss": 0.0166, "step": 41730 }, { "epoch": 0.3085361166139381, "grad_norm": 0.07268788665533066, "learning_rate": 3.4701448243114906e-05, "loss": 0.0193, "step": 41740 }, { "epoch": 0.30861003518523994, "grad_norm": 0.08180305361747742, "learning_rate": 3.469773860398861e-05, "loss": 0.0193, "step": 41750 }, { "epoch": 0.3086839537565418, "grad_norm": 0.0704931765794754, "learning_rate": 3.4694028964862305e-05, "loss": 0.0174, "step": 41760 }, { "epoch": 0.30875787232784363, "grad_norm": 0.12671099603176117, "learning_rate": 3.4690319325735994e-05, "loss": 0.0169, "step": 41770 }, { "epoch": 0.3088317908991455, "grad_norm": 0.07578154653310776, "learning_rate": 3.468660968660969e-05, "loss": 0.016, "step": 41780 }, { "epoch": 0.3089057094704474, "grad_norm": 0.05910816416144371, "learning_rate": 3.468290004748338e-05, "loss": 0.0177, "step": 41790 }, { "epoch": 0.3089796280417492, "grad_norm": 0.1280171275138855, "learning_rate": 3.4679190408357075e-05, "loss": 0.0186, "step": 41800 }, { "epoch": 0.30905354661305107, "grad_norm": 0.07991659641265869, "learning_rate": 3.467548076923077e-05, "loss": 0.0178, "step": 41810 }, { "epoch": 0.3091274651843529, "grad_norm": 0.10132498294115067, "learning_rate": 3.467177113010446e-05, "loss": 0.0168, "step": 41820 }, { "epoch": 0.30920138375565476, "grad_norm": 0.08080621808767319, "learning_rate": 3.466806149097816e-05, "loss": 0.0168, "step": 41830 }, { "epoch": 0.3092753023269566, "grad_norm": 0.08449237048625946, "learning_rate": 3.466435185185186e-05, "loss": 0.0185, "step": 41840 }, { "epoch": 0.3093492208982585, "grad_norm": 0.08588672429323196, "learning_rate": 3.466064221272555e-05, "loss": 0.0179, "step": 41850 }, { "epoch": 0.30942313946956035, "grad_norm": 0.08748660981655121, "learning_rate": 3.4656932573599244e-05, "loss": 0.0216, "step": 41860 }, { "epoch": 0.3094970580408622, "grad_norm": 0.0776829645037651, "learning_rate": 3.465322293447293e-05, "loss": 0.0189, "step": 41870 }, { "epoch": 0.30957097661216404, "grad_norm": 0.05737544223666191, "learning_rate": 3.464951329534663e-05, "loss": 0.0193, "step": 41880 }, { "epoch": 0.3096448951834659, "grad_norm": 0.08774904906749725, "learning_rate": 3.4645803656220325e-05, "loss": 0.0207, "step": 41890 }, { "epoch": 0.30971881375476773, "grad_norm": 0.10542219877243042, "learning_rate": 3.4642094017094014e-05, "loss": 0.0182, "step": 41900 }, { "epoch": 0.3097927323260696, "grad_norm": 0.08650589734315872, "learning_rate": 3.463838437796772e-05, "loss": 0.0195, "step": 41910 }, { "epoch": 0.3098666508973715, "grad_norm": 0.07177089154720306, "learning_rate": 3.4634674738841406e-05, "loss": 0.0187, "step": 41920 }, { "epoch": 0.3099405694686733, "grad_norm": 0.09856868535280228, "learning_rate": 3.46309650997151e-05, "loss": 0.0192, "step": 41930 }, { "epoch": 0.31001448803997517, "grad_norm": 0.09403856843709946, "learning_rate": 3.46272554605888e-05, "loss": 0.0198, "step": 41940 }, { "epoch": 0.310088406611277, "grad_norm": 0.09185963124036789, "learning_rate": 3.462354582146249e-05, "loss": 0.0191, "step": 41950 }, { "epoch": 0.31016232518257886, "grad_norm": 0.08827266097068787, "learning_rate": 3.4619836182336183e-05, "loss": 0.0171, "step": 41960 }, { "epoch": 0.3102362437538807, "grad_norm": 0.10931427776813507, "learning_rate": 3.461612654320987e-05, "loss": 0.0182, "step": 41970 }, { "epoch": 0.3103101623251826, "grad_norm": 0.08554702252149582, "learning_rate": 3.4612416904083575e-05, "loss": 0.017, "step": 41980 }, { "epoch": 0.31038408089648445, "grad_norm": 0.0926201343536377, "learning_rate": 3.460870726495727e-05, "loss": 0.02, "step": 41990 }, { "epoch": 0.3104579994677863, "grad_norm": 0.08251968026161194, "learning_rate": 3.460499762583096e-05, "loss": 0.021, "step": 42000 }, { "epoch": 0.31053191803908814, "grad_norm": 0.1010434478521347, "learning_rate": 3.4601287986704657e-05, "loss": 0.0203, "step": 42010 }, { "epoch": 0.31060583661039, "grad_norm": 0.07276196032762527, "learning_rate": 3.4597578347578346e-05, "loss": 0.0184, "step": 42020 }, { "epoch": 0.31067975518169183, "grad_norm": 0.08228261023759842, "learning_rate": 3.459386870845204e-05, "loss": 0.0182, "step": 42030 }, { "epoch": 0.3107536737529937, "grad_norm": 0.07231447100639343, "learning_rate": 3.459015906932574e-05, "loss": 0.0183, "step": 42040 }, { "epoch": 0.3108275923242956, "grad_norm": 0.08942629396915436, "learning_rate": 3.458644943019943e-05, "loss": 0.0182, "step": 42050 }, { "epoch": 0.3109015108955974, "grad_norm": 0.07905402779579163, "learning_rate": 3.458273979107313e-05, "loss": 0.0201, "step": 42060 }, { "epoch": 0.31097542946689927, "grad_norm": 0.08255128562450409, "learning_rate": 3.4579030151946826e-05, "loss": 0.016, "step": 42070 }, { "epoch": 0.3110493480382011, "grad_norm": 0.07505086809396744, "learning_rate": 3.4575320512820515e-05, "loss": 0.0213, "step": 42080 }, { "epoch": 0.31112326660950296, "grad_norm": 0.07689981907606125, "learning_rate": 3.457161087369421e-05, "loss": 0.0186, "step": 42090 }, { "epoch": 0.3111971851808048, "grad_norm": 0.08315929025411606, "learning_rate": 3.45679012345679e-05, "loss": 0.0177, "step": 42100 }, { "epoch": 0.3112711037521067, "grad_norm": 0.09151861816644669, "learning_rate": 3.4564191595441596e-05, "loss": 0.0172, "step": 42110 }, { "epoch": 0.31134502232340855, "grad_norm": 0.11061935126781464, "learning_rate": 3.456048195631529e-05, "loss": 0.0223, "step": 42120 }, { "epoch": 0.3114189408947104, "grad_norm": 0.061550162732601166, "learning_rate": 3.455677231718899e-05, "loss": 0.0176, "step": 42130 }, { "epoch": 0.31149285946601224, "grad_norm": 0.11352083832025528, "learning_rate": 3.4553062678062684e-05, "loss": 0.0201, "step": 42140 }, { "epoch": 0.3115667780373141, "grad_norm": 0.12292362749576569, "learning_rate": 3.454935303893637e-05, "loss": 0.0197, "step": 42150 }, { "epoch": 0.31164069660861593, "grad_norm": 0.09067011624574661, "learning_rate": 3.454564339981007e-05, "loss": 0.0181, "step": 42160 }, { "epoch": 0.3117146151799178, "grad_norm": 0.08751238137483597, "learning_rate": 3.4541933760683765e-05, "loss": 0.0175, "step": 42170 }, { "epoch": 0.3117885337512197, "grad_norm": 0.11258953809738159, "learning_rate": 3.4538224121557454e-05, "loss": 0.0208, "step": 42180 }, { "epoch": 0.3118624523225215, "grad_norm": 0.06748968362808228, "learning_rate": 3.453451448243115e-05, "loss": 0.0176, "step": 42190 }, { "epoch": 0.31193637089382337, "grad_norm": 0.0773598849773407, "learning_rate": 3.453080484330484e-05, "loss": 0.0198, "step": 42200 }, { "epoch": 0.3120102894651252, "grad_norm": 0.09891007095575333, "learning_rate": 3.452709520417854e-05, "loss": 0.0183, "step": 42210 }, { "epoch": 0.31208420803642706, "grad_norm": 0.06439699977636337, "learning_rate": 3.452338556505224e-05, "loss": 0.0182, "step": 42220 }, { "epoch": 0.3121581266077289, "grad_norm": 0.09395837038755417, "learning_rate": 3.451967592592593e-05, "loss": 0.0199, "step": 42230 }, { "epoch": 0.3122320451790308, "grad_norm": 0.06609996408224106, "learning_rate": 3.451596628679962e-05, "loss": 0.0182, "step": 42240 }, { "epoch": 0.31230596375033265, "grad_norm": 0.13297858834266663, "learning_rate": 3.451225664767331e-05, "loss": 0.0208, "step": 42250 }, { "epoch": 0.3123798823216345, "grad_norm": 0.07555203139781952, "learning_rate": 3.450854700854701e-05, "loss": 0.017, "step": 42260 }, { "epoch": 0.31245380089293634, "grad_norm": 0.09166613966226578, "learning_rate": 3.4504837369420704e-05, "loss": 0.0201, "step": 42270 }, { "epoch": 0.3125277194642382, "grad_norm": 0.08490381389856339, "learning_rate": 3.45011277302944e-05, "loss": 0.0175, "step": 42280 }, { "epoch": 0.31260163803554003, "grad_norm": 0.09918981790542603, "learning_rate": 3.4497418091168096e-05, "loss": 0.0193, "step": 42290 }, { "epoch": 0.3126755566068419, "grad_norm": 0.09711208939552307, "learning_rate": 3.449370845204179e-05, "loss": 0.022, "step": 42300 }, { "epoch": 0.3127494751781438, "grad_norm": 0.08405983448028564, "learning_rate": 3.448999881291548e-05, "loss": 0.0179, "step": 42310 }, { "epoch": 0.3128233937494456, "grad_norm": 0.0851854532957077, "learning_rate": 3.448628917378918e-05, "loss": 0.0159, "step": 42320 }, { "epoch": 0.31289731232074747, "grad_norm": 0.09262275695800781, "learning_rate": 3.4482579534662866e-05, "loss": 0.0186, "step": 42330 }, { "epoch": 0.3129712308920493, "grad_norm": 0.08604475110769272, "learning_rate": 3.447886989553656e-05, "loss": 0.0219, "step": 42340 }, { "epoch": 0.31304514946335116, "grad_norm": 0.10185009241104126, "learning_rate": 3.447516025641026e-05, "loss": 0.0192, "step": 42350 }, { "epoch": 0.313119068034653, "grad_norm": 0.10257411748170853, "learning_rate": 3.4471450617283954e-05, "loss": 0.0196, "step": 42360 }, { "epoch": 0.3131929866059549, "grad_norm": 0.07268723100423813, "learning_rate": 3.446774097815765e-05, "loss": 0.0179, "step": 42370 }, { "epoch": 0.31326690517725675, "grad_norm": 0.07031035423278809, "learning_rate": 3.446403133903134e-05, "loss": 0.0182, "step": 42380 }, { "epoch": 0.3133408237485586, "grad_norm": 0.08224672079086304, "learning_rate": 3.4460321699905035e-05, "loss": 0.0175, "step": 42390 }, { "epoch": 0.31341474231986044, "grad_norm": 0.10886122286319733, "learning_rate": 3.445661206077873e-05, "loss": 0.0185, "step": 42400 }, { "epoch": 0.3134886608911623, "grad_norm": 0.10665310174226761, "learning_rate": 3.445290242165242e-05, "loss": 0.0197, "step": 42410 }, { "epoch": 0.31356257946246413, "grad_norm": 0.08749476820230484, "learning_rate": 3.444919278252612e-05, "loss": 0.0183, "step": 42420 }, { "epoch": 0.313636498033766, "grad_norm": 0.08360803872346878, "learning_rate": 3.444548314339981e-05, "loss": 0.0192, "step": 42430 }, { "epoch": 0.3137104166050679, "grad_norm": 0.07620719075202942, "learning_rate": 3.444177350427351e-05, "loss": 0.0173, "step": 42440 }, { "epoch": 0.3137843351763697, "grad_norm": 0.08754304051399231, "learning_rate": 3.4438063865147205e-05, "loss": 0.0189, "step": 42450 }, { "epoch": 0.31385825374767157, "grad_norm": 0.09048190712928772, "learning_rate": 3.4434354226020894e-05, "loss": 0.0195, "step": 42460 }, { "epoch": 0.3139321723189734, "grad_norm": 0.08618180453777313, "learning_rate": 3.443064458689459e-05, "loss": 0.0193, "step": 42470 }, { "epoch": 0.31400609089027526, "grad_norm": 0.11144917458295822, "learning_rate": 3.442693494776828e-05, "loss": 0.0202, "step": 42480 }, { "epoch": 0.3140800094615771, "grad_norm": 0.10908270627260208, "learning_rate": 3.4423225308641975e-05, "loss": 0.0192, "step": 42490 }, { "epoch": 0.314153928032879, "grad_norm": 0.09473959356546402, "learning_rate": 3.441951566951567e-05, "loss": 0.0179, "step": 42500 }, { "epoch": 0.31422784660418085, "grad_norm": 0.11607281863689423, "learning_rate": 3.441580603038937e-05, "loss": 0.0172, "step": 42510 }, { "epoch": 0.3143017651754827, "grad_norm": 0.0758347287774086, "learning_rate": 3.441209639126306e-05, "loss": 0.0177, "step": 42520 }, { "epoch": 0.31437568374678454, "grad_norm": 0.11561296880245209, "learning_rate": 3.440838675213676e-05, "loss": 0.0186, "step": 42530 }, { "epoch": 0.3144496023180864, "grad_norm": 0.09926974773406982, "learning_rate": 3.440467711301045e-05, "loss": 0.0176, "step": 42540 }, { "epoch": 0.31452352088938823, "grad_norm": 0.06947681307792664, "learning_rate": 3.4400967473884144e-05, "loss": 0.0181, "step": 42550 }, { "epoch": 0.31459743946069013, "grad_norm": 0.08658704906702042, "learning_rate": 3.439725783475783e-05, "loss": 0.0196, "step": 42560 }, { "epoch": 0.314671358031992, "grad_norm": 0.09495535492897034, "learning_rate": 3.439354819563153e-05, "loss": 0.0183, "step": 42570 }, { "epoch": 0.3147452766032938, "grad_norm": 0.07387121021747589, "learning_rate": 3.4389838556505225e-05, "loss": 0.0173, "step": 42580 }, { "epoch": 0.31481919517459567, "grad_norm": 0.08743169158697128, "learning_rate": 3.438612891737892e-05, "loss": 0.0203, "step": 42590 }, { "epoch": 0.3148931137458975, "grad_norm": 0.06493943184614182, "learning_rate": 3.438241927825262e-05, "loss": 0.0181, "step": 42600 }, { "epoch": 0.31496703231719936, "grad_norm": 0.09590958058834076, "learning_rate": 3.4378709639126306e-05, "loss": 0.0191, "step": 42610 }, { "epoch": 0.3150409508885012, "grad_norm": 0.11012791097164154, "learning_rate": 3.4375e-05, "loss": 0.0212, "step": 42620 }, { "epoch": 0.3151148694598031, "grad_norm": 0.09544768929481506, "learning_rate": 3.43712903608737e-05, "loss": 0.0197, "step": 42630 }, { "epoch": 0.31518878803110495, "grad_norm": 0.09332182258367538, "learning_rate": 3.436758072174739e-05, "loss": 0.0205, "step": 42640 }, { "epoch": 0.3152627066024068, "grad_norm": 0.0923343226313591, "learning_rate": 3.436387108262108e-05, "loss": 0.0183, "step": 42650 }, { "epoch": 0.31533662517370864, "grad_norm": 0.0833817794919014, "learning_rate": 3.436016144349478e-05, "loss": 0.0217, "step": 42660 }, { "epoch": 0.3154105437450105, "grad_norm": 0.08704043924808502, "learning_rate": 3.4356451804368475e-05, "loss": 0.02, "step": 42670 }, { "epoch": 0.31548446231631233, "grad_norm": 0.0813233032822609, "learning_rate": 3.435274216524217e-05, "loss": 0.0198, "step": 42680 }, { "epoch": 0.31555838088761423, "grad_norm": 0.08583715558052063, "learning_rate": 3.434903252611586e-05, "loss": 0.0181, "step": 42690 }, { "epoch": 0.3156322994589161, "grad_norm": 0.06066381186246872, "learning_rate": 3.4345322886989556e-05, "loss": 0.0191, "step": 42700 }, { "epoch": 0.3157062180302179, "grad_norm": 0.10361328721046448, "learning_rate": 3.4341613247863245e-05, "loss": 0.0198, "step": 42710 }, { "epoch": 0.31578013660151977, "grad_norm": 0.09133805334568024, "learning_rate": 3.433790360873694e-05, "loss": 0.0206, "step": 42720 }, { "epoch": 0.3158540551728216, "grad_norm": 0.09569969773292542, "learning_rate": 3.433419396961064e-05, "loss": 0.0211, "step": 42730 }, { "epoch": 0.31592797374412346, "grad_norm": 0.0903606042265892, "learning_rate": 3.433048433048433e-05, "loss": 0.0195, "step": 42740 }, { "epoch": 0.3160018923154253, "grad_norm": 0.06401552259922028, "learning_rate": 3.432677469135803e-05, "loss": 0.0175, "step": 42750 }, { "epoch": 0.3160758108867272, "grad_norm": 0.11987832933664322, "learning_rate": 3.4323065052231725e-05, "loss": 0.0171, "step": 42760 }, { "epoch": 0.31614972945802905, "grad_norm": 0.10897868871688843, "learning_rate": 3.4319355413105414e-05, "loss": 0.0185, "step": 42770 }, { "epoch": 0.3162236480293309, "grad_norm": 0.15359020233154297, "learning_rate": 3.431564577397911e-05, "loss": 0.0183, "step": 42780 }, { "epoch": 0.31629756660063274, "grad_norm": 0.06444056332111359, "learning_rate": 3.43119361348528e-05, "loss": 0.0184, "step": 42790 }, { "epoch": 0.3163714851719346, "grad_norm": 0.10911993682384491, "learning_rate": 3.4308226495726496e-05, "loss": 0.0175, "step": 42800 }, { "epoch": 0.31644540374323643, "grad_norm": 0.07998649030923843, "learning_rate": 3.430451685660019e-05, "loss": 0.0189, "step": 42810 }, { "epoch": 0.31651932231453833, "grad_norm": 0.09834294766187668, "learning_rate": 3.430080721747389e-05, "loss": 0.0177, "step": 42820 }, { "epoch": 0.3165932408858402, "grad_norm": 0.08719082176685333, "learning_rate": 3.4297097578347583e-05, "loss": 0.0195, "step": 42830 }, { "epoch": 0.316667159457142, "grad_norm": 0.12308837473392487, "learning_rate": 3.429338793922127e-05, "loss": 0.0178, "step": 42840 }, { "epoch": 0.31674107802844387, "grad_norm": 0.11948662251234055, "learning_rate": 3.428967830009497e-05, "loss": 0.0175, "step": 42850 }, { "epoch": 0.3168149965997457, "grad_norm": 0.07762668281793594, "learning_rate": 3.4285968660968665e-05, "loss": 0.0192, "step": 42860 }, { "epoch": 0.31688891517104756, "grad_norm": 0.09460771828889847, "learning_rate": 3.4282259021842354e-05, "loss": 0.0186, "step": 42870 }, { "epoch": 0.3169628337423494, "grad_norm": 0.07193564623594284, "learning_rate": 3.427854938271605e-05, "loss": 0.0189, "step": 42880 }, { "epoch": 0.3170367523136513, "grad_norm": 0.09276462346315384, "learning_rate": 3.4274839743589746e-05, "loss": 0.0175, "step": 42890 }, { "epoch": 0.31711067088495315, "grad_norm": 0.0885123535990715, "learning_rate": 3.427113010446344e-05, "loss": 0.0195, "step": 42900 }, { "epoch": 0.317184589456255, "grad_norm": 0.09436597675085068, "learning_rate": 3.426742046533714e-05, "loss": 0.0188, "step": 42910 }, { "epoch": 0.31725850802755684, "grad_norm": 0.07258325815200806, "learning_rate": 3.426371082621083e-05, "loss": 0.0178, "step": 42920 }, { "epoch": 0.3173324265988587, "grad_norm": 0.06716993451118469, "learning_rate": 3.426000118708452e-05, "loss": 0.0203, "step": 42930 }, { "epoch": 0.31740634517016053, "grad_norm": 0.08646200597286224, "learning_rate": 3.425629154795821e-05, "loss": 0.0198, "step": 42940 }, { "epoch": 0.31748026374146243, "grad_norm": 0.06386014074087143, "learning_rate": 3.425258190883191e-05, "loss": 0.017, "step": 42950 }, { "epoch": 0.3175541823127643, "grad_norm": 0.06515224277973175, "learning_rate": 3.4248872269705604e-05, "loss": 0.0179, "step": 42960 }, { "epoch": 0.3176281008840661, "grad_norm": 0.08746074140071869, "learning_rate": 3.42451626305793e-05, "loss": 0.0181, "step": 42970 }, { "epoch": 0.31770201945536797, "grad_norm": 0.08505991101264954, "learning_rate": 3.4241452991452996e-05, "loss": 0.0207, "step": 42980 }, { "epoch": 0.3177759380266698, "grad_norm": 0.08368141204118729, "learning_rate": 3.423774335232669e-05, "loss": 0.0183, "step": 42990 }, { "epoch": 0.31784985659797166, "grad_norm": 0.09414701908826828, "learning_rate": 3.423403371320038e-05, "loss": 0.0212, "step": 43000 }, { "epoch": 0.3179237751692735, "grad_norm": 0.09063467383384705, "learning_rate": 3.423032407407408e-05, "loss": 0.0183, "step": 43010 }, { "epoch": 0.3179976937405754, "grad_norm": 0.07970460504293442, "learning_rate": 3.4226614434947766e-05, "loss": 0.0227, "step": 43020 }, { "epoch": 0.31807161231187725, "grad_norm": 0.09081530570983887, "learning_rate": 3.422290479582146e-05, "loss": 0.0208, "step": 43030 }, { "epoch": 0.3181455308831791, "grad_norm": 0.08632205426692963, "learning_rate": 3.421919515669516e-05, "loss": 0.018, "step": 43040 }, { "epoch": 0.31821944945448094, "grad_norm": 0.06210676580667496, "learning_rate": 3.4215485517568854e-05, "loss": 0.0173, "step": 43050 }, { "epoch": 0.3182933680257828, "grad_norm": 0.09494701772928238, "learning_rate": 3.421177587844255e-05, "loss": 0.0188, "step": 43060 }, { "epoch": 0.31836728659708463, "grad_norm": 0.11246080696582794, "learning_rate": 3.420806623931624e-05, "loss": 0.0165, "step": 43070 }, { "epoch": 0.31844120516838653, "grad_norm": 0.11079894006252289, "learning_rate": 3.4204356600189935e-05, "loss": 0.0204, "step": 43080 }, { "epoch": 0.3185151237396884, "grad_norm": 0.30354100465774536, "learning_rate": 3.420064696106363e-05, "loss": 0.0207, "step": 43090 }, { "epoch": 0.3185890423109902, "grad_norm": 0.11234261095523834, "learning_rate": 3.419693732193732e-05, "loss": 0.0185, "step": 43100 }, { "epoch": 0.31866296088229207, "grad_norm": 0.09679894149303436, "learning_rate": 3.4193227682811016e-05, "loss": 0.0205, "step": 43110 }, { "epoch": 0.3187368794535939, "grad_norm": 0.10255374014377594, "learning_rate": 3.418951804368471e-05, "loss": 0.019, "step": 43120 }, { "epoch": 0.31881079802489576, "grad_norm": 0.09295099973678589, "learning_rate": 3.418580840455841e-05, "loss": 0.0179, "step": 43130 }, { "epoch": 0.3188847165961976, "grad_norm": 0.06588831543922424, "learning_rate": 3.4182098765432104e-05, "loss": 0.0185, "step": 43140 }, { "epoch": 0.3189586351674995, "grad_norm": 0.0991397351026535, "learning_rate": 3.4178389126305793e-05, "loss": 0.0199, "step": 43150 }, { "epoch": 0.31903255373880135, "grad_norm": 0.10081097483634949, "learning_rate": 3.417467948717949e-05, "loss": 0.0228, "step": 43160 }, { "epoch": 0.3191064723101032, "grad_norm": 0.09069965034723282, "learning_rate": 3.417096984805318e-05, "loss": 0.0166, "step": 43170 }, { "epoch": 0.31918039088140504, "grad_norm": 0.1002868041396141, "learning_rate": 3.4167260208926875e-05, "loss": 0.0216, "step": 43180 }, { "epoch": 0.3192543094527069, "grad_norm": 0.07414255291223526, "learning_rate": 3.416355056980057e-05, "loss": 0.0209, "step": 43190 }, { "epoch": 0.31932822802400873, "grad_norm": 0.08768635988235474, "learning_rate": 3.4159840930674267e-05, "loss": 0.0202, "step": 43200 }, { "epoch": 0.31940214659531063, "grad_norm": 0.08148328959941864, "learning_rate": 3.415613129154796e-05, "loss": 0.0211, "step": 43210 }, { "epoch": 0.3194760651666125, "grad_norm": 0.07470358908176422, "learning_rate": 3.415242165242166e-05, "loss": 0.0181, "step": 43220 }, { "epoch": 0.3195499837379143, "grad_norm": 0.07791826874017715, "learning_rate": 3.414871201329535e-05, "loss": 0.019, "step": 43230 }, { "epoch": 0.31962390230921617, "grad_norm": 0.07294123619794846, "learning_rate": 3.4145002374169044e-05, "loss": 0.0179, "step": 43240 }, { "epoch": 0.319697820880518, "grad_norm": 0.08214742690324783, "learning_rate": 3.414129273504273e-05, "loss": 0.0196, "step": 43250 }, { "epoch": 0.31977173945181986, "grad_norm": 0.08051939308643341, "learning_rate": 3.413758309591643e-05, "loss": 0.0198, "step": 43260 }, { "epoch": 0.3198456580231217, "grad_norm": 0.06528766453266144, "learning_rate": 3.4133873456790125e-05, "loss": 0.0174, "step": 43270 }, { "epoch": 0.3199195765944236, "grad_norm": 0.06888923048973083, "learning_rate": 3.413016381766382e-05, "loss": 0.0186, "step": 43280 }, { "epoch": 0.31999349516572545, "grad_norm": 0.06578348577022552, "learning_rate": 3.412645417853752e-05, "loss": 0.0164, "step": 43290 }, { "epoch": 0.3200674137370273, "grad_norm": 0.1435423642396927, "learning_rate": 3.4122744539411206e-05, "loss": 0.0201, "step": 43300 }, { "epoch": 0.32014133230832914, "grad_norm": 0.09891865402460098, "learning_rate": 3.41190349002849e-05, "loss": 0.0185, "step": 43310 }, { "epoch": 0.320215250879631, "grad_norm": 0.09307089447975159, "learning_rate": 3.41153252611586e-05, "loss": 0.0183, "step": 43320 }, { "epoch": 0.32028916945093283, "grad_norm": 0.14205126464366913, "learning_rate": 3.411161562203229e-05, "loss": 0.0214, "step": 43330 }, { "epoch": 0.32036308802223473, "grad_norm": 0.08174362778663635, "learning_rate": 3.410790598290598e-05, "loss": 0.0191, "step": 43340 }, { "epoch": 0.3204370065935366, "grad_norm": 0.06301737576723099, "learning_rate": 3.410419634377968e-05, "loss": 0.0156, "step": 43350 }, { "epoch": 0.3205109251648384, "grad_norm": 0.07327570766210556, "learning_rate": 3.4100486704653375e-05, "loss": 0.0186, "step": 43360 }, { "epoch": 0.32058484373614027, "grad_norm": 0.07892131060361862, "learning_rate": 3.409677706552707e-05, "loss": 0.0183, "step": 43370 }, { "epoch": 0.3206587623074421, "grad_norm": 0.10537906736135483, "learning_rate": 3.409306742640076e-05, "loss": 0.0181, "step": 43380 }, { "epoch": 0.32073268087874396, "grad_norm": 0.07635634392499924, "learning_rate": 3.4089357787274456e-05, "loss": 0.0193, "step": 43390 }, { "epoch": 0.3208065994500458, "grad_norm": 0.09027914702892303, "learning_rate": 3.4085648148148145e-05, "loss": 0.0216, "step": 43400 }, { "epoch": 0.3208805180213477, "grad_norm": 0.09778694808483124, "learning_rate": 3.408193850902184e-05, "loss": 0.0193, "step": 43410 }, { "epoch": 0.32095443659264955, "grad_norm": 0.071172334253788, "learning_rate": 3.407822886989554e-05, "loss": 0.0182, "step": 43420 }, { "epoch": 0.3210283551639514, "grad_norm": 0.08770966529846191, "learning_rate": 3.407451923076923e-05, "loss": 0.0195, "step": 43430 }, { "epoch": 0.32110227373525324, "grad_norm": 0.08739007264375687, "learning_rate": 3.407080959164293e-05, "loss": 0.0172, "step": 43440 }, { "epoch": 0.3211761923065551, "grad_norm": 0.0868479385972023, "learning_rate": 3.4067099952516625e-05, "loss": 0.017, "step": 43450 }, { "epoch": 0.32125011087785693, "grad_norm": 0.09316948801279068, "learning_rate": 3.4063390313390314e-05, "loss": 0.0195, "step": 43460 }, { "epoch": 0.32132402944915883, "grad_norm": 0.07854902744293213, "learning_rate": 3.405968067426401e-05, "loss": 0.0193, "step": 43470 }, { "epoch": 0.3213979480204607, "grad_norm": 0.08497950434684753, "learning_rate": 3.40559710351377e-05, "loss": 0.0192, "step": 43480 }, { "epoch": 0.3214718665917625, "grad_norm": 0.10147658735513687, "learning_rate": 3.4052261396011395e-05, "loss": 0.0187, "step": 43490 }, { "epoch": 0.32154578516306437, "grad_norm": 0.06862959265708923, "learning_rate": 3.404855175688509e-05, "loss": 0.0167, "step": 43500 }, { "epoch": 0.3216197037343662, "grad_norm": 0.1020599827170372, "learning_rate": 3.404484211775879e-05, "loss": 0.018, "step": 43510 }, { "epoch": 0.32169362230566806, "grad_norm": 0.09441424161195755, "learning_rate": 3.404113247863248e-05, "loss": 0.0197, "step": 43520 }, { "epoch": 0.3217675408769699, "grad_norm": 0.0880255475640297, "learning_rate": 3.403742283950617e-05, "loss": 0.0199, "step": 43530 }, { "epoch": 0.3218414594482718, "grad_norm": 0.0700983926653862, "learning_rate": 3.403371320037987e-05, "loss": 0.0181, "step": 43540 }, { "epoch": 0.32191537801957365, "grad_norm": 0.09148949384689331, "learning_rate": 3.4030003561253564e-05, "loss": 0.0192, "step": 43550 }, { "epoch": 0.3219892965908755, "grad_norm": 0.1262521594762802, "learning_rate": 3.4026293922127254e-05, "loss": 0.0187, "step": 43560 }, { "epoch": 0.32206321516217734, "grad_norm": 0.11587070673704147, "learning_rate": 3.402258428300095e-05, "loss": 0.0188, "step": 43570 }, { "epoch": 0.3221371337334792, "grad_norm": 0.09726160764694214, "learning_rate": 3.4018874643874645e-05, "loss": 0.0173, "step": 43580 }, { "epoch": 0.32221105230478103, "grad_norm": 0.09375026077032089, "learning_rate": 3.401516500474834e-05, "loss": 0.0172, "step": 43590 }, { "epoch": 0.32228497087608293, "grad_norm": 0.08319017291069031, "learning_rate": 3.401145536562204e-05, "loss": 0.0194, "step": 43600 }, { "epoch": 0.3223588894473848, "grad_norm": 0.07451638579368591, "learning_rate": 3.400774572649573e-05, "loss": 0.018, "step": 43610 }, { "epoch": 0.3224328080186866, "grad_norm": 0.07658302038908005, "learning_rate": 3.400403608736942e-05, "loss": 0.0192, "step": 43620 }, { "epoch": 0.32250672658998847, "grad_norm": 0.11052345484495163, "learning_rate": 3.400032644824311e-05, "loss": 0.0204, "step": 43630 }, { "epoch": 0.3225806451612903, "grad_norm": 0.09942183643579483, "learning_rate": 3.399661680911681e-05, "loss": 0.0198, "step": 43640 }, { "epoch": 0.32265456373259216, "grad_norm": 0.08108251541852951, "learning_rate": 3.3992907169990504e-05, "loss": 0.0183, "step": 43650 }, { "epoch": 0.322728482303894, "grad_norm": 0.10380920022726059, "learning_rate": 3.39891975308642e-05, "loss": 0.0213, "step": 43660 }, { "epoch": 0.3228024008751959, "grad_norm": 0.08956963568925858, "learning_rate": 3.3985487891737896e-05, "loss": 0.0186, "step": 43670 }, { "epoch": 0.32287631944649775, "grad_norm": 0.11362124979496002, "learning_rate": 3.398177825261159e-05, "loss": 0.0178, "step": 43680 }, { "epoch": 0.3229502380177996, "grad_norm": 0.05954618379473686, "learning_rate": 3.397806861348528e-05, "loss": 0.0185, "step": 43690 }, { "epoch": 0.32302415658910144, "grad_norm": 0.06360287964344025, "learning_rate": 3.397435897435898e-05, "loss": 0.018, "step": 43700 }, { "epoch": 0.3230980751604033, "grad_norm": 0.08753181248903275, "learning_rate": 3.3970649335232666e-05, "loss": 0.0178, "step": 43710 }, { "epoch": 0.32317199373170513, "grad_norm": 0.09147054702043533, "learning_rate": 3.396693969610636e-05, "loss": 0.0201, "step": 43720 }, { "epoch": 0.32324591230300703, "grad_norm": 0.0729442685842514, "learning_rate": 3.396323005698006e-05, "loss": 0.0169, "step": 43730 }, { "epoch": 0.3233198308743089, "grad_norm": 0.08728668093681335, "learning_rate": 3.3959520417853754e-05, "loss": 0.0201, "step": 43740 }, { "epoch": 0.3233937494456107, "grad_norm": 0.07614386081695557, "learning_rate": 3.395581077872745e-05, "loss": 0.0204, "step": 43750 }, { "epoch": 0.32346766801691257, "grad_norm": 0.09705346822738647, "learning_rate": 3.395210113960114e-05, "loss": 0.0211, "step": 43760 }, { "epoch": 0.3235415865882144, "grad_norm": 0.07552920281887054, "learning_rate": 3.3948391500474835e-05, "loss": 0.0189, "step": 43770 }, { "epoch": 0.32361550515951626, "grad_norm": 0.08856528252363205, "learning_rate": 3.394468186134853e-05, "loss": 0.0183, "step": 43780 }, { "epoch": 0.3236894237308181, "grad_norm": 0.07442531734704971, "learning_rate": 3.394097222222222e-05, "loss": 0.019, "step": 43790 }, { "epoch": 0.32376334230212, "grad_norm": 0.06279998272657394, "learning_rate": 3.3937262583095916e-05, "loss": 0.0189, "step": 43800 }, { "epoch": 0.32383726087342185, "grad_norm": 0.060555700212717056, "learning_rate": 3.393355294396961e-05, "loss": 0.0185, "step": 43810 }, { "epoch": 0.3239111794447237, "grad_norm": 0.06599309295415878, "learning_rate": 3.392984330484331e-05, "loss": 0.02, "step": 43820 }, { "epoch": 0.32398509801602554, "grad_norm": 0.05925067141652107, "learning_rate": 3.3926133665717004e-05, "loss": 0.0166, "step": 43830 }, { "epoch": 0.3240590165873274, "grad_norm": 0.0998731404542923, "learning_rate": 3.392242402659069e-05, "loss": 0.0179, "step": 43840 }, { "epoch": 0.32413293515862923, "grad_norm": 0.12187574058771133, "learning_rate": 3.391871438746439e-05, "loss": 0.0197, "step": 43850 }, { "epoch": 0.32420685372993113, "grad_norm": 0.07652764022350311, "learning_rate": 3.391500474833808e-05, "loss": 0.0178, "step": 43860 }, { "epoch": 0.324280772301233, "grad_norm": 0.08620169758796692, "learning_rate": 3.3911295109211774e-05, "loss": 0.0178, "step": 43870 }, { "epoch": 0.3243546908725348, "grad_norm": 0.09348164498806, "learning_rate": 3.390758547008547e-05, "loss": 0.0191, "step": 43880 }, { "epoch": 0.32442860944383667, "grad_norm": 0.07452642172574997, "learning_rate": 3.3903875830959166e-05, "loss": 0.0185, "step": 43890 }, { "epoch": 0.3245025280151385, "grad_norm": 0.07998265326023102, "learning_rate": 3.390016619183286e-05, "loss": 0.0176, "step": 43900 }, { "epoch": 0.32457644658644036, "grad_norm": 0.10004127770662308, "learning_rate": 3.389645655270656e-05, "loss": 0.0188, "step": 43910 }, { "epoch": 0.3246503651577422, "grad_norm": 0.0779428631067276, "learning_rate": 3.389274691358025e-05, "loss": 0.0186, "step": 43920 }, { "epoch": 0.3247242837290441, "grad_norm": 0.08592027425765991, "learning_rate": 3.388903727445394e-05, "loss": 0.0187, "step": 43930 }, { "epoch": 0.32479820230034595, "grad_norm": 0.09659634530544281, "learning_rate": 3.388532763532763e-05, "loss": 0.0218, "step": 43940 }, { "epoch": 0.3248721208716478, "grad_norm": 0.08549503237009048, "learning_rate": 3.388161799620133e-05, "loss": 0.0172, "step": 43950 }, { "epoch": 0.32494603944294964, "grad_norm": 0.07176053524017334, "learning_rate": 3.3877908357075024e-05, "loss": 0.0169, "step": 43960 }, { "epoch": 0.3250199580142515, "grad_norm": 0.09597663581371307, "learning_rate": 3.387419871794872e-05, "loss": 0.0182, "step": 43970 }, { "epoch": 0.32509387658555333, "grad_norm": 0.09322880953550339, "learning_rate": 3.3870489078822416e-05, "loss": 0.0192, "step": 43980 }, { "epoch": 0.32516779515685523, "grad_norm": 0.08309384435415268, "learning_rate": 3.3866779439696106e-05, "loss": 0.019, "step": 43990 }, { "epoch": 0.3252417137281571, "grad_norm": 0.08086520433425903, "learning_rate": 3.38630698005698e-05, "loss": 0.0196, "step": 44000 }, { "epoch": 0.3253156322994589, "grad_norm": 0.07314729690551758, "learning_rate": 3.38593601614435e-05, "loss": 0.0203, "step": 44010 }, { "epoch": 0.32538955087076077, "grad_norm": 0.08843156695365906, "learning_rate": 3.385565052231719e-05, "loss": 0.0184, "step": 44020 }, { "epoch": 0.3254634694420626, "grad_norm": 0.13710302114486694, "learning_rate": 3.385194088319088e-05, "loss": 0.0187, "step": 44030 }, { "epoch": 0.32553738801336446, "grad_norm": 0.06512115895748138, "learning_rate": 3.384823124406458e-05, "loss": 0.0186, "step": 44040 }, { "epoch": 0.3256113065846663, "grad_norm": 0.09785745292901993, "learning_rate": 3.3844521604938275e-05, "loss": 0.0159, "step": 44050 }, { "epoch": 0.3256852251559682, "grad_norm": 0.09728438407182693, "learning_rate": 3.384081196581197e-05, "loss": 0.0203, "step": 44060 }, { "epoch": 0.32575914372727005, "grad_norm": 0.0847766101360321, "learning_rate": 3.383710232668566e-05, "loss": 0.0181, "step": 44070 }, { "epoch": 0.3258330622985719, "grad_norm": 0.06976700574159622, "learning_rate": 3.3833392687559356e-05, "loss": 0.0194, "step": 44080 }, { "epoch": 0.32590698086987374, "grad_norm": 0.08276432007551193, "learning_rate": 3.3829683048433045e-05, "loss": 0.0192, "step": 44090 }, { "epoch": 0.3259808994411756, "grad_norm": 0.0673220157623291, "learning_rate": 3.382597340930674e-05, "loss": 0.0191, "step": 44100 }, { "epoch": 0.32605481801247743, "grad_norm": 0.07937823235988617, "learning_rate": 3.382226377018044e-05, "loss": 0.0177, "step": 44110 }, { "epoch": 0.32612873658377933, "grad_norm": 0.07397346198558807, "learning_rate": 3.381855413105413e-05, "loss": 0.019, "step": 44120 }, { "epoch": 0.3262026551550812, "grad_norm": 0.07269817590713501, "learning_rate": 3.381484449192783e-05, "loss": 0.0164, "step": 44130 }, { "epoch": 0.326276573726383, "grad_norm": 0.07143591344356537, "learning_rate": 3.3811134852801525e-05, "loss": 0.0194, "step": 44140 }, { "epoch": 0.32635049229768487, "grad_norm": 0.10848372429609299, "learning_rate": 3.3807425213675214e-05, "loss": 0.019, "step": 44150 }, { "epoch": 0.3264244108689867, "grad_norm": 0.08963832259178162, "learning_rate": 3.380371557454891e-05, "loss": 0.0202, "step": 44160 }, { "epoch": 0.32649832944028856, "grad_norm": 0.0764172300696373, "learning_rate": 3.38000059354226e-05, "loss": 0.0172, "step": 44170 }, { "epoch": 0.3265722480115904, "grad_norm": 0.08552178740501404, "learning_rate": 3.3796296296296295e-05, "loss": 0.0185, "step": 44180 }, { "epoch": 0.3266461665828923, "grad_norm": 0.1096053346991539, "learning_rate": 3.379258665717e-05, "loss": 0.0171, "step": 44190 }, { "epoch": 0.32672008515419415, "grad_norm": 0.16093607246875763, "learning_rate": 3.378887701804369e-05, "loss": 0.0178, "step": 44200 }, { "epoch": 0.326794003725496, "grad_norm": 0.07053223252296448, "learning_rate": 3.378516737891738e-05, "loss": 0.02, "step": 44210 }, { "epoch": 0.32686792229679784, "grad_norm": 0.1485041230916977, "learning_rate": 3.378145773979107e-05, "loss": 0.019, "step": 44220 }, { "epoch": 0.3269418408680997, "grad_norm": 0.07003764808177948, "learning_rate": 3.377774810066477e-05, "loss": 0.0166, "step": 44230 }, { "epoch": 0.32701575943940153, "grad_norm": 0.07502686232328415, "learning_rate": 3.3774038461538464e-05, "loss": 0.02, "step": 44240 }, { "epoch": 0.32708967801070343, "grad_norm": 0.08239313215017319, "learning_rate": 3.377032882241215e-05, "loss": 0.0203, "step": 44250 }, { "epoch": 0.3271635965820053, "grad_norm": 0.09772242605686188, "learning_rate": 3.376661918328585e-05, "loss": 0.0189, "step": 44260 }, { "epoch": 0.3272375151533071, "grad_norm": 0.11429288238286972, "learning_rate": 3.3762909544159545e-05, "loss": 0.0182, "step": 44270 }, { "epoch": 0.32731143372460897, "grad_norm": 0.0794249027967453, "learning_rate": 3.375919990503324e-05, "loss": 0.0176, "step": 44280 }, { "epoch": 0.3273853522959108, "grad_norm": 0.07520075887441635, "learning_rate": 3.375549026590694e-05, "loss": 0.0162, "step": 44290 }, { "epoch": 0.32745927086721266, "grad_norm": 0.0852094441652298, "learning_rate": 3.3751780626780626e-05, "loss": 0.0177, "step": 44300 }, { "epoch": 0.3275331894385145, "grad_norm": 0.09648241102695465, "learning_rate": 3.374807098765432e-05, "loss": 0.0215, "step": 44310 }, { "epoch": 0.3276071080098164, "grad_norm": 0.06515844166278839, "learning_rate": 3.374436134852801e-05, "loss": 0.0176, "step": 44320 }, { "epoch": 0.32768102658111825, "grad_norm": 0.1179385706782341, "learning_rate": 3.374065170940171e-05, "loss": 0.0173, "step": 44330 }, { "epoch": 0.3277549451524201, "grad_norm": 0.0831533819437027, "learning_rate": 3.373694207027541e-05, "loss": 0.0195, "step": 44340 }, { "epoch": 0.32782886372372194, "grad_norm": 0.11352211236953735, "learning_rate": 3.37332324311491e-05, "loss": 0.0205, "step": 44350 }, { "epoch": 0.3279027822950238, "grad_norm": 0.09942604601383209, "learning_rate": 3.3729522792022795e-05, "loss": 0.0167, "step": 44360 }, { "epoch": 0.32797670086632563, "grad_norm": 0.10662192106246948, "learning_rate": 3.372581315289649e-05, "loss": 0.0189, "step": 44370 }, { "epoch": 0.32805061943762753, "grad_norm": 0.11508975178003311, "learning_rate": 3.372210351377018e-05, "loss": 0.018, "step": 44380 }, { "epoch": 0.3281245380089294, "grad_norm": 0.1185436099767685, "learning_rate": 3.3718393874643877e-05, "loss": 0.0184, "step": 44390 }, { "epoch": 0.3281984565802312, "grad_norm": 0.08700943738222122, "learning_rate": 3.3714684235517566e-05, "loss": 0.0203, "step": 44400 }, { "epoch": 0.32827237515153307, "grad_norm": 0.0730283334851265, "learning_rate": 3.371097459639126e-05, "loss": 0.018, "step": 44410 }, { "epoch": 0.3283462937228349, "grad_norm": 0.067600317299366, "learning_rate": 3.3707264957264964e-05, "loss": 0.0175, "step": 44420 }, { "epoch": 0.32842021229413676, "grad_norm": 0.6665230989456177, "learning_rate": 3.3703555318138654e-05, "loss": 0.0235, "step": 44430 }, { "epoch": 0.32849413086543866, "grad_norm": 0.08568233251571655, "learning_rate": 3.369984567901235e-05, "loss": 0.0174, "step": 44440 }, { "epoch": 0.3285680494367405, "grad_norm": 0.060386039316654205, "learning_rate": 3.369613603988604e-05, "loss": 0.0179, "step": 44450 }, { "epoch": 0.32864196800804235, "grad_norm": 0.10326115787029266, "learning_rate": 3.3692426400759735e-05, "loss": 0.02, "step": 44460 }, { "epoch": 0.3287158865793442, "grad_norm": 0.06419151276350021, "learning_rate": 3.368871676163343e-05, "loss": 0.0203, "step": 44470 }, { "epoch": 0.32878980515064604, "grad_norm": 0.11964228004217148, "learning_rate": 3.368500712250712e-05, "loss": 0.0186, "step": 44480 }, { "epoch": 0.3288637237219479, "grad_norm": 0.08400668948888779, "learning_rate": 3.368129748338082e-05, "loss": 0.0185, "step": 44490 }, { "epoch": 0.32893764229324973, "grad_norm": 0.06105771288275719, "learning_rate": 3.367758784425451e-05, "loss": 0.0189, "step": 44500 }, { "epoch": 0.32901156086455163, "grad_norm": 0.09602080285549164, "learning_rate": 3.367387820512821e-05, "loss": 0.0164, "step": 44510 }, { "epoch": 0.3290854794358535, "grad_norm": 0.09508220851421356, "learning_rate": 3.3670168566001904e-05, "loss": 0.0196, "step": 44520 }, { "epoch": 0.3291593980071553, "grad_norm": 0.06339730322360992, "learning_rate": 3.366645892687559e-05, "loss": 0.0169, "step": 44530 }, { "epoch": 0.32923331657845717, "grad_norm": 0.10049261897802353, "learning_rate": 3.366274928774929e-05, "loss": 0.0179, "step": 44540 }, { "epoch": 0.329307235149759, "grad_norm": 0.07951238006353378, "learning_rate": 3.365903964862298e-05, "loss": 0.0209, "step": 44550 }, { "epoch": 0.32938115372106086, "grad_norm": 0.0696750357747078, "learning_rate": 3.3655330009496674e-05, "loss": 0.0184, "step": 44560 }, { "epoch": 0.32945507229236276, "grad_norm": 0.07983843982219696, "learning_rate": 3.365162037037038e-05, "loss": 0.0183, "step": 44570 }, { "epoch": 0.3295289908636646, "grad_norm": 0.08632298558950424, "learning_rate": 3.3647910731244066e-05, "loss": 0.0198, "step": 44580 }, { "epoch": 0.32960290943496645, "grad_norm": 0.08244680613279343, "learning_rate": 3.364420109211776e-05, "loss": 0.0165, "step": 44590 }, { "epoch": 0.3296768280062683, "grad_norm": 0.07452484965324402, "learning_rate": 3.364049145299146e-05, "loss": 0.0175, "step": 44600 }, { "epoch": 0.32975074657757014, "grad_norm": 0.06344713270664215, "learning_rate": 3.363678181386515e-05, "loss": 0.0182, "step": 44610 }, { "epoch": 0.329824665148872, "grad_norm": 0.13326257467269897, "learning_rate": 3.363307217473884e-05, "loss": 0.0213, "step": 44620 }, { "epoch": 0.32989858372017383, "grad_norm": 0.08266934752464294, "learning_rate": 3.362936253561253e-05, "loss": 0.0192, "step": 44630 }, { "epoch": 0.32997250229147573, "grad_norm": 0.07077519595623016, "learning_rate": 3.362565289648623e-05, "loss": 0.0199, "step": 44640 }, { "epoch": 0.3300464208627776, "grad_norm": 0.06713325530290604, "learning_rate": 3.362194325735993e-05, "loss": 0.0177, "step": 44650 }, { "epoch": 0.3301203394340794, "grad_norm": 0.10795174539089203, "learning_rate": 3.361823361823362e-05, "loss": 0.021, "step": 44660 }, { "epoch": 0.33019425800538127, "grad_norm": 0.11786088347434998, "learning_rate": 3.3614523979107316e-05, "loss": 0.0188, "step": 44670 }, { "epoch": 0.3302681765766831, "grad_norm": 0.09778054803609848, "learning_rate": 3.3610814339981005e-05, "loss": 0.0181, "step": 44680 }, { "epoch": 0.33034209514798496, "grad_norm": 0.10623161494731903, "learning_rate": 3.36071047008547e-05, "loss": 0.0195, "step": 44690 }, { "epoch": 0.33041601371928686, "grad_norm": 0.06341532617807388, "learning_rate": 3.36033950617284e-05, "loss": 0.0162, "step": 44700 }, { "epoch": 0.3304899322905887, "grad_norm": 0.08555193245410919, "learning_rate": 3.3599685422602086e-05, "loss": 0.0205, "step": 44710 }, { "epoch": 0.33056385086189055, "grad_norm": 0.10713338106870651, "learning_rate": 3.359597578347579e-05, "loss": 0.019, "step": 44720 }, { "epoch": 0.3306377694331924, "grad_norm": 0.0889672115445137, "learning_rate": 3.359226614434948e-05, "loss": 0.017, "step": 44730 }, { "epoch": 0.33071168800449424, "grad_norm": 0.1061701849102974, "learning_rate": 3.3588556505223174e-05, "loss": 0.0199, "step": 44740 }, { "epoch": 0.3307856065757961, "grad_norm": 0.0591684989631176, "learning_rate": 3.358484686609687e-05, "loss": 0.0203, "step": 44750 }, { "epoch": 0.33085952514709793, "grad_norm": 0.12138590216636658, "learning_rate": 3.358113722697056e-05, "loss": 0.0188, "step": 44760 }, { "epoch": 0.33093344371839983, "grad_norm": 0.06759916245937347, "learning_rate": 3.3577427587844256e-05, "loss": 0.0168, "step": 44770 }, { "epoch": 0.3310073622897017, "grad_norm": 0.07580891996622086, "learning_rate": 3.3573717948717945e-05, "loss": 0.0194, "step": 44780 }, { "epoch": 0.3310812808610035, "grad_norm": 0.10968596488237381, "learning_rate": 3.357000830959164e-05, "loss": 0.0163, "step": 44790 }, { "epoch": 0.33115519943230537, "grad_norm": 0.0971807911992073, "learning_rate": 3.3566298670465343e-05, "loss": 0.0205, "step": 44800 }, { "epoch": 0.3312291180036072, "grad_norm": 0.1008811816573143, "learning_rate": 3.356258903133903e-05, "loss": 0.0167, "step": 44810 }, { "epoch": 0.33130303657490906, "grad_norm": 0.09538344293832779, "learning_rate": 3.355887939221273e-05, "loss": 0.0166, "step": 44820 }, { "epoch": 0.33137695514621096, "grad_norm": 0.07427355647087097, "learning_rate": 3.3555169753086425e-05, "loss": 0.0202, "step": 44830 }, { "epoch": 0.3314508737175128, "grad_norm": 0.09792878478765488, "learning_rate": 3.3551460113960114e-05, "loss": 0.0205, "step": 44840 }, { "epoch": 0.33152479228881465, "grad_norm": 0.07082540541887283, "learning_rate": 3.354775047483381e-05, "loss": 0.0192, "step": 44850 }, { "epoch": 0.3315987108601165, "grad_norm": 0.08246816694736481, "learning_rate": 3.35440408357075e-05, "loss": 0.0182, "step": 44860 }, { "epoch": 0.33167262943141834, "grad_norm": 0.09148363769054413, "learning_rate": 3.35403311965812e-05, "loss": 0.0187, "step": 44870 }, { "epoch": 0.3317465480027202, "grad_norm": 0.07163092494010925, "learning_rate": 3.35366215574549e-05, "loss": 0.0208, "step": 44880 }, { "epoch": 0.33182046657402203, "grad_norm": 0.05248361453413963, "learning_rate": 3.353291191832859e-05, "loss": 0.0186, "step": 44890 }, { "epoch": 0.33189438514532393, "grad_norm": 0.08499059826135635, "learning_rate": 3.352920227920228e-05, "loss": 0.021, "step": 44900 }, { "epoch": 0.3319683037166258, "grad_norm": 0.09064117074012756, "learning_rate": 3.352549264007597e-05, "loss": 0.0226, "step": 44910 }, { "epoch": 0.3320422222879276, "grad_norm": 0.09755630791187286, "learning_rate": 3.352178300094967e-05, "loss": 0.0193, "step": 44920 }, { "epoch": 0.33211614085922947, "grad_norm": 0.1002260372042656, "learning_rate": 3.3518073361823364e-05, "loss": 0.0193, "step": 44930 }, { "epoch": 0.3321900594305313, "grad_norm": 0.0829191580414772, "learning_rate": 3.351436372269705e-05, "loss": 0.0199, "step": 44940 }, { "epoch": 0.33226397800183316, "grad_norm": 0.1327805370092392, "learning_rate": 3.3510654083570756e-05, "loss": 0.0187, "step": 44950 }, { "epoch": 0.33233789657313506, "grad_norm": 0.09947414696216583, "learning_rate": 3.3506944444444445e-05, "loss": 0.0203, "step": 44960 }, { "epoch": 0.3324118151444369, "grad_norm": 0.0643693134188652, "learning_rate": 3.350323480531814e-05, "loss": 0.0225, "step": 44970 }, { "epoch": 0.33248573371573875, "grad_norm": 0.10599484294652939, "learning_rate": 3.349952516619184e-05, "loss": 0.0188, "step": 44980 }, { "epoch": 0.3325596522870406, "grad_norm": 0.0734916552901268, "learning_rate": 3.3495815527065526e-05, "loss": 0.0186, "step": 44990 }, { "epoch": 0.33263357085834244, "grad_norm": 0.08003656566143036, "learning_rate": 3.349210588793922e-05, "loss": 0.0176, "step": 45000 }, { "epoch": 0.3327074894296443, "grad_norm": 0.08142461627721786, "learning_rate": 3.348839624881291e-05, "loss": 0.0183, "step": 45010 }, { "epoch": 0.33278140800094613, "grad_norm": 0.08227168768644333, "learning_rate": 3.3484686609686614e-05, "loss": 0.0182, "step": 45020 }, { "epoch": 0.33285532657224803, "grad_norm": 0.10435879230499268, "learning_rate": 3.348097697056031e-05, "loss": 0.0207, "step": 45030 }, { "epoch": 0.3329292451435499, "grad_norm": 0.09841688722372055, "learning_rate": 3.3477267331434e-05, "loss": 0.0174, "step": 45040 }, { "epoch": 0.3330031637148517, "grad_norm": 0.12566670775413513, "learning_rate": 3.3473557692307695e-05, "loss": 0.0212, "step": 45050 }, { "epoch": 0.33307708228615357, "grad_norm": 0.06397977471351624, "learning_rate": 3.346984805318139e-05, "loss": 0.0152, "step": 45060 }, { "epoch": 0.3331510008574554, "grad_norm": 0.10317044705152512, "learning_rate": 3.346613841405508e-05, "loss": 0.0219, "step": 45070 }, { "epoch": 0.33322491942875726, "grad_norm": 0.11175418645143509, "learning_rate": 3.3462428774928776e-05, "loss": 0.018, "step": 45080 }, { "epoch": 0.33329883800005916, "grad_norm": 0.09719778597354889, "learning_rate": 3.3458719135802465e-05, "loss": 0.0186, "step": 45090 }, { "epoch": 0.333372756571361, "grad_norm": 0.09944400191307068, "learning_rate": 3.345500949667617e-05, "loss": 0.0196, "step": 45100 }, { "epoch": 0.33344667514266285, "grad_norm": 0.1216149777173996, "learning_rate": 3.3451299857549864e-05, "loss": 0.019, "step": 45110 }, { "epoch": 0.3335205937139647, "grad_norm": 0.07389708608388901, "learning_rate": 3.344759021842355e-05, "loss": 0.0178, "step": 45120 }, { "epoch": 0.33359451228526654, "grad_norm": 0.13755720853805542, "learning_rate": 3.344388057929725e-05, "loss": 0.0182, "step": 45130 }, { "epoch": 0.3336684308565684, "grad_norm": 0.0727863758802414, "learning_rate": 3.344017094017094e-05, "loss": 0.0176, "step": 45140 }, { "epoch": 0.33374234942787023, "grad_norm": 0.1026555746793747, "learning_rate": 3.3436461301044634e-05, "loss": 0.0185, "step": 45150 }, { "epoch": 0.33381626799917213, "grad_norm": 0.06983400881290436, "learning_rate": 3.343275166191833e-05, "loss": 0.0173, "step": 45160 }, { "epoch": 0.333890186570474, "grad_norm": 0.11022666841745377, "learning_rate": 3.3429042022792026e-05, "loss": 0.0186, "step": 45170 }, { "epoch": 0.3339641051417758, "grad_norm": 0.06704510748386383, "learning_rate": 3.342533238366572e-05, "loss": 0.0195, "step": 45180 }, { "epoch": 0.33403802371307767, "grad_norm": 0.09733101725578308, "learning_rate": 3.342162274453941e-05, "loss": 0.0188, "step": 45190 }, { "epoch": 0.3341119422843795, "grad_norm": 0.11707833409309387, "learning_rate": 3.341791310541311e-05, "loss": 0.019, "step": 45200 }, { "epoch": 0.33418586085568136, "grad_norm": 0.08068402111530304, "learning_rate": 3.3414203466286804e-05, "loss": 0.02, "step": 45210 }, { "epoch": 0.33425977942698326, "grad_norm": 0.08782497048377991, "learning_rate": 3.341049382716049e-05, "loss": 0.0181, "step": 45220 }, { "epoch": 0.3343336979982851, "grad_norm": 0.0925891250371933, "learning_rate": 3.340678418803419e-05, "loss": 0.0188, "step": 45230 }, { "epoch": 0.33440761656958695, "grad_norm": 0.10016071051359177, "learning_rate": 3.340307454890788e-05, "loss": 0.0207, "step": 45240 }, { "epoch": 0.3344815351408888, "grad_norm": 0.09942296147346497, "learning_rate": 3.339936490978158e-05, "loss": 0.018, "step": 45250 }, { "epoch": 0.33455545371219064, "grad_norm": 0.07284510135650635, "learning_rate": 3.3395655270655277e-05, "loss": 0.0201, "step": 45260 }, { "epoch": 0.3346293722834925, "grad_norm": 0.08632287383079529, "learning_rate": 3.3391945631528966e-05, "loss": 0.0187, "step": 45270 }, { "epoch": 0.33470329085479433, "grad_norm": 0.07172413915395737, "learning_rate": 3.338823599240266e-05, "loss": 0.0208, "step": 45280 }, { "epoch": 0.33477720942609623, "grad_norm": 0.06224941462278366, "learning_rate": 3.338452635327636e-05, "loss": 0.0162, "step": 45290 }, { "epoch": 0.3348511279973981, "grad_norm": 0.09879124909639359, "learning_rate": 3.338081671415005e-05, "loss": 0.02, "step": 45300 }, { "epoch": 0.3349250465686999, "grad_norm": 0.1068207249045372, "learning_rate": 3.337710707502374e-05, "loss": 0.0176, "step": 45310 }, { "epoch": 0.33499896514000177, "grad_norm": 0.08688890188932419, "learning_rate": 3.337339743589744e-05, "loss": 0.021, "step": 45320 }, { "epoch": 0.3350728837113036, "grad_norm": 0.07991108298301697, "learning_rate": 3.3369687796771135e-05, "loss": 0.0176, "step": 45330 }, { "epoch": 0.33514680228260546, "grad_norm": 0.07760392129421234, "learning_rate": 3.336597815764483e-05, "loss": 0.0162, "step": 45340 }, { "epoch": 0.33522072085390736, "grad_norm": 0.10568118095397949, "learning_rate": 3.336226851851852e-05, "loss": 0.0175, "step": 45350 }, { "epoch": 0.3352946394252092, "grad_norm": 0.14464734494686127, "learning_rate": 3.3358558879392216e-05, "loss": 0.0178, "step": 45360 }, { "epoch": 0.33536855799651105, "grad_norm": 0.10452807694673538, "learning_rate": 3.3354849240265905e-05, "loss": 0.021, "step": 45370 }, { "epoch": 0.3354424765678129, "grad_norm": 0.06442765146493912, "learning_rate": 3.33511396011396e-05, "loss": 0.0183, "step": 45380 }, { "epoch": 0.33551639513911474, "grad_norm": 0.08903558552265167, "learning_rate": 3.33474299620133e-05, "loss": 0.0192, "step": 45390 }, { "epoch": 0.3355903137104166, "grad_norm": 0.09148462116718292, "learning_rate": 3.334372032288699e-05, "loss": 0.0186, "step": 45400 }, { "epoch": 0.33566423228171843, "grad_norm": 0.10632915049791336, "learning_rate": 3.334001068376069e-05, "loss": 0.0195, "step": 45410 }, { "epoch": 0.33573815085302033, "grad_norm": 0.0853031724691391, "learning_rate": 3.333630104463438e-05, "loss": 0.019, "step": 45420 }, { "epoch": 0.3358120694243222, "grad_norm": 0.08535119891166687, "learning_rate": 3.3332591405508074e-05, "loss": 0.0196, "step": 45430 }, { "epoch": 0.335885987995624, "grad_norm": 0.07558152079582214, "learning_rate": 3.332888176638177e-05, "loss": 0.0177, "step": 45440 }, { "epoch": 0.33595990656692587, "grad_norm": 0.0958716869354248, "learning_rate": 3.332517212725546e-05, "loss": 0.0176, "step": 45450 }, { "epoch": 0.3360338251382277, "grad_norm": 0.06695028394460678, "learning_rate": 3.3321462488129155e-05, "loss": 0.0174, "step": 45460 }, { "epoch": 0.33610774370952956, "grad_norm": 0.08938926458358765, "learning_rate": 3.331775284900285e-05, "loss": 0.0199, "step": 45470 }, { "epoch": 0.33618166228083146, "grad_norm": 0.07885266840457916, "learning_rate": 3.331404320987655e-05, "loss": 0.0167, "step": 45480 }, { "epoch": 0.3362555808521333, "grad_norm": 0.0742824599146843, "learning_rate": 3.331033357075024e-05, "loss": 0.0195, "step": 45490 }, { "epoch": 0.33632949942343515, "grad_norm": 0.07376478612422943, "learning_rate": 3.330662393162393e-05, "loss": 0.0191, "step": 45500 }, { "epoch": 0.336403417994737, "grad_norm": 0.06707164645195007, "learning_rate": 3.330291429249763e-05, "loss": 0.0183, "step": 45510 }, { "epoch": 0.33647733656603884, "grad_norm": 0.08750803023576736, "learning_rate": 3.3299204653371324e-05, "loss": 0.0181, "step": 45520 }, { "epoch": 0.3365512551373407, "grad_norm": 0.06902256608009338, "learning_rate": 3.3295495014245013e-05, "loss": 0.0197, "step": 45530 }, { "epoch": 0.33662517370864253, "grad_norm": 0.09635186940431595, "learning_rate": 3.329178537511871e-05, "loss": 0.0196, "step": 45540 }, { "epoch": 0.33669909227994443, "grad_norm": 0.12827034294605255, "learning_rate": 3.3288075735992405e-05, "loss": 0.0166, "step": 45550 }, { "epoch": 0.3367730108512463, "grad_norm": 0.07784906029701233, "learning_rate": 3.32843660968661e-05, "loss": 0.018, "step": 45560 }, { "epoch": 0.3368469294225481, "grad_norm": 0.06968273967504501, "learning_rate": 3.32806564577398e-05, "loss": 0.0177, "step": 45570 }, { "epoch": 0.33692084799384997, "grad_norm": 0.10143133252859116, "learning_rate": 3.3276946818613487e-05, "loss": 0.02, "step": 45580 }, { "epoch": 0.3369947665651518, "grad_norm": 0.06562886387109756, "learning_rate": 3.327323717948718e-05, "loss": 0.0186, "step": 45590 }, { "epoch": 0.33706868513645366, "grad_norm": 0.08129129558801651, "learning_rate": 3.326952754036087e-05, "loss": 0.0186, "step": 45600 }, { "epoch": 0.33714260370775556, "grad_norm": 0.09293606877326965, "learning_rate": 3.326581790123457e-05, "loss": 0.0169, "step": 45610 }, { "epoch": 0.3372165222790574, "grad_norm": 0.1033020094037056, "learning_rate": 3.3262108262108264e-05, "loss": 0.0198, "step": 45620 }, { "epoch": 0.33729044085035925, "grad_norm": 0.08983422070741653, "learning_rate": 3.325839862298196e-05, "loss": 0.0184, "step": 45630 }, { "epoch": 0.3373643594216611, "grad_norm": 0.10300349444150925, "learning_rate": 3.3254688983855656e-05, "loss": 0.0176, "step": 45640 }, { "epoch": 0.33743827799296294, "grad_norm": 0.07111994177103043, "learning_rate": 3.3250979344729345e-05, "loss": 0.0191, "step": 45650 }, { "epoch": 0.3375121965642648, "grad_norm": 0.06482949107885361, "learning_rate": 3.324726970560304e-05, "loss": 0.0156, "step": 45660 }, { "epoch": 0.33758611513556663, "grad_norm": 0.09989634156227112, "learning_rate": 3.324356006647674e-05, "loss": 0.019, "step": 45670 }, { "epoch": 0.33766003370686853, "grad_norm": 0.07140668481588364, "learning_rate": 3.3239850427350426e-05, "loss": 0.0186, "step": 45680 }, { "epoch": 0.3377339522781704, "grad_norm": 0.07863501459360123, "learning_rate": 3.323614078822412e-05, "loss": 0.0172, "step": 45690 }, { "epoch": 0.3378078708494722, "grad_norm": 0.0761060044169426, "learning_rate": 3.323243114909782e-05, "loss": 0.0181, "step": 45700 }, { "epoch": 0.33788178942077407, "grad_norm": 0.07722659409046173, "learning_rate": 3.3228721509971514e-05, "loss": 0.0177, "step": 45710 }, { "epoch": 0.3379557079920759, "grad_norm": 0.0757056325674057, "learning_rate": 3.322501187084521e-05, "loss": 0.0185, "step": 45720 }, { "epoch": 0.33802962656337776, "grad_norm": 0.09015494585037231, "learning_rate": 3.32213022317189e-05, "loss": 0.0162, "step": 45730 }, { "epoch": 0.33810354513467966, "grad_norm": 0.09859200567007065, "learning_rate": 3.3217592592592595e-05, "loss": 0.0176, "step": 45740 }, { "epoch": 0.3381774637059815, "grad_norm": 0.08525702357292175, "learning_rate": 3.321388295346629e-05, "loss": 0.0214, "step": 45750 }, { "epoch": 0.33825138227728335, "grad_norm": 0.07817837595939636, "learning_rate": 3.321017331433998e-05, "loss": 0.0172, "step": 45760 }, { "epoch": 0.3383253008485852, "grad_norm": 0.06586714833974838, "learning_rate": 3.3206463675213676e-05, "loss": 0.0174, "step": 45770 }, { "epoch": 0.33839921941988704, "grad_norm": 0.07378669828176498, "learning_rate": 3.320275403608737e-05, "loss": 0.0167, "step": 45780 }, { "epoch": 0.3384731379911889, "grad_norm": 0.09944503009319305, "learning_rate": 3.319904439696107e-05, "loss": 0.0178, "step": 45790 }, { "epoch": 0.33854705656249073, "grad_norm": 0.10161613672971725, "learning_rate": 3.3195334757834764e-05, "loss": 0.0202, "step": 45800 }, { "epoch": 0.33862097513379263, "grad_norm": 0.09542983025312424, "learning_rate": 3.319162511870845e-05, "loss": 0.0191, "step": 45810 }, { "epoch": 0.3386948937050945, "grad_norm": 0.0869336798787117, "learning_rate": 3.318791547958215e-05, "loss": 0.0191, "step": 45820 }, { "epoch": 0.3387688122763963, "grad_norm": 0.13735035061836243, "learning_rate": 3.318420584045584e-05, "loss": 0.021, "step": 45830 }, { "epoch": 0.33884273084769817, "grad_norm": 0.14309273660182953, "learning_rate": 3.3180496201329534e-05, "loss": 0.0202, "step": 45840 }, { "epoch": 0.338916649419, "grad_norm": 0.08051399141550064, "learning_rate": 3.317678656220323e-05, "loss": 0.0196, "step": 45850 }, { "epoch": 0.33899056799030186, "grad_norm": 0.12576155364513397, "learning_rate": 3.3173076923076926e-05, "loss": 0.019, "step": 45860 }, { "epoch": 0.33906448656160376, "grad_norm": 0.0772315189242363, "learning_rate": 3.316936728395062e-05, "loss": 0.0179, "step": 45870 }, { "epoch": 0.3391384051329056, "grad_norm": 0.0837082490324974, "learning_rate": 3.316565764482431e-05, "loss": 0.0191, "step": 45880 }, { "epoch": 0.33921232370420745, "grad_norm": 0.0778370201587677, "learning_rate": 3.316194800569801e-05, "loss": 0.017, "step": 45890 }, { "epoch": 0.3392862422755093, "grad_norm": 0.07121980935335159, "learning_rate": 3.31582383665717e-05, "loss": 0.0203, "step": 45900 }, { "epoch": 0.33936016084681114, "grad_norm": 0.1336478441953659, "learning_rate": 3.315452872744539e-05, "loss": 0.0178, "step": 45910 }, { "epoch": 0.339434079418113, "grad_norm": 0.10090693086385727, "learning_rate": 3.315081908831909e-05, "loss": 0.0193, "step": 45920 }, { "epoch": 0.33950799798941483, "grad_norm": 0.06936877965927124, "learning_rate": 3.3147109449192784e-05, "loss": 0.0164, "step": 45930 }, { "epoch": 0.33958191656071673, "grad_norm": 0.09035806357860565, "learning_rate": 3.314339981006648e-05, "loss": 0.0173, "step": 45940 }, { "epoch": 0.3396558351320186, "grad_norm": 0.08105256408452988, "learning_rate": 3.3139690170940176e-05, "loss": 0.0169, "step": 45950 }, { "epoch": 0.3397297537033204, "grad_norm": 0.10572589933872223, "learning_rate": 3.3135980531813866e-05, "loss": 0.0184, "step": 45960 }, { "epoch": 0.33980367227462227, "grad_norm": 0.1222388744354248, "learning_rate": 3.313227089268756e-05, "loss": 0.0185, "step": 45970 }, { "epoch": 0.3398775908459241, "grad_norm": 0.08362412452697754, "learning_rate": 3.312856125356126e-05, "loss": 0.0184, "step": 45980 }, { "epoch": 0.33995150941722596, "grad_norm": 0.0899990126490593, "learning_rate": 3.312485161443495e-05, "loss": 0.0184, "step": 45990 }, { "epoch": 0.34002542798852786, "grad_norm": 0.06433310359716415, "learning_rate": 3.312114197530864e-05, "loss": 0.0165, "step": 46000 }, { "epoch": 0.3400993465598297, "grad_norm": 0.08536859601736069, "learning_rate": 3.311743233618234e-05, "loss": 0.0173, "step": 46010 }, { "epoch": 0.34017326513113155, "grad_norm": 0.10119964927434921, "learning_rate": 3.3113722697056035e-05, "loss": 0.018, "step": 46020 }, { "epoch": 0.3402471837024334, "grad_norm": 0.07824676483869553, "learning_rate": 3.311001305792973e-05, "loss": 0.0159, "step": 46030 }, { "epoch": 0.34032110227373524, "grad_norm": 0.0952010378241539, "learning_rate": 3.310630341880342e-05, "loss": 0.0178, "step": 46040 }, { "epoch": 0.3403950208450371, "grad_norm": 0.09411787986755371, "learning_rate": 3.3102593779677116e-05, "loss": 0.0201, "step": 46050 }, { "epoch": 0.34046893941633893, "grad_norm": 0.12072297930717468, "learning_rate": 3.3098884140550805e-05, "loss": 0.0205, "step": 46060 }, { "epoch": 0.34054285798764083, "grad_norm": 0.07515337318181992, "learning_rate": 3.30951745014245e-05, "loss": 0.019, "step": 46070 }, { "epoch": 0.3406167765589427, "grad_norm": 0.07915613800287247, "learning_rate": 3.30914648622982e-05, "loss": 0.0167, "step": 46080 }, { "epoch": 0.3406906951302445, "grad_norm": 0.06810886412858963, "learning_rate": 3.308775522317189e-05, "loss": 0.0187, "step": 46090 }, { "epoch": 0.34076461370154637, "grad_norm": 0.05875418707728386, "learning_rate": 3.308404558404559e-05, "loss": 0.0191, "step": 46100 }, { "epoch": 0.3408385322728482, "grad_norm": 0.11320916563272476, "learning_rate": 3.308033594491928e-05, "loss": 0.0191, "step": 46110 }, { "epoch": 0.34091245084415006, "grad_norm": 0.10753299295902252, "learning_rate": 3.3076626305792974e-05, "loss": 0.0196, "step": 46120 }, { "epoch": 0.34098636941545196, "grad_norm": 0.06757346540689468, "learning_rate": 3.307291666666667e-05, "loss": 0.0214, "step": 46130 }, { "epoch": 0.3410602879867538, "grad_norm": 0.06744559854269028, "learning_rate": 3.306920702754036e-05, "loss": 0.022, "step": 46140 }, { "epoch": 0.34113420655805565, "grad_norm": 0.09945414960384369, "learning_rate": 3.3065497388414055e-05, "loss": 0.0208, "step": 46150 }, { "epoch": 0.3412081251293575, "grad_norm": 0.07971841096878052, "learning_rate": 3.306178774928775e-05, "loss": 0.0184, "step": 46160 }, { "epoch": 0.34128204370065934, "grad_norm": 0.09220457077026367, "learning_rate": 3.305807811016145e-05, "loss": 0.0186, "step": 46170 }, { "epoch": 0.3413559622719612, "grad_norm": 0.07157941162586212, "learning_rate": 3.305436847103514e-05, "loss": 0.0177, "step": 46180 }, { "epoch": 0.34142988084326303, "grad_norm": 0.08177167177200317, "learning_rate": 3.305065883190883e-05, "loss": 0.0193, "step": 46190 }, { "epoch": 0.34150379941456493, "grad_norm": 0.09292397648096085, "learning_rate": 3.304694919278253e-05, "loss": 0.0183, "step": 46200 }, { "epoch": 0.3415777179858668, "grad_norm": 0.0946432575583458, "learning_rate": 3.3043239553656224e-05, "loss": 0.0162, "step": 46210 }, { "epoch": 0.3416516365571686, "grad_norm": 0.0911048948764801, "learning_rate": 3.303952991452991e-05, "loss": 0.0205, "step": 46220 }, { "epoch": 0.34172555512847047, "grad_norm": 0.0834997296333313, "learning_rate": 3.303582027540361e-05, "loss": 0.0195, "step": 46230 }, { "epoch": 0.3417994736997723, "grad_norm": 0.12757158279418945, "learning_rate": 3.3032110636277305e-05, "loss": 0.0181, "step": 46240 }, { "epoch": 0.34187339227107416, "grad_norm": 0.10090693831443787, "learning_rate": 3.3028400997151e-05, "loss": 0.0188, "step": 46250 }, { "epoch": 0.34194731084237606, "grad_norm": 0.11772280186414719, "learning_rate": 3.30246913580247e-05, "loss": 0.0188, "step": 46260 }, { "epoch": 0.3420212294136779, "grad_norm": 0.0676715150475502, "learning_rate": 3.3020981718898386e-05, "loss": 0.0188, "step": 46270 }, { "epoch": 0.34209514798497975, "grad_norm": 0.09699052572250366, "learning_rate": 3.301727207977208e-05, "loss": 0.0171, "step": 46280 }, { "epoch": 0.3421690665562816, "grad_norm": 0.07975872606039047, "learning_rate": 3.301356244064577e-05, "loss": 0.0213, "step": 46290 }, { "epoch": 0.34224298512758344, "grad_norm": 0.045385707169771194, "learning_rate": 3.300985280151947e-05, "loss": 0.0188, "step": 46300 }, { "epoch": 0.3423169036988853, "grad_norm": 0.10349560528993607, "learning_rate": 3.300614316239316e-05, "loss": 0.0191, "step": 46310 }, { "epoch": 0.3423908222701872, "grad_norm": 0.08033833652734756, "learning_rate": 3.300243352326686e-05, "loss": 0.02, "step": 46320 }, { "epoch": 0.34246474084148903, "grad_norm": 0.0725812166929245, "learning_rate": 3.2998723884140555e-05, "loss": 0.0161, "step": 46330 }, { "epoch": 0.3425386594127909, "grad_norm": 0.08852728456258774, "learning_rate": 3.2995014245014244e-05, "loss": 0.0195, "step": 46340 }, { "epoch": 0.3426125779840927, "grad_norm": 0.07608164101839066, "learning_rate": 3.299130460588794e-05, "loss": 0.0171, "step": 46350 }, { "epoch": 0.34268649655539457, "grad_norm": 0.08210303634405136, "learning_rate": 3.2987594966761636e-05, "loss": 0.0181, "step": 46360 }, { "epoch": 0.3427604151266964, "grad_norm": 0.0832902044057846, "learning_rate": 3.2983885327635326e-05, "loss": 0.0186, "step": 46370 }, { "epoch": 0.34283433369799826, "grad_norm": 0.11009570956230164, "learning_rate": 3.298017568850902e-05, "loss": 0.0189, "step": 46380 }, { "epoch": 0.34290825226930016, "grad_norm": 0.078762948513031, "learning_rate": 3.297646604938272e-05, "loss": 0.018, "step": 46390 }, { "epoch": 0.342982170840602, "grad_norm": 0.10385294258594513, "learning_rate": 3.2972756410256414e-05, "loss": 0.0226, "step": 46400 }, { "epoch": 0.34305608941190385, "grad_norm": 0.08638278394937515, "learning_rate": 3.296904677113011e-05, "loss": 0.0166, "step": 46410 }, { "epoch": 0.3431300079832057, "grad_norm": 0.0991319864988327, "learning_rate": 3.29653371320038e-05, "loss": 0.019, "step": 46420 }, { "epoch": 0.34320392655450754, "grad_norm": 0.09847695380449295, "learning_rate": 3.2961627492877495e-05, "loss": 0.0165, "step": 46430 }, { "epoch": 0.3432778451258094, "grad_norm": 0.07403053343296051, "learning_rate": 3.295791785375119e-05, "loss": 0.0189, "step": 46440 }, { "epoch": 0.3433517636971113, "grad_norm": 0.06902497261762619, "learning_rate": 3.295420821462488e-05, "loss": 0.0177, "step": 46450 }, { "epoch": 0.34342568226841313, "grad_norm": 0.07637327909469604, "learning_rate": 3.2950498575498576e-05, "loss": 0.0189, "step": 46460 }, { "epoch": 0.343499600839715, "grad_norm": 0.06147047504782677, "learning_rate": 3.294678893637227e-05, "loss": 0.018, "step": 46470 }, { "epoch": 0.3435735194110168, "grad_norm": 0.08808895945549011, "learning_rate": 3.294307929724597e-05, "loss": 0.0177, "step": 46480 }, { "epoch": 0.34364743798231867, "grad_norm": 0.0873732939362526, "learning_rate": 3.2939369658119664e-05, "loss": 0.0157, "step": 46490 }, { "epoch": 0.3437213565536205, "grad_norm": 0.06639257818460464, "learning_rate": 3.293566001899335e-05, "loss": 0.0181, "step": 46500 }, { "epoch": 0.34379527512492236, "grad_norm": 0.08449820429086685, "learning_rate": 3.293195037986705e-05, "loss": 0.0199, "step": 46510 }, { "epoch": 0.34386919369622426, "grad_norm": 0.10189925134181976, "learning_rate": 3.292824074074074e-05, "loss": 0.0188, "step": 46520 }, { "epoch": 0.3439431122675261, "grad_norm": 0.08540836721658707, "learning_rate": 3.2924531101614434e-05, "loss": 0.0196, "step": 46530 }, { "epoch": 0.34401703083882795, "grad_norm": 0.08655638247728348, "learning_rate": 3.292082146248813e-05, "loss": 0.02, "step": 46540 }, { "epoch": 0.3440909494101298, "grad_norm": 0.08147939294576645, "learning_rate": 3.2917111823361826e-05, "loss": 0.0185, "step": 46550 }, { "epoch": 0.34416486798143164, "grad_norm": 0.07434861361980438, "learning_rate": 3.291340218423552e-05, "loss": 0.0194, "step": 46560 }, { "epoch": 0.3442387865527335, "grad_norm": 0.07616791874170303, "learning_rate": 3.290969254510921e-05, "loss": 0.0186, "step": 46570 }, { "epoch": 0.3443127051240354, "grad_norm": 0.08321139961481094, "learning_rate": 3.290598290598291e-05, "loss": 0.0224, "step": 46580 }, { "epoch": 0.34438662369533724, "grad_norm": 0.11349760740995407, "learning_rate": 3.29022732668566e-05, "loss": 0.0188, "step": 46590 }, { "epoch": 0.3444605422666391, "grad_norm": 0.09545985609292984, "learning_rate": 3.289856362773029e-05, "loss": 0.0193, "step": 46600 }, { "epoch": 0.3445344608379409, "grad_norm": 0.0886853039264679, "learning_rate": 3.289485398860399e-05, "loss": 0.0206, "step": 46610 }, { "epoch": 0.34460837940924277, "grad_norm": 0.07644405961036682, "learning_rate": 3.2891144349477684e-05, "loss": 0.0169, "step": 46620 }, { "epoch": 0.3446822979805446, "grad_norm": 0.0853935107588768, "learning_rate": 3.288743471035138e-05, "loss": 0.0193, "step": 46630 }, { "epoch": 0.34475621655184646, "grad_norm": 0.061725448817014694, "learning_rate": 3.2883725071225076e-05, "loss": 0.0175, "step": 46640 }, { "epoch": 0.34483013512314836, "grad_norm": 0.0924752801656723, "learning_rate": 3.2880015432098765e-05, "loss": 0.0184, "step": 46650 }, { "epoch": 0.3449040536944502, "grad_norm": 0.10381954163312912, "learning_rate": 3.287630579297246e-05, "loss": 0.0181, "step": 46660 }, { "epoch": 0.34497797226575205, "grad_norm": 0.09819740802049637, "learning_rate": 3.287259615384616e-05, "loss": 0.0181, "step": 46670 }, { "epoch": 0.3450518908370539, "grad_norm": 0.08267750591039658, "learning_rate": 3.2868886514719846e-05, "loss": 0.0196, "step": 46680 }, { "epoch": 0.34512580940835574, "grad_norm": 0.054967526346445084, "learning_rate": 3.286517687559354e-05, "loss": 0.0175, "step": 46690 }, { "epoch": 0.3451997279796576, "grad_norm": 0.09669110924005508, "learning_rate": 3.286146723646724e-05, "loss": 0.0196, "step": 46700 }, { "epoch": 0.3452736465509595, "grad_norm": 0.07660909742116928, "learning_rate": 3.2857757597340934e-05, "loss": 0.02, "step": 46710 }, { "epoch": 0.34534756512226134, "grad_norm": 0.09221714735031128, "learning_rate": 3.285404795821463e-05, "loss": 0.0155, "step": 46720 }, { "epoch": 0.3454214836935632, "grad_norm": 0.08598586916923523, "learning_rate": 3.285033831908832e-05, "loss": 0.0184, "step": 46730 }, { "epoch": 0.345495402264865, "grad_norm": 0.08627812564373016, "learning_rate": 3.2846628679962015e-05, "loss": 0.0188, "step": 46740 }, { "epoch": 0.34556932083616687, "grad_norm": 0.10398365557193756, "learning_rate": 3.2842919040835705e-05, "loss": 0.0162, "step": 46750 }, { "epoch": 0.3456432394074687, "grad_norm": 0.08908989280462265, "learning_rate": 3.28392094017094e-05, "loss": 0.0209, "step": 46760 }, { "epoch": 0.34571715797877056, "grad_norm": 0.08196178078651428, "learning_rate": 3.2835499762583097e-05, "loss": 0.019, "step": 46770 }, { "epoch": 0.34579107655007246, "grad_norm": 0.09578578919172287, "learning_rate": 3.283179012345679e-05, "loss": 0.0177, "step": 46780 }, { "epoch": 0.3458649951213743, "grad_norm": 0.0772024616599083, "learning_rate": 3.282808048433049e-05, "loss": 0.018, "step": 46790 }, { "epoch": 0.34593891369267615, "grad_norm": 0.06309277564287186, "learning_rate": 3.282437084520418e-05, "loss": 0.0181, "step": 46800 }, { "epoch": 0.346012832263978, "grad_norm": 0.10176903009414673, "learning_rate": 3.2820661206077874e-05, "loss": 0.0224, "step": 46810 }, { "epoch": 0.34608675083527984, "grad_norm": 0.10570260137319565, "learning_rate": 3.281695156695157e-05, "loss": 0.0172, "step": 46820 }, { "epoch": 0.3461606694065817, "grad_norm": 0.10192044824361801, "learning_rate": 3.281324192782526e-05, "loss": 0.0191, "step": 46830 }, { "epoch": 0.3462345879778836, "grad_norm": 0.07488798350095749, "learning_rate": 3.2809532288698955e-05, "loss": 0.02, "step": 46840 }, { "epoch": 0.34630850654918544, "grad_norm": 0.08586698770523071, "learning_rate": 3.280582264957265e-05, "loss": 0.0185, "step": 46850 }, { "epoch": 0.3463824251204873, "grad_norm": 0.09512021392583847, "learning_rate": 3.280211301044635e-05, "loss": 0.0178, "step": 46860 }, { "epoch": 0.3464563436917891, "grad_norm": 0.09688975661993027, "learning_rate": 3.279840337132004e-05, "loss": 0.02, "step": 46870 }, { "epoch": 0.34653026226309097, "grad_norm": 0.08890626579523087, "learning_rate": 3.279469373219373e-05, "loss": 0.0184, "step": 46880 }, { "epoch": 0.3466041808343928, "grad_norm": 0.09725047647953033, "learning_rate": 3.279098409306743e-05, "loss": 0.02, "step": 46890 }, { "epoch": 0.34667809940569466, "grad_norm": 0.124251589179039, "learning_rate": 3.2787274453941124e-05, "loss": 0.0179, "step": 46900 }, { "epoch": 0.34675201797699656, "grad_norm": 0.08104971796274185, "learning_rate": 3.278356481481481e-05, "loss": 0.0173, "step": 46910 }, { "epoch": 0.3468259365482984, "grad_norm": 0.119538314640522, "learning_rate": 3.277985517568851e-05, "loss": 0.0217, "step": 46920 }, { "epoch": 0.34689985511960025, "grad_norm": 0.0804719477891922, "learning_rate": 3.2776145536562205e-05, "loss": 0.0204, "step": 46930 }, { "epoch": 0.3469737736909021, "grad_norm": 0.09438547492027283, "learning_rate": 3.27724358974359e-05, "loss": 0.02, "step": 46940 }, { "epoch": 0.34704769226220394, "grad_norm": 0.08524240553379059, "learning_rate": 3.27687262583096e-05, "loss": 0.0192, "step": 46950 }, { "epoch": 0.3471216108335058, "grad_norm": 0.06681135296821594, "learning_rate": 3.2765016619183286e-05, "loss": 0.0194, "step": 46960 }, { "epoch": 0.3471955294048077, "grad_norm": 0.11396799981594086, "learning_rate": 3.276130698005698e-05, "loss": 0.0201, "step": 46970 }, { "epoch": 0.34726944797610954, "grad_norm": 0.09392783045768738, "learning_rate": 3.275759734093067e-05, "loss": 0.0192, "step": 46980 }, { "epoch": 0.3473433665474114, "grad_norm": 0.08238954097032547, "learning_rate": 3.275388770180437e-05, "loss": 0.0192, "step": 46990 }, { "epoch": 0.3474172851187132, "grad_norm": 0.08259283751249313, "learning_rate": 3.275017806267806e-05, "loss": 0.0193, "step": 47000 }, { "epoch": 0.34749120369001507, "grad_norm": 0.06521821767091751, "learning_rate": 3.274646842355176e-05, "loss": 0.0184, "step": 47010 }, { "epoch": 0.3475651222613169, "grad_norm": 0.08316667377948761, "learning_rate": 3.2742758784425455e-05, "loss": 0.017, "step": 47020 }, { "epoch": 0.34763904083261876, "grad_norm": 0.09221500903367996, "learning_rate": 3.2739049145299144e-05, "loss": 0.019, "step": 47030 }, { "epoch": 0.34771295940392066, "grad_norm": 0.1091422289609909, "learning_rate": 3.273533950617284e-05, "loss": 0.0195, "step": 47040 }, { "epoch": 0.3477868779752225, "grad_norm": 0.06781918555498123, "learning_rate": 3.2731629867046536e-05, "loss": 0.0171, "step": 47050 }, { "epoch": 0.34786079654652435, "grad_norm": 0.07065023481845856, "learning_rate": 3.2727920227920225e-05, "loss": 0.0187, "step": 47060 }, { "epoch": 0.3479347151178262, "grad_norm": 0.060498230159282684, "learning_rate": 3.272421058879392e-05, "loss": 0.02, "step": 47070 }, { "epoch": 0.34800863368912804, "grad_norm": 0.08753509074449539, "learning_rate": 3.272050094966762e-05, "loss": 0.0205, "step": 47080 }, { "epoch": 0.3480825522604299, "grad_norm": 0.08763568848371506, "learning_rate": 3.271679131054131e-05, "loss": 0.0193, "step": 47090 }, { "epoch": 0.3481564708317318, "grad_norm": 0.08908195793628693, "learning_rate": 3.271308167141501e-05, "loss": 0.0199, "step": 47100 }, { "epoch": 0.34823038940303364, "grad_norm": 0.07962878048419952, "learning_rate": 3.27093720322887e-05, "loss": 0.0176, "step": 47110 }, { "epoch": 0.3483043079743355, "grad_norm": 0.08842107653617859, "learning_rate": 3.2705662393162394e-05, "loss": 0.0206, "step": 47120 }, { "epoch": 0.3483782265456373, "grad_norm": 0.1007806733250618, "learning_rate": 3.270195275403609e-05, "loss": 0.0183, "step": 47130 }, { "epoch": 0.34845214511693917, "grad_norm": 0.05309021845459938, "learning_rate": 3.269824311490978e-05, "loss": 0.0192, "step": 47140 }, { "epoch": 0.348526063688241, "grad_norm": 0.07942581921815872, "learning_rate": 3.2694533475783476e-05, "loss": 0.0179, "step": 47150 }, { "epoch": 0.34859998225954286, "grad_norm": 0.08619439601898193, "learning_rate": 3.269082383665717e-05, "loss": 0.0179, "step": 47160 }, { "epoch": 0.34867390083084476, "grad_norm": 0.1479090005159378, "learning_rate": 3.268711419753087e-05, "loss": 0.0215, "step": 47170 }, { "epoch": 0.3487478194021466, "grad_norm": 0.07243809849023819, "learning_rate": 3.2683404558404563e-05, "loss": 0.0173, "step": 47180 }, { "epoch": 0.34882173797344845, "grad_norm": 0.059807922691106796, "learning_rate": 3.267969491927825e-05, "loss": 0.0174, "step": 47190 }, { "epoch": 0.3488956565447503, "grad_norm": 0.12337792664766312, "learning_rate": 3.267598528015195e-05, "loss": 0.0172, "step": 47200 }, { "epoch": 0.34896957511605214, "grad_norm": 0.09791093319654465, "learning_rate": 3.267227564102564e-05, "loss": 0.0193, "step": 47210 }, { "epoch": 0.349043493687354, "grad_norm": 0.15573714673519135, "learning_rate": 3.2668566001899334e-05, "loss": 0.0203, "step": 47220 }, { "epoch": 0.3491174122586559, "grad_norm": 0.06387767195701599, "learning_rate": 3.266485636277303e-05, "loss": 0.0181, "step": 47230 }, { "epoch": 0.34919133082995774, "grad_norm": 0.06677080690860748, "learning_rate": 3.2661146723646726e-05, "loss": 0.0192, "step": 47240 }, { "epoch": 0.3492652494012596, "grad_norm": 0.10616503655910492, "learning_rate": 3.265743708452042e-05, "loss": 0.0209, "step": 47250 }, { "epoch": 0.3493391679725614, "grad_norm": 0.0729021355509758, "learning_rate": 3.265372744539411e-05, "loss": 0.0166, "step": 47260 }, { "epoch": 0.34941308654386327, "grad_norm": 0.06525515764951706, "learning_rate": 3.265001780626781e-05, "loss": 0.0172, "step": 47270 }, { "epoch": 0.3494870051151651, "grad_norm": 0.0829407349228859, "learning_rate": 3.26463081671415e-05, "loss": 0.0221, "step": 47280 }, { "epoch": 0.34956092368646696, "grad_norm": 0.08970022201538086, "learning_rate": 3.264259852801519e-05, "loss": 0.0174, "step": 47290 }, { "epoch": 0.34963484225776886, "grad_norm": 0.09719527512788773, "learning_rate": 3.263888888888889e-05, "loss": 0.0178, "step": 47300 }, { "epoch": 0.3497087608290707, "grad_norm": 0.1067635715007782, "learning_rate": 3.2635179249762584e-05, "loss": 0.0175, "step": 47310 }, { "epoch": 0.34978267940037255, "grad_norm": 0.10435649007558823, "learning_rate": 3.263146961063628e-05, "loss": 0.0201, "step": 47320 }, { "epoch": 0.3498565979716744, "grad_norm": 0.10568960011005402, "learning_rate": 3.2627759971509976e-05, "loss": 0.0205, "step": 47330 }, { "epoch": 0.34993051654297624, "grad_norm": 0.08821310102939606, "learning_rate": 3.2624050332383665e-05, "loss": 0.0188, "step": 47340 }, { "epoch": 0.3500044351142781, "grad_norm": 0.07589121907949448, "learning_rate": 3.262034069325736e-05, "loss": 0.0189, "step": 47350 }, { "epoch": 0.35007835368558, "grad_norm": 0.07190071791410446, "learning_rate": 3.261663105413106e-05, "loss": 0.0165, "step": 47360 }, { "epoch": 0.35015227225688184, "grad_norm": 0.09277673810720444, "learning_rate": 3.2612921415004746e-05, "loss": 0.0185, "step": 47370 }, { "epoch": 0.3502261908281837, "grad_norm": 0.06976188719272614, "learning_rate": 3.260921177587844e-05, "loss": 0.0198, "step": 47380 }, { "epoch": 0.3503001093994855, "grad_norm": 0.09157153218984604, "learning_rate": 3.260550213675214e-05, "loss": 0.0178, "step": 47390 }, { "epoch": 0.35037402797078737, "grad_norm": 0.07097479701042175, "learning_rate": 3.2601792497625834e-05, "loss": 0.0202, "step": 47400 }, { "epoch": 0.3504479465420892, "grad_norm": 0.06325814872980118, "learning_rate": 3.259808285849953e-05, "loss": 0.0182, "step": 47410 }, { "epoch": 0.35052186511339106, "grad_norm": 0.08848169445991516, "learning_rate": 3.259437321937322e-05, "loss": 0.0196, "step": 47420 }, { "epoch": 0.35059578368469296, "grad_norm": 0.07658163458108902, "learning_rate": 3.2590663580246915e-05, "loss": 0.0199, "step": 47430 }, { "epoch": 0.3506697022559948, "grad_norm": 0.09558193385601044, "learning_rate": 3.2586953941120604e-05, "loss": 0.0185, "step": 47440 }, { "epoch": 0.35074362082729665, "grad_norm": 0.07930658757686615, "learning_rate": 3.25832443019943e-05, "loss": 0.0165, "step": 47450 }, { "epoch": 0.3508175393985985, "grad_norm": 0.099174365401268, "learning_rate": 3.2579534662868e-05, "loss": 0.0205, "step": 47460 }, { "epoch": 0.35089145796990034, "grad_norm": 0.08491765707731247, "learning_rate": 3.257582502374169e-05, "loss": 0.0192, "step": 47470 }, { "epoch": 0.3509653765412022, "grad_norm": 0.09877867996692657, "learning_rate": 3.257211538461539e-05, "loss": 0.0166, "step": 47480 }, { "epoch": 0.3510392951125041, "grad_norm": 0.09346239268779755, "learning_rate": 3.256840574548908e-05, "loss": 0.017, "step": 47490 }, { "epoch": 0.35111321368380594, "grad_norm": 0.11570316553115845, "learning_rate": 3.256469610636277e-05, "loss": 0.018, "step": 47500 }, { "epoch": 0.3511871322551078, "grad_norm": 0.0938878133893013, "learning_rate": 3.256098646723647e-05, "loss": 0.0201, "step": 47510 }, { "epoch": 0.3512610508264096, "grad_norm": 0.10068412870168686, "learning_rate": 3.255727682811016e-05, "loss": 0.0203, "step": 47520 }, { "epoch": 0.35133496939771147, "grad_norm": 0.08617176115512848, "learning_rate": 3.2553567188983854e-05, "loss": 0.0175, "step": 47530 }, { "epoch": 0.3514088879690133, "grad_norm": 0.07788795977830887, "learning_rate": 3.254985754985755e-05, "loss": 0.0194, "step": 47540 }, { "epoch": 0.35148280654031516, "grad_norm": 0.08169343322515488, "learning_rate": 3.2546147910731246e-05, "loss": 0.0186, "step": 47550 }, { "epoch": 0.35155672511161706, "grad_norm": 0.1000077873468399, "learning_rate": 3.254243827160494e-05, "loss": 0.0195, "step": 47560 }, { "epoch": 0.3516306436829189, "grad_norm": 0.05659700557589531, "learning_rate": 3.253872863247863e-05, "loss": 0.0183, "step": 47570 }, { "epoch": 0.35170456225422075, "grad_norm": 0.10793798416852951, "learning_rate": 3.253501899335233e-05, "loss": 0.0182, "step": 47580 }, { "epoch": 0.3517784808255226, "grad_norm": 0.07970179617404938, "learning_rate": 3.2531309354226024e-05, "loss": 0.0214, "step": 47590 }, { "epoch": 0.35185239939682444, "grad_norm": 0.07447035610675812, "learning_rate": 3.252759971509971e-05, "loss": 0.0174, "step": 47600 }, { "epoch": 0.3519263179681263, "grad_norm": 0.11135456711053848, "learning_rate": 3.2523890075973415e-05, "loss": 0.0156, "step": 47610 }, { "epoch": 0.3520002365394282, "grad_norm": 0.07851218432188034, "learning_rate": 3.2520180436847105e-05, "loss": 0.0194, "step": 47620 }, { "epoch": 0.35207415511073004, "grad_norm": 0.07464441657066345, "learning_rate": 3.25164707977208e-05, "loss": 0.0177, "step": 47630 }, { "epoch": 0.3521480736820319, "grad_norm": 0.07230711728334427, "learning_rate": 3.2512761158594497e-05, "loss": 0.0174, "step": 47640 }, { "epoch": 0.3522219922533337, "grad_norm": 0.08653979748487473, "learning_rate": 3.2509051519468186e-05, "loss": 0.0204, "step": 47650 }, { "epoch": 0.35229591082463557, "grad_norm": 0.08425447344779968, "learning_rate": 3.250534188034188e-05, "loss": 0.0186, "step": 47660 }, { "epoch": 0.3523698293959374, "grad_norm": 0.11556941270828247, "learning_rate": 3.250163224121557e-05, "loss": 0.0205, "step": 47670 }, { "epoch": 0.35244374796723926, "grad_norm": 0.11919292062520981, "learning_rate": 3.249792260208927e-05, "loss": 0.0178, "step": 47680 }, { "epoch": 0.35251766653854116, "grad_norm": 0.07132294774055481, "learning_rate": 3.249421296296297e-05, "loss": 0.0195, "step": 47690 }, { "epoch": 0.352591585109843, "grad_norm": 0.06593780219554901, "learning_rate": 3.249050332383666e-05, "loss": 0.0178, "step": 47700 }, { "epoch": 0.35266550368114485, "grad_norm": 0.1030346155166626, "learning_rate": 3.2486793684710355e-05, "loss": 0.0205, "step": 47710 }, { "epoch": 0.3527394222524467, "grad_norm": 0.07651791721582413, "learning_rate": 3.2483084045584044e-05, "loss": 0.016, "step": 47720 }, { "epoch": 0.35281334082374854, "grad_norm": 0.09694407880306244, "learning_rate": 3.247937440645774e-05, "loss": 0.0178, "step": 47730 }, { "epoch": 0.3528872593950504, "grad_norm": 0.09967559576034546, "learning_rate": 3.2475664767331436e-05, "loss": 0.0191, "step": 47740 }, { "epoch": 0.3529611779663523, "grad_norm": 0.0916508138179779, "learning_rate": 3.2471955128205125e-05, "loss": 0.0193, "step": 47750 }, { "epoch": 0.35303509653765414, "grad_norm": 0.08274427056312561, "learning_rate": 3.246824548907883e-05, "loss": 0.02, "step": 47760 }, { "epoch": 0.353109015108956, "grad_norm": 0.08642439544200897, "learning_rate": 3.246453584995252e-05, "loss": 0.0174, "step": 47770 }, { "epoch": 0.3531829336802578, "grad_norm": 0.09799844026565552, "learning_rate": 3.246082621082621e-05, "loss": 0.0176, "step": 47780 }, { "epoch": 0.35325685225155967, "grad_norm": 0.10590720176696777, "learning_rate": 3.245711657169991e-05, "loss": 0.0157, "step": 47790 }, { "epoch": 0.3533307708228615, "grad_norm": 0.07792959362268448, "learning_rate": 3.24534069325736e-05, "loss": 0.0158, "step": 47800 }, { "epoch": 0.35340468939416336, "grad_norm": 0.0911719799041748, "learning_rate": 3.2449697293447294e-05, "loss": 0.0195, "step": 47810 }, { "epoch": 0.35347860796546526, "grad_norm": 0.06966444104909897, "learning_rate": 3.244598765432099e-05, "loss": 0.0199, "step": 47820 }, { "epoch": 0.3535525265367671, "grad_norm": 0.07988017052412033, "learning_rate": 3.244227801519468e-05, "loss": 0.0177, "step": 47830 }, { "epoch": 0.35362644510806895, "grad_norm": 0.10488210618495941, "learning_rate": 3.243856837606838e-05, "loss": 0.0169, "step": 47840 }, { "epoch": 0.3537003636793708, "grad_norm": 0.06732849776744843, "learning_rate": 3.243485873694207e-05, "loss": 0.0183, "step": 47850 }, { "epoch": 0.35377428225067264, "grad_norm": 0.09752733260393143, "learning_rate": 3.243114909781577e-05, "loss": 0.0195, "step": 47860 }, { "epoch": 0.3538482008219745, "grad_norm": 0.09057949483394623, "learning_rate": 3.242743945868946e-05, "loss": 0.0191, "step": 47870 }, { "epoch": 0.3539221193932764, "grad_norm": 0.1418922394514084, "learning_rate": 3.242372981956315e-05, "loss": 0.0167, "step": 47880 }, { "epoch": 0.35399603796457824, "grad_norm": 0.10503596067428589, "learning_rate": 3.242002018043685e-05, "loss": 0.0193, "step": 47890 }, { "epoch": 0.3540699565358801, "grad_norm": 0.07770757377147675, "learning_rate": 3.241631054131054e-05, "loss": 0.0181, "step": 47900 }, { "epoch": 0.3541438751071819, "grad_norm": 0.08247490972280502, "learning_rate": 3.241260090218424e-05, "loss": 0.0171, "step": 47910 }, { "epoch": 0.35421779367848377, "grad_norm": 0.07183904200792313, "learning_rate": 3.2408891263057936e-05, "loss": 0.0157, "step": 47920 }, { "epoch": 0.3542917122497856, "grad_norm": 0.07240360975265503, "learning_rate": 3.2405181623931625e-05, "loss": 0.0166, "step": 47930 }, { "epoch": 0.35436563082108746, "grad_norm": 0.07742757350206375, "learning_rate": 3.240147198480532e-05, "loss": 0.0178, "step": 47940 }, { "epoch": 0.35443954939238936, "grad_norm": 0.05356835573911667, "learning_rate": 3.239776234567901e-05, "loss": 0.0175, "step": 47950 }, { "epoch": 0.3545134679636912, "grad_norm": 0.05533919483423233, "learning_rate": 3.2394052706552707e-05, "loss": 0.0178, "step": 47960 }, { "epoch": 0.35458738653499305, "grad_norm": 0.07981298863887787, "learning_rate": 3.23903430674264e-05, "loss": 0.0193, "step": 47970 }, { "epoch": 0.3546613051062949, "grad_norm": 0.09771832078695297, "learning_rate": 3.238663342830009e-05, "loss": 0.0176, "step": 47980 }, { "epoch": 0.35473522367759674, "grad_norm": 0.07720401883125305, "learning_rate": 3.2382923789173794e-05, "loss": 0.0156, "step": 47990 }, { "epoch": 0.3548091422488986, "grad_norm": 0.09286272525787354, "learning_rate": 3.237921415004749e-05, "loss": 0.0215, "step": 48000 }, { "epoch": 0.3548830608202005, "grad_norm": 0.08557448536157608, "learning_rate": 3.237550451092118e-05, "loss": 0.0195, "step": 48010 }, { "epoch": 0.35495697939150234, "grad_norm": 0.06496861577033997, "learning_rate": 3.2371794871794876e-05, "loss": 0.0205, "step": 48020 }, { "epoch": 0.3550308979628042, "grad_norm": 0.07169222831726074, "learning_rate": 3.2368085232668565e-05, "loss": 0.0194, "step": 48030 }, { "epoch": 0.355104816534106, "grad_norm": 0.11181885749101639, "learning_rate": 3.236437559354226e-05, "loss": 0.0198, "step": 48040 }, { "epoch": 0.3551787351054079, "grad_norm": 0.07448727637529373, "learning_rate": 3.236066595441596e-05, "loss": 0.0176, "step": 48050 }, { "epoch": 0.3552526536767097, "grad_norm": 0.11262239515781403, "learning_rate": 3.235695631528965e-05, "loss": 0.0184, "step": 48060 }, { "epoch": 0.35532657224801156, "grad_norm": 0.061267927289009094, "learning_rate": 3.235324667616335e-05, "loss": 0.0185, "step": 48070 }, { "epoch": 0.35540049081931346, "grad_norm": 0.08542980998754501, "learning_rate": 3.234953703703704e-05, "loss": 0.0186, "step": 48080 }, { "epoch": 0.3554744093906153, "grad_norm": 0.08174847811460495, "learning_rate": 3.2345827397910734e-05, "loss": 0.0199, "step": 48090 }, { "epoch": 0.35554832796191715, "grad_norm": 0.0893312469124794, "learning_rate": 3.234211775878443e-05, "loss": 0.02, "step": 48100 }, { "epoch": 0.355622246533219, "grad_norm": 0.07482835650444031, "learning_rate": 3.233840811965812e-05, "loss": 0.0193, "step": 48110 }, { "epoch": 0.35569616510452085, "grad_norm": 0.1367911845445633, "learning_rate": 3.2334698480531815e-05, "loss": 0.019, "step": 48120 }, { "epoch": 0.3557700836758227, "grad_norm": 0.09481962025165558, "learning_rate": 3.2330988841405504e-05, "loss": 0.0186, "step": 48130 }, { "epoch": 0.3558440022471246, "grad_norm": 0.09325818717479706, "learning_rate": 3.232727920227921e-05, "loss": 0.0208, "step": 48140 }, { "epoch": 0.35591792081842644, "grad_norm": 0.07967712730169296, "learning_rate": 3.23235695631529e-05, "loss": 0.0191, "step": 48150 }, { "epoch": 0.3559918393897283, "grad_norm": 0.07193194329738617, "learning_rate": 3.231985992402659e-05, "loss": 0.0167, "step": 48160 }, { "epoch": 0.3560657579610301, "grad_norm": 0.07769019901752472, "learning_rate": 3.231615028490029e-05, "loss": 0.0177, "step": 48170 }, { "epoch": 0.356139676532332, "grad_norm": 0.07792861014604568, "learning_rate": 3.231244064577398e-05, "loss": 0.0189, "step": 48180 }, { "epoch": 0.3562135951036338, "grad_norm": 0.0690118744969368, "learning_rate": 3.230873100664767e-05, "loss": 0.0186, "step": 48190 }, { "epoch": 0.3562875136749357, "grad_norm": 0.09299138933420181, "learning_rate": 3.230502136752137e-05, "loss": 0.0224, "step": 48200 }, { "epoch": 0.35636143224623756, "grad_norm": 0.07170496135950089, "learning_rate": 3.2301311728395065e-05, "loss": 0.0163, "step": 48210 }, { "epoch": 0.3564353508175394, "grad_norm": 0.07200492918491364, "learning_rate": 3.229760208926876e-05, "loss": 0.0187, "step": 48220 }, { "epoch": 0.35650926938884125, "grad_norm": 0.10119190812110901, "learning_rate": 3.229389245014246e-05, "loss": 0.0177, "step": 48230 }, { "epoch": 0.3565831879601431, "grad_norm": 0.08835913985967636, "learning_rate": 3.2290182811016146e-05, "loss": 0.0193, "step": 48240 }, { "epoch": 0.35665710653144495, "grad_norm": 0.0860733613371849, "learning_rate": 3.228647317188984e-05, "loss": 0.0182, "step": 48250 }, { "epoch": 0.3567310251027468, "grad_norm": 0.06780107319355011, "learning_rate": 3.228276353276353e-05, "loss": 0.0178, "step": 48260 }, { "epoch": 0.3568049436740487, "grad_norm": 0.09218813478946686, "learning_rate": 3.227905389363723e-05, "loss": 0.0201, "step": 48270 }, { "epoch": 0.35687886224535054, "grad_norm": 0.07747295498847961, "learning_rate": 3.227534425451092e-05, "loss": 0.0165, "step": 48280 }, { "epoch": 0.3569527808166524, "grad_norm": 0.0689418762922287, "learning_rate": 3.227163461538462e-05, "loss": 0.0191, "step": 48290 }, { "epoch": 0.3570266993879542, "grad_norm": 0.0954119861125946, "learning_rate": 3.2267924976258315e-05, "loss": 0.0161, "step": 48300 }, { "epoch": 0.3571006179592561, "grad_norm": 0.07031914591789246, "learning_rate": 3.2264215337132004e-05, "loss": 0.0195, "step": 48310 }, { "epoch": 0.3571745365305579, "grad_norm": 0.11735131591558456, "learning_rate": 3.22605056980057e-05, "loss": 0.0191, "step": 48320 }, { "epoch": 0.3572484551018598, "grad_norm": 0.08334904909133911, "learning_rate": 3.2256796058879396e-05, "loss": 0.0178, "step": 48330 }, { "epoch": 0.35732237367316166, "grad_norm": 0.10128627717494965, "learning_rate": 3.2253086419753086e-05, "loss": 0.0183, "step": 48340 }, { "epoch": 0.3573962922444635, "grad_norm": 0.07888349890708923, "learning_rate": 3.224937678062678e-05, "loss": 0.0182, "step": 48350 }, { "epoch": 0.35747021081576535, "grad_norm": 0.06927074491977692, "learning_rate": 3.224566714150048e-05, "loss": 0.0182, "step": 48360 }, { "epoch": 0.3575441293870672, "grad_norm": 0.10130536556243896, "learning_rate": 3.2241957502374173e-05, "loss": 0.018, "step": 48370 }, { "epoch": 0.35761804795836905, "grad_norm": 0.0818759873509407, "learning_rate": 3.223824786324787e-05, "loss": 0.0204, "step": 48380 }, { "epoch": 0.3576919665296709, "grad_norm": 0.0981663390994072, "learning_rate": 3.223453822412156e-05, "loss": 0.0196, "step": 48390 }, { "epoch": 0.3577658851009728, "grad_norm": 0.06864052265882492, "learning_rate": 3.2230828584995255e-05, "loss": 0.0188, "step": 48400 }, { "epoch": 0.35783980367227464, "grad_norm": 0.05629339441657066, "learning_rate": 3.2227118945868944e-05, "loss": 0.0183, "step": 48410 }, { "epoch": 0.3579137222435765, "grad_norm": 0.0950944796204567, "learning_rate": 3.222340930674264e-05, "loss": 0.0173, "step": 48420 }, { "epoch": 0.3579876408148783, "grad_norm": 0.0920860767364502, "learning_rate": 3.2219699667616336e-05, "loss": 0.0195, "step": 48430 }, { "epoch": 0.3580615593861802, "grad_norm": 0.09546150267124176, "learning_rate": 3.221599002849003e-05, "loss": 0.0202, "step": 48440 }, { "epoch": 0.358135477957482, "grad_norm": 0.14299006760120392, "learning_rate": 3.221228038936373e-05, "loss": 0.021, "step": 48450 }, { "epoch": 0.3582093965287839, "grad_norm": 0.10482155531644821, "learning_rate": 3.2208570750237424e-05, "loss": 0.0184, "step": 48460 }, { "epoch": 0.35828331510008576, "grad_norm": 0.1320812702178955, "learning_rate": 3.220486111111111e-05, "loss": 0.023, "step": 48470 }, { "epoch": 0.3583572336713876, "grad_norm": 0.09531274437904358, "learning_rate": 3.220115147198481e-05, "loss": 0.0211, "step": 48480 }, { "epoch": 0.35843115224268945, "grad_norm": 0.05454854667186737, "learning_rate": 3.21974418328585e-05, "loss": 0.0179, "step": 48490 }, { "epoch": 0.3585050708139913, "grad_norm": 0.10306868702173233, "learning_rate": 3.2193732193732194e-05, "loss": 0.018, "step": 48500 }, { "epoch": 0.35857898938529315, "grad_norm": 0.08248790353536606, "learning_rate": 3.219002255460589e-05, "loss": 0.0186, "step": 48510 }, { "epoch": 0.358652907956595, "grad_norm": 0.0634683296084404, "learning_rate": 3.2186312915479586e-05, "loss": 0.0178, "step": 48520 }, { "epoch": 0.3587268265278969, "grad_norm": 0.06007637456059456, "learning_rate": 3.218260327635328e-05, "loss": 0.0173, "step": 48530 }, { "epoch": 0.35880074509919874, "grad_norm": 0.10380519181489944, "learning_rate": 3.217889363722697e-05, "loss": 0.0181, "step": 48540 }, { "epoch": 0.3588746636705006, "grad_norm": 0.08306507021188736, "learning_rate": 3.217518399810067e-05, "loss": 0.0188, "step": 48550 }, { "epoch": 0.3589485822418024, "grad_norm": 0.07880030572414398, "learning_rate": 3.217147435897436e-05, "loss": 0.0199, "step": 48560 }, { "epoch": 0.3590225008131043, "grad_norm": 0.12690183520317078, "learning_rate": 3.216776471984805e-05, "loss": 0.0184, "step": 48570 }, { "epoch": 0.3590964193844061, "grad_norm": 0.06855998933315277, "learning_rate": 3.216405508072175e-05, "loss": 0.0201, "step": 48580 }, { "epoch": 0.359170337955708, "grad_norm": 0.11447902023792267, "learning_rate": 3.2160345441595444e-05, "loss": 0.0188, "step": 48590 }, { "epoch": 0.35924425652700986, "grad_norm": 0.09545428305864334, "learning_rate": 3.215663580246914e-05, "loss": 0.0177, "step": 48600 }, { "epoch": 0.3593181750983117, "grad_norm": 0.11311125010251999, "learning_rate": 3.2152926163342836e-05, "loss": 0.0168, "step": 48610 }, { "epoch": 0.35939209366961355, "grad_norm": 0.06668959558010101, "learning_rate": 3.2149216524216525e-05, "loss": 0.0187, "step": 48620 }, { "epoch": 0.3594660122409154, "grad_norm": 0.07591219991445541, "learning_rate": 3.214550688509022e-05, "loss": 0.0194, "step": 48630 }, { "epoch": 0.35953993081221725, "grad_norm": 0.05712525174021721, "learning_rate": 3.214179724596391e-05, "loss": 0.0188, "step": 48640 }, { "epoch": 0.3596138493835191, "grad_norm": 0.11006911098957062, "learning_rate": 3.2138087606837606e-05, "loss": 0.0202, "step": 48650 }, { "epoch": 0.359687767954821, "grad_norm": 0.1147850751876831, "learning_rate": 3.21343779677113e-05, "loss": 0.0193, "step": 48660 }, { "epoch": 0.35976168652612284, "grad_norm": 0.08865036815404892, "learning_rate": 3.2130668328585e-05, "loss": 0.0162, "step": 48670 }, { "epoch": 0.3598356050974247, "grad_norm": 0.10027143359184265, "learning_rate": 3.2126958689458694e-05, "loss": 0.0179, "step": 48680 }, { "epoch": 0.3599095236687265, "grad_norm": 0.08432824164628983, "learning_rate": 3.212324905033239e-05, "loss": 0.019, "step": 48690 }, { "epoch": 0.3599834422400284, "grad_norm": 0.09327958524227142, "learning_rate": 3.211953941120608e-05, "loss": 0.0181, "step": 48700 }, { "epoch": 0.3600573608113302, "grad_norm": 0.09955098479986191, "learning_rate": 3.2115829772079775e-05, "loss": 0.0179, "step": 48710 }, { "epoch": 0.3601312793826321, "grad_norm": 0.0701267421245575, "learning_rate": 3.2112120132953465e-05, "loss": 0.0187, "step": 48720 }, { "epoch": 0.36020519795393396, "grad_norm": 0.13661158084869385, "learning_rate": 3.210841049382716e-05, "loss": 0.0187, "step": 48730 }, { "epoch": 0.3602791165252358, "grad_norm": 0.0596172958612442, "learning_rate": 3.2104700854700856e-05, "loss": 0.017, "step": 48740 }, { "epoch": 0.36035303509653765, "grad_norm": 0.1898808777332306, "learning_rate": 3.210099121557455e-05, "loss": 0.019, "step": 48750 }, { "epoch": 0.3604269536678395, "grad_norm": 0.08930271118879318, "learning_rate": 3.209728157644825e-05, "loss": 0.0182, "step": 48760 }, { "epoch": 0.36050087223914135, "grad_norm": 0.11357209086418152, "learning_rate": 3.209357193732194e-05, "loss": 0.0183, "step": 48770 }, { "epoch": 0.3605747908104432, "grad_norm": 0.09769640117883682, "learning_rate": 3.2089862298195634e-05, "loss": 0.0166, "step": 48780 }, { "epoch": 0.3606487093817451, "grad_norm": 0.08066027611494064, "learning_rate": 3.208615265906933e-05, "loss": 0.0191, "step": 48790 }, { "epoch": 0.36072262795304694, "grad_norm": 0.06597844511270523, "learning_rate": 3.208244301994302e-05, "loss": 0.0163, "step": 48800 }, { "epoch": 0.3607965465243488, "grad_norm": 0.07833616435527802, "learning_rate": 3.2078733380816715e-05, "loss": 0.0185, "step": 48810 }, { "epoch": 0.3608704650956506, "grad_norm": 0.08092334121465683, "learning_rate": 3.207502374169041e-05, "loss": 0.0194, "step": 48820 }, { "epoch": 0.3609443836669525, "grad_norm": 0.07439634203910828, "learning_rate": 3.2071314102564107e-05, "loss": 0.0161, "step": 48830 }, { "epoch": 0.3610183022382543, "grad_norm": 0.10406707227230072, "learning_rate": 3.20676044634378e-05, "loss": 0.0202, "step": 48840 }, { "epoch": 0.3610922208095562, "grad_norm": 0.09756065160036087, "learning_rate": 3.206389482431149e-05, "loss": 0.0198, "step": 48850 }, { "epoch": 0.36116613938085806, "grad_norm": 0.07719270884990692, "learning_rate": 3.206018518518519e-05, "loss": 0.0199, "step": 48860 }, { "epoch": 0.3612400579521599, "grad_norm": 0.07252097129821777, "learning_rate": 3.205647554605888e-05, "loss": 0.0174, "step": 48870 }, { "epoch": 0.36131397652346176, "grad_norm": 0.08581475913524628, "learning_rate": 3.205276590693257e-05, "loss": 0.0187, "step": 48880 }, { "epoch": 0.3613878950947636, "grad_norm": 0.0722767785191536, "learning_rate": 3.204905626780627e-05, "loss": 0.02, "step": 48890 }, { "epoch": 0.36146181366606545, "grad_norm": 0.06997499614953995, "learning_rate": 3.2045346628679965e-05, "loss": 0.019, "step": 48900 }, { "epoch": 0.3615357322373673, "grad_norm": 0.07591132074594498, "learning_rate": 3.204163698955366e-05, "loss": 0.0178, "step": 48910 }, { "epoch": 0.3616096508086692, "grad_norm": 0.11934647709131241, "learning_rate": 3.203792735042736e-05, "loss": 0.0228, "step": 48920 }, { "epoch": 0.36168356937997104, "grad_norm": 0.07847454398870468, "learning_rate": 3.2034217711301046e-05, "loss": 0.019, "step": 48930 }, { "epoch": 0.3617574879512729, "grad_norm": 0.09507045894861221, "learning_rate": 3.203050807217474e-05, "loss": 0.0202, "step": 48940 }, { "epoch": 0.36183140652257473, "grad_norm": 0.08507265895605087, "learning_rate": 3.202679843304843e-05, "loss": 0.0209, "step": 48950 }, { "epoch": 0.3619053250938766, "grad_norm": 0.08112446963787079, "learning_rate": 3.202308879392213e-05, "loss": 0.0162, "step": 48960 }, { "epoch": 0.3619792436651784, "grad_norm": 0.09993197023868561, "learning_rate": 3.201937915479582e-05, "loss": 0.0169, "step": 48970 }, { "epoch": 0.3620531622364803, "grad_norm": 0.07737724483013153, "learning_rate": 3.201566951566952e-05, "loss": 0.0185, "step": 48980 }, { "epoch": 0.36212708080778216, "grad_norm": 0.21678310632705688, "learning_rate": 3.2011959876543215e-05, "loss": 0.0181, "step": 48990 }, { "epoch": 0.362200999379084, "grad_norm": 0.08399459719657898, "learning_rate": 3.2008250237416904e-05, "loss": 0.0179, "step": 49000 }, { "epoch": 0.36227491795038586, "grad_norm": 0.06960813701152802, "learning_rate": 3.20045405982906e-05, "loss": 0.0172, "step": 49010 }, { "epoch": 0.3623488365216877, "grad_norm": 0.08563554286956787, "learning_rate": 3.2000830959164296e-05, "loss": 0.0171, "step": 49020 }, { "epoch": 0.36242275509298955, "grad_norm": 0.11998796463012695, "learning_rate": 3.1997121320037985e-05, "loss": 0.0192, "step": 49030 }, { "epoch": 0.3624966736642914, "grad_norm": 0.07512512058019638, "learning_rate": 3.199341168091168e-05, "loss": 0.0176, "step": 49040 }, { "epoch": 0.3625705922355933, "grad_norm": 0.10795829445123672, "learning_rate": 3.198970204178538e-05, "loss": 0.0189, "step": 49050 }, { "epoch": 0.36264451080689514, "grad_norm": 0.06630425155162811, "learning_rate": 3.198599240265907e-05, "loss": 0.0189, "step": 49060 }, { "epoch": 0.362718429378197, "grad_norm": 0.06694331020116806, "learning_rate": 3.198228276353277e-05, "loss": 0.0169, "step": 49070 }, { "epoch": 0.36279234794949883, "grad_norm": 0.06272326409816742, "learning_rate": 3.197857312440646e-05, "loss": 0.0182, "step": 49080 }, { "epoch": 0.3628662665208007, "grad_norm": 0.0822906568646431, "learning_rate": 3.1974863485280154e-05, "loss": 0.0172, "step": 49090 }, { "epoch": 0.3629401850921025, "grad_norm": 0.1090676337480545, "learning_rate": 3.1971153846153843e-05, "loss": 0.0205, "step": 49100 }, { "epoch": 0.3630141036634044, "grad_norm": 0.09612789005041122, "learning_rate": 3.196744420702754e-05, "loss": 0.0192, "step": 49110 }, { "epoch": 0.36308802223470626, "grad_norm": 0.0747082456946373, "learning_rate": 3.1963734567901235e-05, "loss": 0.0188, "step": 49120 }, { "epoch": 0.3631619408060081, "grad_norm": 0.10992246866226196, "learning_rate": 3.196002492877493e-05, "loss": 0.0177, "step": 49130 }, { "epoch": 0.36323585937730996, "grad_norm": 0.06742114573717117, "learning_rate": 3.195631528964863e-05, "loss": 0.0184, "step": 49140 }, { "epoch": 0.3633097779486118, "grad_norm": 0.07350549101829529, "learning_rate": 3.195260565052232e-05, "loss": 0.0216, "step": 49150 }, { "epoch": 0.36338369651991365, "grad_norm": 0.07814744114875793, "learning_rate": 3.194889601139601e-05, "loss": 0.0164, "step": 49160 }, { "epoch": 0.3634576150912155, "grad_norm": 0.08196965605020523, "learning_rate": 3.194518637226971e-05, "loss": 0.0161, "step": 49170 }, { "epoch": 0.3635315336625174, "grad_norm": 0.06950008124113083, "learning_rate": 3.19414767331434e-05, "loss": 0.0189, "step": 49180 }, { "epoch": 0.36360545223381924, "grad_norm": 0.09519726783037186, "learning_rate": 3.1937767094017094e-05, "loss": 0.0184, "step": 49190 }, { "epoch": 0.3636793708051211, "grad_norm": 0.08564166724681854, "learning_rate": 3.193405745489079e-05, "loss": 0.0178, "step": 49200 }, { "epoch": 0.36375328937642293, "grad_norm": 0.07054101675748825, "learning_rate": 3.1930347815764486e-05, "loss": 0.0182, "step": 49210 }, { "epoch": 0.3638272079477248, "grad_norm": 0.07411817461252213, "learning_rate": 3.192663817663818e-05, "loss": 0.0186, "step": 49220 }, { "epoch": 0.3639011265190266, "grad_norm": 0.07335742563009262, "learning_rate": 3.192292853751187e-05, "loss": 0.0176, "step": 49230 }, { "epoch": 0.3639750450903285, "grad_norm": 0.09444653242826462, "learning_rate": 3.191921889838557e-05, "loss": 0.02, "step": 49240 }, { "epoch": 0.36404896366163036, "grad_norm": 0.12061870843172073, "learning_rate": 3.191550925925926e-05, "loss": 0.0195, "step": 49250 }, { "epoch": 0.3641228822329322, "grad_norm": 0.0709986537694931, "learning_rate": 3.191179962013295e-05, "loss": 0.0189, "step": 49260 }, { "epoch": 0.36419680080423406, "grad_norm": 0.10596373677253723, "learning_rate": 3.190808998100665e-05, "loss": 0.0223, "step": 49270 }, { "epoch": 0.3642707193755359, "grad_norm": 0.080999456346035, "learning_rate": 3.1904380341880344e-05, "loss": 0.0177, "step": 49280 }, { "epoch": 0.36434463794683775, "grad_norm": 0.10911361128091812, "learning_rate": 3.190067070275404e-05, "loss": 0.0186, "step": 49290 }, { "epoch": 0.3644185565181396, "grad_norm": 0.0921257883310318, "learning_rate": 3.1896961063627736e-05, "loss": 0.0199, "step": 49300 }, { "epoch": 0.3644924750894415, "grad_norm": 0.06992916762828827, "learning_rate": 3.1893251424501425e-05, "loss": 0.019, "step": 49310 }, { "epoch": 0.36456639366074334, "grad_norm": 0.07500123232603073, "learning_rate": 3.188954178537512e-05, "loss": 0.0187, "step": 49320 }, { "epoch": 0.3646403122320452, "grad_norm": 0.11022918671369553, "learning_rate": 3.188583214624881e-05, "loss": 0.0205, "step": 49330 }, { "epoch": 0.36471423080334703, "grad_norm": 0.08734451234340668, "learning_rate": 3.1882122507122506e-05, "loss": 0.0191, "step": 49340 }, { "epoch": 0.3647881493746489, "grad_norm": 0.0821605697274208, "learning_rate": 3.18784128679962e-05, "loss": 0.0208, "step": 49350 }, { "epoch": 0.3648620679459507, "grad_norm": 0.09052004665136337, "learning_rate": 3.18747032288699e-05, "loss": 0.0195, "step": 49360 }, { "epoch": 0.3649359865172526, "grad_norm": 0.12369329482316971, "learning_rate": 3.1870993589743594e-05, "loss": 0.022, "step": 49370 }, { "epoch": 0.36500990508855446, "grad_norm": 0.0797930434346199, "learning_rate": 3.186728395061729e-05, "loss": 0.0195, "step": 49380 }, { "epoch": 0.3650838236598563, "grad_norm": 0.11744547635316849, "learning_rate": 3.186357431149098e-05, "loss": 0.0213, "step": 49390 }, { "epoch": 0.36515774223115816, "grad_norm": 0.07720588147640228, "learning_rate": 3.1859864672364675e-05, "loss": 0.0219, "step": 49400 }, { "epoch": 0.36523166080246, "grad_norm": 0.07627621293067932, "learning_rate": 3.1856155033238364e-05, "loss": 0.0186, "step": 49410 }, { "epoch": 0.36530557937376185, "grad_norm": 0.06858126819133759, "learning_rate": 3.185244539411206e-05, "loss": 0.0157, "step": 49420 }, { "epoch": 0.3653794979450637, "grad_norm": 0.08645808696746826, "learning_rate": 3.1848735754985756e-05, "loss": 0.0206, "step": 49430 }, { "epoch": 0.3654534165163656, "grad_norm": 0.07037283480167389, "learning_rate": 3.184502611585945e-05, "loss": 0.0184, "step": 49440 }, { "epoch": 0.36552733508766744, "grad_norm": 0.09279239922761917, "learning_rate": 3.184131647673315e-05, "loss": 0.0162, "step": 49450 }, { "epoch": 0.3656012536589693, "grad_norm": 0.10505162179470062, "learning_rate": 3.183760683760684e-05, "loss": 0.0185, "step": 49460 }, { "epoch": 0.36567517223027113, "grad_norm": 0.10786660760641098, "learning_rate": 3.183389719848053e-05, "loss": 0.0195, "step": 49470 }, { "epoch": 0.365749090801573, "grad_norm": 0.08201677352190018, "learning_rate": 3.183018755935423e-05, "loss": 0.0176, "step": 49480 }, { "epoch": 0.3658230093728748, "grad_norm": 0.09974339604377747, "learning_rate": 3.182647792022792e-05, "loss": 0.0175, "step": 49490 }, { "epoch": 0.3658969279441767, "grad_norm": 0.05509033426642418, "learning_rate": 3.1822768281101614e-05, "loss": 0.0183, "step": 49500 }, { "epoch": 0.36597084651547857, "grad_norm": 0.07690016180276871, "learning_rate": 3.181905864197531e-05, "loss": 0.0176, "step": 49510 }, { "epoch": 0.3660447650867804, "grad_norm": 0.09903372079133987, "learning_rate": 3.1815349002849006e-05, "loss": 0.0195, "step": 49520 }, { "epoch": 0.36611868365808226, "grad_norm": 0.09054466336965561, "learning_rate": 3.18116393637227e-05, "loss": 0.0177, "step": 49530 }, { "epoch": 0.3661926022293841, "grad_norm": 0.06345314532518387, "learning_rate": 3.180792972459639e-05, "loss": 0.0157, "step": 49540 }, { "epoch": 0.36626652080068595, "grad_norm": 0.08934221416711807, "learning_rate": 3.180422008547009e-05, "loss": 0.0204, "step": 49550 }, { "epoch": 0.3663404393719878, "grad_norm": 0.10263720154762268, "learning_rate": 3.180051044634378e-05, "loss": 0.0176, "step": 49560 }, { "epoch": 0.3664143579432897, "grad_norm": 0.08406967669725418, "learning_rate": 3.179680080721747e-05, "loss": 0.0212, "step": 49570 }, { "epoch": 0.36648827651459154, "grad_norm": 0.10318008810281754, "learning_rate": 3.179309116809117e-05, "loss": 0.0176, "step": 49580 }, { "epoch": 0.3665621950858934, "grad_norm": 0.11742877215147018, "learning_rate": 3.1789381528964865e-05, "loss": 0.0183, "step": 49590 }, { "epoch": 0.36663611365719523, "grad_norm": 0.11578115075826645, "learning_rate": 3.178567188983856e-05, "loss": 0.0211, "step": 49600 }, { "epoch": 0.3667100322284971, "grad_norm": 0.07098636031150818, "learning_rate": 3.1781962250712256e-05, "loss": 0.0191, "step": 49610 }, { "epoch": 0.3667839507997989, "grad_norm": 0.09956346452236176, "learning_rate": 3.1778252611585946e-05, "loss": 0.0177, "step": 49620 }, { "epoch": 0.3668578693711008, "grad_norm": 0.07314296066761017, "learning_rate": 3.177454297245964e-05, "loss": 0.0186, "step": 49630 }, { "epoch": 0.36693178794240267, "grad_norm": 0.06977906078100204, "learning_rate": 3.177083333333333e-05, "loss": 0.0163, "step": 49640 }, { "epoch": 0.3670057065137045, "grad_norm": 0.07374892383813858, "learning_rate": 3.176712369420703e-05, "loss": 0.0198, "step": 49650 }, { "epoch": 0.36707962508500636, "grad_norm": 0.08748723566532135, "learning_rate": 3.176341405508072e-05, "loss": 0.0195, "step": 49660 }, { "epoch": 0.3671535436563082, "grad_norm": 0.08709771931171417, "learning_rate": 3.175970441595442e-05, "loss": 0.0168, "step": 49670 }, { "epoch": 0.36722746222761005, "grad_norm": 0.09110180288553238, "learning_rate": 3.1755994776828115e-05, "loss": 0.0203, "step": 49680 }, { "epoch": 0.3673013807989119, "grad_norm": 0.04776851460337639, "learning_rate": 3.1752285137701804e-05, "loss": 0.0189, "step": 49690 }, { "epoch": 0.3673752993702138, "grad_norm": 0.08647187799215317, "learning_rate": 3.17485754985755e-05, "loss": 0.0205, "step": 49700 }, { "epoch": 0.36744921794151564, "grad_norm": 0.08102229982614517, "learning_rate": 3.1744865859449196e-05, "loss": 0.0197, "step": 49710 }, { "epoch": 0.3675231365128175, "grad_norm": 0.11056692153215408, "learning_rate": 3.1741156220322885e-05, "loss": 0.0187, "step": 49720 }, { "epoch": 0.36759705508411933, "grad_norm": 0.16177508234977722, "learning_rate": 3.173744658119658e-05, "loss": 0.0199, "step": 49730 }, { "epoch": 0.3676709736554212, "grad_norm": 0.07288125157356262, "learning_rate": 3.173373694207028e-05, "loss": 0.0188, "step": 49740 }, { "epoch": 0.367744892226723, "grad_norm": 0.07510792464017868, "learning_rate": 3.173002730294397e-05, "loss": 0.0175, "step": 49750 }, { "epoch": 0.3678188107980249, "grad_norm": 0.10319831222295761, "learning_rate": 3.172631766381767e-05, "loss": 0.022, "step": 49760 }, { "epoch": 0.36789272936932677, "grad_norm": 0.10422424226999283, "learning_rate": 3.172260802469136e-05, "loss": 0.0174, "step": 49770 }, { "epoch": 0.3679666479406286, "grad_norm": 0.08082633465528488, "learning_rate": 3.1718898385565054e-05, "loss": 0.0174, "step": 49780 }, { "epoch": 0.36804056651193046, "grad_norm": 0.09931709617376328, "learning_rate": 3.171518874643874e-05, "loss": 0.0188, "step": 49790 }, { "epoch": 0.3681144850832323, "grad_norm": 0.07735582441091537, "learning_rate": 3.171147910731244e-05, "loss": 0.0158, "step": 49800 }, { "epoch": 0.36818840365453415, "grad_norm": 0.12492542713880539, "learning_rate": 3.1707769468186135e-05, "loss": 0.0223, "step": 49810 }, { "epoch": 0.368262322225836, "grad_norm": 0.1031833365559578, "learning_rate": 3.170405982905983e-05, "loss": 0.0224, "step": 49820 }, { "epoch": 0.3683362407971379, "grad_norm": 0.06540018320083618, "learning_rate": 3.170035018993353e-05, "loss": 0.0191, "step": 49830 }, { "epoch": 0.36841015936843974, "grad_norm": 0.07900650799274445, "learning_rate": 3.169664055080722e-05, "loss": 0.0196, "step": 49840 }, { "epoch": 0.3684840779397416, "grad_norm": 0.10357868671417236, "learning_rate": 3.169293091168091e-05, "loss": 0.0193, "step": 49850 }, { "epoch": 0.36855799651104343, "grad_norm": 0.07287750393152237, "learning_rate": 3.168922127255461e-05, "loss": 0.0162, "step": 49860 }, { "epoch": 0.3686319150823453, "grad_norm": 0.07523094117641449, "learning_rate": 3.16855116334283e-05, "loss": 0.0184, "step": 49870 }, { "epoch": 0.3687058336536471, "grad_norm": 0.09581726044416428, "learning_rate": 3.168180199430199e-05, "loss": 0.0202, "step": 49880 }, { "epoch": 0.368779752224949, "grad_norm": 0.07129789143800735, "learning_rate": 3.167809235517569e-05, "loss": 0.019, "step": 49890 }, { "epoch": 0.36885367079625087, "grad_norm": 0.07590305805206299, "learning_rate": 3.1674382716049385e-05, "loss": 0.0171, "step": 49900 }, { "epoch": 0.3689275893675527, "grad_norm": 0.09437566995620728, "learning_rate": 3.167067307692308e-05, "loss": 0.021, "step": 49910 }, { "epoch": 0.36900150793885456, "grad_norm": 0.06181221082806587, "learning_rate": 3.166696343779677e-05, "loss": 0.0199, "step": 49920 }, { "epoch": 0.3690754265101564, "grad_norm": 0.09170825034379959, "learning_rate": 3.1663253798670466e-05, "loss": 0.0188, "step": 49930 }, { "epoch": 0.36914934508145825, "grad_norm": 0.08520179986953735, "learning_rate": 3.165954415954416e-05, "loss": 0.0167, "step": 49940 }, { "epoch": 0.3692232636527601, "grad_norm": 0.07948411256074905, "learning_rate": 3.165583452041785e-05, "loss": 0.0173, "step": 49950 }, { "epoch": 0.369297182224062, "grad_norm": 0.09082730859518051, "learning_rate": 3.165212488129155e-05, "loss": 0.017, "step": 49960 }, { "epoch": 0.36937110079536384, "grad_norm": 0.07359523326158524, "learning_rate": 3.1648415242165244e-05, "loss": 0.018, "step": 49970 }, { "epoch": 0.3694450193666657, "grad_norm": 0.06806252151727676, "learning_rate": 3.164470560303894e-05, "loss": 0.0188, "step": 49980 }, { "epoch": 0.36951893793796753, "grad_norm": 0.10926640778779984, "learning_rate": 3.1640995963912635e-05, "loss": 0.0186, "step": 49990 }, { "epoch": 0.3695928565092694, "grad_norm": 0.09719149023294449, "learning_rate": 3.1637286324786325e-05, "loss": 0.0194, "step": 50000 }, { "epoch": 0.3695928565092694, "eval_f1": 0.6127174177207287, "eval_loss": 0.018170252442359924, "eval_precision": 0.48591030292742615, "eval_recall": 0.8290812108924811, "eval_runtime": 2665.6263, "eval_samples_per_second": 203.004, "eval_steps_per_second": 3.172, "step": 50000 }, { "epoch": 0.3696667750805712, "grad_norm": 0.06750772893428802, "learning_rate": 3.163357668566002e-05, "loss": 0.0181, "step": 50010 }, { "epoch": 0.3697406936518731, "grad_norm": 0.13104230165481567, "learning_rate": 3.162986704653371e-05, "loss": 0.0194, "step": 50020 }, { "epoch": 0.36981461222317497, "grad_norm": 0.07477361708879471, "learning_rate": 3.1626157407407406e-05, "loss": 0.0161, "step": 50030 }, { "epoch": 0.3698885307944768, "grad_norm": 0.09455181658267975, "learning_rate": 3.16224477682811e-05, "loss": 0.0168, "step": 50040 }, { "epoch": 0.36996244936577866, "grad_norm": 0.0634753629565239, "learning_rate": 3.16187381291548e-05, "loss": 0.0178, "step": 50050 }, { "epoch": 0.3700363679370805, "grad_norm": 0.10524077713489532, "learning_rate": 3.1615028490028494e-05, "loss": 0.0176, "step": 50060 }, { "epoch": 0.37011028650838235, "grad_norm": 0.07643004506826401, "learning_rate": 3.161131885090219e-05, "loss": 0.0187, "step": 50070 }, { "epoch": 0.37018420507968425, "grad_norm": 0.10781189054250717, "learning_rate": 3.160760921177588e-05, "loss": 0.0161, "step": 50080 }, { "epoch": 0.3702581236509861, "grad_norm": 0.0713755190372467, "learning_rate": 3.1603899572649575e-05, "loss": 0.0202, "step": 50090 }, { "epoch": 0.37033204222228794, "grad_norm": 0.07827604562044144, "learning_rate": 3.1600189933523264e-05, "loss": 0.0165, "step": 50100 }, { "epoch": 0.3704059607935898, "grad_norm": 0.0943358838558197, "learning_rate": 3.159648029439696e-05, "loss": 0.0191, "step": 50110 }, { "epoch": 0.37047987936489163, "grad_norm": 0.07688167691230774, "learning_rate": 3.1592770655270656e-05, "loss": 0.0207, "step": 50120 }, { "epoch": 0.3705537979361935, "grad_norm": 0.1343929022550583, "learning_rate": 3.158906101614435e-05, "loss": 0.019, "step": 50130 }, { "epoch": 0.3706277165074953, "grad_norm": 0.10538404434919357, "learning_rate": 3.158535137701805e-05, "loss": 0.0221, "step": 50140 }, { "epoch": 0.3707016350787972, "grad_norm": 0.13039255142211914, "learning_rate": 3.158164173789174e-05, "loss": 0.0195, "step": 50150 }, { "epoch": 0.37077555365009907, "grad_norm": 0.07820204645395279, "learning_rate": 3.157793209876543e-05, "loss": 0.0164, "step": 50160 }, { "epoch": 0.3708494722214009, "grad_norm": 0.09720060974359512, "learning_rate": 3.157422245963913e-05, "loss": 0.0199, "step": 50170 }, { "epoch": 0.37092339079270276, "grad_norm": 0.10176794975996017, "learning_rate": 3.157051282051282e-05, "loss": 0.017, "step": 50180 }, { "epoch": 0.3709973093640046, "grad_norm": 0.06773918122053146, "learning_rate": 3.1566803181386514e-05, "loss": 0.0199, "step": 50190 }, { "epoch": 0.37107122793530645, "grad_norm": 0.07437018305063248, "learning_rate": 3.156309354226021e-05, "loss": 0.0172, "step": 50200 }, { "epoch": 0.37114514650660835, "grad_norm": 0.08163496106863022, "learning_rate": 3.1559383903133906e-05, "loss": 0.0158, "step": 50210 }, { "epoch": 0.3712190650779102, "grad_norm": 0.0832415372133255, "learning_rate": 3.15556742640076e-05, "loss": 0.0187, "step": 50220 }, { "epoch": 0.37129298364921204, "grad_norm": 0.09557832777500153, "learning_rate": 3.155196462488129e-05, "loss": 0.0195, "step": 50230 }, { "epoch": 0.3713669022205139, "grad_norm": 0.06662998348474503, "learning_rate": 3.154825498575499e-05, "loss": 0.0179, "step": 50240 }, { "epoch": 0.37144082079181573, "grad_norm": 0.10676044225692749, "learning_rate": 3.1544545346628676e-05, "loss": 0.0204, "step": 50250 }, { "epoch": 0.3715147393631176, "grad_norm": 0.1034659594297409, "learning_rate": 3.154083570750237e-05, "loss": 0.0197, "step": 50260 }, { "epoch": 0.3715886579344194, "grad_norm": 0.08106374740600586, "learning_rate": 3.153712606837607e-05, "loss": 0.0199, "step": 50270 }, { "epoch": 0.3716625765057213, "grad_norm": 0.08168378472328186, "learning_rate": 3.1533416429249764e-05, "loss": 0.0192, "step": 50280 }, { "epoch": 0.37173649507702317, "grad_norm": 0.08763410896062851, "learning_rate": 3.152970679012346e-05, "loss": 0.0212, "step": 50290 }, { "epoch": 0.371810413648325, "grad_norm": 0.1779336780309677, "learning_rate": 3.1525997150997156e-05, "loss": 0.019, "step": 50300 }, { "epoch": 0.37188433221962686, "grad_norm": 0.05893019586801529, "learning_rate": 3.1522287511870845e-05, "loss": 0.0187, "step": 50310 }, { "epoch": 0.3719582507909287, "grad_norm": 0.08330386877059937, "learning_rate": 3.151857787274454e-05, "loss": 0.0204, "step": 50320 }, { "epoch": 0.37203216936223055, "grad_norm": 0.0755939707159996, "learning_rate": 3.151486823361823e-05, "loss": 0.0194, "step": 50330 }, { "epoch": 0.37210608793353245, "grad_norm": 0.08258412033319473, "learning_rate": 3.1511158594491927e-05, "loss": 0.0188, "step": 50340 }, { "epoch": 0.3721800065048343, "grad_norm": 0.09173570573329926, "learning_rate": 3.150744895536563e-05, "loss": 0.0174, "step": 50350 }, { "epoch": 0.37225392507613614, "grad_norm": 0.06343158334493637, "learning_rate": 3.150373931623932e-05, "loss": 0.0153, "step": 50360 }, { "epoch": 0.372327843647438, "grad_norm": 0.075467549264431, "learning_rate": 3.1500029677113014e-05, "loss": 0.0198, "step": 50370 }, { "epoch": 0.37240176221873983, "grad_norm": 0.06660743802785873, "learning_rate": 3.1496320037986704e-05, "loss": 0.0171, "step": 50380 }, { "epoch": 0.3724756807900417, "grad_norm": 0.07386814802885056, "learning_rate": 3.14926103988604e-05, "loss": 0.0171, "step": 50390 }, { "epoch": 0.3725495993613435, "grad_norm": 0.09839634597301483, "learning_rate": 3.1488900759734096e-05, "loss": 0.0195, "step": 50400 }, { "epoch": 0.3726235179326454, "grad_norm": 0.0697672888636589, "learning_rate": 3.1485191120607785e-05, "loss": 0.0207, "step": 50410 }, { "epoch": 0.37269743650394727, "grad_norm": 0.12948425114154816, "learning_rate": 3.148148148148148e-05, "loss": 0.019, "step": 50420 }, { "epoch": 0.3727713550752491, "grad_norm": 0.0835069864988327, "learning_rate": 3.147777184235518e-05, "loss": 0.0195, "step": 50430 }, { "epoch": 0.37284527364655096, "grad_norm": 0.07875664532184601, "learning_rate": 3.147406220322887e-05, "loss": 0.018, "step": 50440 }, { "epoch": 0.3729191922178528, "grad_norm": 0.09094227105379105, "learning_rate": 3.147035256410257e-05, "loss": 0.0164, "step": 50450 }, { "epoch": 0.37299311078915465, "grad_norm": 0.07479475438594818, "learning_rate": 3.146664292497626e-05, "loss": 0.0176, "step": 50460 }, { "epoch": 0.37306702936045655, "grad_norm": 0.0831725150346756, "learning_rate": 3.1462933285849954e-05, "loss": 0.0189, "step": 50470 }, { "epoch": 0.3731409479317584, "grad_norm": 0.08730162680149078, "learning_rate": 3.145922364672364e-05, "loss": 0.021, "step": 50480 }, { "epoch": 0.37321486650306024, "grad_norm": 0.06646716594696045, "learning_rate": 3.145551400759734e-05, "loss": 0.0182, "step": 50490 }, { "epoch": 0.3732887850743621, "grad_norm": 0.07522834837436676, "learning_rate": 3.145180436847104e-05, "loss": 0.0185, "step": 50500 }, { "epoch": 0.37336270364566393, "grad_norm": 0.09267633408308029, "learning_rate": 3.144809472934473e-05, "loss": 0.0163, "step": 50510 }, { "epoch": 0.3734366222169658, "grad_norm": 0.07390173524618149, "learning_rate": 3.144438509021843e-05, "loss": 0.019, "step": 50520 }, { "epoch": 0.3735105407882676, "grad_norm": 0.10241732001304626, "learning_rate": 3.144067545109212e-05, "loss": 0.0175, "step": 50530 }, { "epoch": 0.3735844593595695, "grad_norm": 0.07036875188350677, "learning_rate": 3.143696581196581e-05, "loss": 0.0195, "step": 50540 }, { "epoch": 0.37365837793087137, "grad_norm": 0.09159451723098755, "learning_rate": 3.143325617283951e-05, "loss": 0.0177, "step": 50550 }, { "epoch": 0.3737322965021732, "grad_norm": 0.07133959978818893, "learning_rate": 3.14295465337132e-05, "loss": 0.0196, "step": 50560 }, { "epoch": 0.37380621507347506, "grad_norm": 0.07279335707426071, "learning_rate": 3.142583689458689e-05, "loss": 0.0189, "step": 50570 }, { "epoch": 0.3738801336447769, "grad_norm": 0.13607998192310333, "learning_rate": 3.1422127255460596e-05, "loss": 0.0176, "step": 50580 }, { "epoch": 0.37395405221607875, "grad_norm": 0.07464282959699631, "learning_rate": 3.1418417616334285e-05, "loss": 0.0187, "step": 50590 }, { "epoch": 0.37402797078738065, "grad_norm": 0.08845172077417374, "learning_rate": 3.141470797720798e-05, "loss": 0.0196, "step": 50600 }, { "epoch": 0.3741018893586825, "grad_norm": 0.10331210494041443, "learning_rate": 3.141099833808167e-05, "loss": 0.0186, "step": 50610 }, { "epoch": 0.37417580792998434, "grad_norm": 0.06014440208673477, "learning_rate": 3.1407288698955366e-05, "loss": 0.0191, "step": 50620 }, { "epoch": 0.3742497265012862, "grad_norm": 0.09451748430728912, "learning_rate": 3.140357905982906e-05, "loss": 0.0167, "step": 50630 }, { "epoch": 0.37432364507258803, "grad_norm": 0.07870625704526901, "learning_rate": 3.139986942070275e-05, "loss": 0.0199, "step": 50640 }, { "epoch": 0.3743975636438899, "grad_norm": 0.0743972659111023, "learning_rate": 3.1396159781576454e-05, "loss": 0.018, "step": 50650 }, { "epoch": 0.3744714822151917, "grad_norm": 0.08390262722969055, "learning_rate": 3.139245014245014e-05, "loss": 0.0196, "step": 50660 }, { "epoch": 0.3745454007864936, "grad_norm": 0.10581986606121063, "learning_rate": 3.138874050332384e-05, "loss": 0.0177, "step": 50670 }, { "epoch": 0.37461931935779547, "grad_norm": 0.07332141697406769, "learning_rate": 3.1385030864197535e-05, "loss": 0.0173, "step": 50680 }, { "epoch": 0.3746932379290973, "grad_norm": 0.06791075319051743, "learning_rate": 3.1381321225071224e-05, "loss": 0.0213, "step": 50690 }, { "epoch": 0.37476715650039916, "grad_norm": 0.0779709741473198, "learning_rate": 3.137761158594492e-05, "loss": 0.0196, "step": 50700 }, { "epoch": 0.374841075071701, "grad_norm": 0.06821703165769577, "learning_rate": 3.137390194681861e-05, "loss": 0.0172, "step": 50710 }, { "epoch": 0.37491499364300285, "grad_norm": 0.07088646292686462, "learning_rate": 3.1370192307692306e-05, "loss": 0.0178, "step": 50720 }, { "epoch": 0.37498891221430475, "grad_norm": 0.09667670726776123, "learning_rate": 3.136648266856601e-05, "loss": 0.0187, "step": 50730 }, { "epoch": 0.3750628307856066, "grad_norm": 0.0873975083231926, "learning_rate": 3.13627730294397e-05, "loss": 0.0212, "step": 50740 }, { "epoch": 0.37513674935690844, "grad_norm": 0.10300181061029434, "learning_rate": 3.1359063390313393e-05, "loss": 0.0185, "step": 50750 }, { "epoch": 0.3752106679282103, "grad_norm": 0.16665935516357422, "learning_rate": 3.135535375118709e-05, "loss": 0.0179, "step": 50760 }, { "epoch": 0.37528458649951213, "grad_norm": 0.10472545772790909, "learning_rate": 3.135164411206078e-05, "loss": 0.0169, "step": 50770 }, { "epoch": 0.375358505070814, "grad_norm": 0.08101044595241547, "learning_rate": 3.1347934472934475e-05, "loss": 0.0174, "step": 50780 }, { "epoch": 0.3754324236421158, "grad_norm": 0.09952845424413681, "learning_rate": 3.1344224833808164e-05, "loss": 0.0192, "step": 50790 }, { "epoch": 0.3755063422134177, "grad_norm": 0.09925219416618347, "learning_rate": 3.1340515194681866e-05, "loss": 0.0175, "step": 50800 }, { "epoch": 0.37558026078471957, "grad_norm": 0.07916640490293503, "learning_rate": 3.133680555555556e-05, "loss": 0.0183, "step": 50810 }, { "epoch": 0.3756541793560214, "grad_norm": 0.08429214358329773, "learning_rate": 3.133309591642925e-05, "loss": 0.0168, "step": 50820 }, { "epoch": 0.37572809792732326, "grad_norm": 0.08063418418169022, "learning_rate": 3.132938627730295e-05, "loss": 0.0172, "step": 50830 }, { "epoch": 0.3758020164986251, "grad_norm": 0.07345176488161087, "learning_rate": 3.132567663817664e-05, "loss": 0.0187, "step": 50840 }, { "epoch": 0.37587593506992695, "grad_norm": 0.07181081175804138, "learning_rate": 3.132196699905033e-05, "loss": 0.0185, "step": 50850 }, { "epoch": 0.37594985364122885, "grad_norm": 0.0684572383761406, "learning_rate": 3.131825735992403e-05, "loss": 0.0179, "step": 50860 }, { "epoch": 0.3760237722125307, "grad_norm": 0.07158481329679489, "learning_rate": 3.131454772079772e-05, "loss": 0.0178, "step": 50870 }, { "epoch": 0.37609769078383254, "grad_norm": 0.08474763482809067, "learning_rate": 3.131083808167142e-05, "loss": 0.0188, "step": 50880 }, { "epoch": 0.3761716093551344, "grad_norm": 0.10242009907960892, "learning_rate": 3.130712844254511e-05, "loss": 0.0212, "step": 50890 }, { "epoch": 0.37624552792643623, "grad_norm": 0.11419926583766937, "learning_rate": 3.1303418803418806e-05, "loss": 0.0198, "step": 50900 }, { "epoch": 0.3763194464977381, "grad_norm": 0.07014594227075577, "learning_rate": 3.12997091642925e-05, "loss": 0.0187, "step": 50910 }, { "epoch": 0.3763933650690399, "grad_norm": 0.09786113351583481, "learning_rate": 3.129599952516619e-05, "loss": 0.0184, "step": 50920 }, { "epoch": 0.3764672836403418, "grad_norm": 0.0822451189160347, "learning_rate": 3.129228988603989e-05, "loss": 0.0177, "step": 50930 }, { "epoch": 0.37654120221164367, "grad_norm": 0.09000653773546219, "learning_rate": 3.1288580246913576e-05, "loss": 0.0201, "step": 50940 }, { "epoch": 0.3766151207829455, "grad_norm": 0.11525629460811615, "learning_rate": 3.128487060778728e-05, "loss": 0.0199, "step": 50950 }, { "epoch": 0.37668903935424736, "grad_norm": 0.08533230423927307, "learning_rate": 3.1281160968660975e-05, "loss": 0.0184, "step": 50960 }, { "epoch": 0.3767629579255492, "grad_norm": 0.1094772070646286, "learning_rate": 3.1277451329534664e-05, "loss": 0.0199, "step": 50970 }, { "epoch": 0.37683687649685105, "grad_norm": 0.06830190867185593, "learning_rate": 3.127374169040836e-05, "loss": 0.0168, "step": 50980 }, { "epoch": 0.37691079506815295, "grad_norm": 0.09465599805116653, "learning_rate": 3.1270032051282056e-05, "loss": 0.0198, "step": 50990 }, { "epoch": 0.3769847136394548, "grad_norm": 0.07513085007667542, "learning_rate": 3.1266322412155745e-05, "loss": 0.0185, "step": 51000 }, { "epoch": 0.37705863221075664, "grad_norm": 0.06580290198326111, "learning_rate": 3.126261277302944e-05, "loss": 0.0201, "step": 51010 }, { "epoch": 0.3771325507820585, "grad_norm": 0.10376956313848495, "learning_rate": 3.125890313390313e-05, "loss": 0.0182, "step": 51020 }, { "epoch": 0.37720646935336033, "grad_norm": 0.11452928185462952, "learning_rate": 3.125519349477683e-05, "loss": 0.0215, "step": 51030 }, { "epoch": 0.3772803879246622, "grad_norm": 0.08027191460132599, "learning_rate": 3.125148385565053e-05, "loss": 0.0195, "step": 51040 }, { "epoch": 0.377354306495964, "grad_norm": 0.10047151148319244, "learning_rate": 3.124777421652422e-05, "loss": 0.0197, "step": 51050 }, { "epoch": 0.3774282250672659, "grad_norm": 0.08046058565378189, "learning_rate": 3.1244064577397914e-05, "loss": 0.0186, "step": 51060 }, { "epoch": 0.37750214363856777, "grad_norm": 0.06543967872858047, "learning_rate": 3.1240354938271603e-05, "loss": 0.0184, "step": 51070 }, { "epoch": 0.3775760622098696, "grad_norm": 0.0672696903347969, "learning_rate": 3.12366452991453e-05, "loss": 0.0197, "step": 51080 }, { "epoch": 0.37764998078117146, "grad_norm": 0.08534809947013855, "learning_rate": 3.1232935660018995e-05, "loss": 0.0174, "step": 51090 }, { "epoch": 0.3777238993524733, "grad_norm": 0.11928018182516098, "learning_rate": 3.1229226020892685e-05, "loss": 0.0186, "step": 51100 }, { "epoch": 0.37779781792377515, "grad_norm": 0.10310760885477066, "learning_rate": 3.122551638176639e-05, "loss": 0.0179, "step": 51110 }, { "epoch": 0.37787173649507705, "grad_norm": 0.08315908908843994, "learning_rate": 3.1221806742640076e-05, "loss": 0.0187, "step": 51120 }, { "epoch": 0.3779456550663789, "grad_norm": 0.11481058597564697, "learning_rate": 3.121809710351377e-05, "loss": 0.0205, "step": 51130 }, { "epoch": 0.37801957363768074, "grad_norm": 0.07198639214038849, "learning_rate": 3.121438746438747e-05, "loss": 0.016, "step": 51140 }, { "epoch": 0.3780934922089826, "grad_norm": 0.09661037474870682, "learning_rate": 3.121067782526116e-05, "loss": 0.018, "step": 51150 }, { "epoch": 0.37816741078028443, "grad_norm": 0.08421141654253006, "learning_rate": 3.1206968186134854e-05, "loss": 0.0195, "step": 51160 }, { "epoch": 0.3782413293515863, "grad_norm": 0.07664620876312256, "learning_rate": 3.120325854700854e-05, "loss": 0.0207, "step": 51170 }, { "epoch": 0.3783152479228881, "grad_norm": 0.07323388755321503, "learning_rate": 3.1199548907882245e-05, "loss": 0.0181, "step": 51180 }, { "epoch": 0.37838916649419, "grad_norm": 0.09078080207109451, "learning_rate": 3.119583926875594e-05, "loss": 0.0192, "step": 51190 }, { "epoch": 0.37846308506549187, "grad_norm": 0.13930262625217438, "learning_rate": 3.119212962962963e-05, "loss": 0.0175, "step": 51200 }, { "epoch": 0.3785370036367937, "grad_norm": 0.0816483274102211, "learning_rate": 3.1188419990503327e-05, "loss": 0.0191, "step": 51210 }, { "epoch": 0.37861092220809556, "grad_norm": 0.10349602997303009, "learning_rate": 3.118471035137702e-05, "loss": 0.0184, "step": 51220 }, { "epoch": 0.3786848407793974, "grad_norm": 0.06943171471357346, "learning_rate": 3.118100071225071e-05, "loss": 0.0194, "step": 51230 }, { "epoch": 0.37875875935069925, "grad_norm": 0.11047179996967316, "learning_rate": 3.117729107312441e-05, "loss": 0.0176, "step": 51240 }, { "epoch": 0.37883267792200115, "grad_norm": 0.13045692443847656, "learning_rate": 3.11735814339981e-05, "loss": 0.0202, "step": 51250 }, { "epoch": 0.378906596493303, "grad_norm": 0.10083399713039398, "learning_rate": 3.11698717948718e-05, "loss": 0.0187, "step": 51260 }, { "epoch": 0.37898051506460484, "grad_norm": 0.09727805107831955, "learning_rate": 3.1166162155745496e-05, "loss": 0.0176, "step": 51270 }, { "epoch": 0.3790544336359067, "grad_norm": 0.08823196589946747, "learning_rate": 3.1162452516619185e-05, "loss": 0.019, "step": 51280 }, { "epoch": 0.37912835220720853, "grad_norm": 0.09118539839982986, "learning_rate": 3.115874287749288e-05, "loss": 0.0187, "step": 51290 }, { "epoch": 0.3792022707785104, "grad_norm": 0.09754306823015213, "learning_rate": 3.115503323836657e-05, "loss": 0.0173, "step": 51300 }, { "epoch": 0.3792761893498122, "grad_norm": 0.08700606226921082, "learning_rate": 3.1151323599240266e-05, "loss": 0.0218, "step": 51310 }, { "epoch": 0.3793501079211141, "grad_norm": 0.10025543719530106, "learning_rate": 3.114761396011396e-05, "loss": 0.0181, "step": 51320 }, { "epoch": 0.37942402649241597, "grad_norm": 0.0970856323838234, "learning_rate": 3.114390432098766e-05, "loss": 0.0196, "step": 51330 }, { "epoch": 0.3794979450637178, "grad_norm": 0.08487201482057571, "learning_rate": 3.1140194681861354e-05, "loss": 0.0178, "step": 51340 }, { "epoch": 0.37957186363501966, "grad_norm": 0.10383712500333786, "learning_rate": 3.113648504273504e-05, "loss": 0.0186, "step": 51350 }, { "epoch": 0.3796457822063215, "grad_norm": 0.07665007561445236, "learning_rate": 3.113277540360874e-05, "loss": 0.0181, "step": 51360 }, { "epoch": 0.37971970077762335, "grad_norm": 0.10688511282205582, "learning_rate": 3.1129065764482435e-05, "loss": 0.0202, "step": 51370 }, { "epoch": 0.37979361934892525, "grad_norm": 0.12255487591028214, "learning_rate": 3.1125356125356124e-05, "loss": 0.0173, "step": 51380 }, { "epoch": 0.3798675379202271, "grad_norm": 0.12634336948394775, "learning_rate": 3.112164648622982e-05, "loss": 0.0175, "step": 51390 }, { "epoch": 0.37994145649152894, "grad_norm": 0.09347711503505707, "learning_rate": 3.111793684710351e-05, "loss": 0.0184, "step": 51400 }, { "epoch": 0.3800153750628308, "grad_norm": 0.10192801058292389, "learning_rate": 3.111422720797721e-05, "loss": 0.0202, "step": 51410 }, { "epoch": 0.38008929363413263, "grad_norm": 0.09505399316549301, "learning_rate": 3.111051756885091e-05, "loss": 0.0166, "step": 51420 }, { "epoch": 0.3801632122054345, "grad_norm": 0.10079313069581985, "learning_rate": 3.11068079297246e-05, "loss": 0.0196, "step": 51430 }, { "epoch": 0.3802371307767363, "grad_norm": 0.08078725636005402, "learning_rate": 3.110309829059829e-05, "loss": 0.0184, "step": 51440 }, { "epoch": 0.3803110493480382, "grad_norm": 0.08401284366846085, "learning_rate": 3.109938865147199e-05, "loss": 0.0168, "step": 51450 }, { "epoch": 0.38038496791934007, "grad_norm": 0.08584748953580856, "learning_rate": 3.109567901234568e-05, "loss": 0.0203, "step": 51460 }, { "epoch": 0.3804588864906419, "grad_norm": 0.07659079879522324, "learning_rate": 3.1091969373219374e-05, "loss": 0.0186, "step": 51470 }, { "epoch": 0.38053280506194376, "grad_norm": 0.11942635476589203, "learning_rate": 3.108825973409307e-05, "loss": 0.0183, "step": 51480 }, { "epoch": 0.3806067236332456, "grad_norm": 0.10971416532993317, "learning_rate": 3.1084550094966766e-05, "loss": 0.0171, "step": 51490 }, { "epoch": 0.38068064220454745, "grad_norm": 0.10077308118343353, "learning_rate": 3.108084045584046e-05, "loss": 0.0182, "step": 51500 }, { "epoch": 0.38075456077584935, "grad_norm": 0.0781073048710823, "learning_rate": 3.107713081671415e-05, "loss": 0.0181, "step": 51510 }, { "epoch": 0.3808284793471512, "grad_norm": 0.156025230884552, "learning_rate": 3.107342117758785e-05, "loss": 0.0191, "step": 51520 }, { "epoch": 0.38090239791845304, "grad_norm": 0.10990814119577408, "learning_rate": 3.1069711538461537e-05, "loss": 0.018, "step": 51530 }, { "epoch": 0.3809763164897549, "grad_norm": 0.10066544264554977, "learning_rate": 3.106600189933523e-05, "loss": 0.0194, "step": 51540 }, { "epoch": 0.38105023506105673, "grad_norm": 0.08037177473306656, "learning_rate": 3.106229226020893e-05, "loss": 0.0208, "step": 51550 }, { "epoch": 0.3811241536323586, "grad_norm": 0.0665728896856308, "learning_rate": 3.1058582621082624e-05, "loss": 0.0185, "step": 51560 }, { "epoch": 0.3811980722036604, "grad_norm": 0.07329696416854858, "learning_rate": 3.105487298195632e-05, "loss": 0.02, "step": 51570 }, { "epoch": 0.3812719907749623, "grad_norm": 0.07585390657186508, "learning_rate": 3.105116334283001e-05, "loss": 0.0192, "step": 51580 }, { "epoch": 0.38134590934626417, "grad_norm": 0.07885745167732239, "learning_rate": 3.1047453703703706e-05, "loss": 0.017, "step": 51590 }, { "epoch": 0.381419827917566, "grad_norm": 0.18103165924549103, "learning_rate": 3.10437440645774e-05, "loss": 0.0157, "step": 51600 }, { "epoch": 0.38149374648886786, "grad_norm": 0.07363313436508179, "learning_rate": 3.104003442545109e-05, "loss": 0.0169, "step": 51610 }, { "epoch": 0.3815676650601697, "grad_norm": 0.11146856844425201, "learning_rate": 3.103632478632479e-05, "loss": 0.0192, "step": 51620 }, { "epoch": 0.38164158363147155, "grad_norm": 0.14162760972976685, "learning_rate": 3.103261514719848e-05, "loss": 0.0204, "step": 51630 }, { "epoch": 0.38171550220277345, "grad_norm": 0.1353944092988968, "learning_rate": 3.102890550807218e-05, "loss": 0.018, "step": 51640 }, { "epoch": 0.3817894207740753, "grad_norm": 0.10039281100034714, "learning_rate": 3.1025195868945875e-05, "loss": 0.0181, "step": 51650 }, { "epoch": 0.38186333934537714, "grad_norm": 0.08758821338415146, "learning_rate": 3.1021486229819564e-05, "loss": 0.0198, "step": 51660 }, { "epoch": 0.381937257916679, "grad_norm": 0.08009107410907745, "learning_rate": 3.101777659069326e-05, "loss": 0.0217, "step": 51670 }, { "epoch": 0.38201117648798083, "grad_norm": 0.07778944820165634, "learning_rate": 3.1014066951566956e-05, "loss": 0.0172, "step": 51680 }, { "epoch": 0.3820850950592827, "grad_norm": 0.09274286776781082, "learning_rate": 3.1010357312440645e-05, "loss": 0.0179, "step": 51690 }, { "epoch": 0.3821590136305845, "grad_norm": 0.08399894088506699, "learning_rate": 3.100664767331434e-05, "loss": 0.0183, "step": 51700 }, { "epoch": 0.3822329322018864, "grad_norm": 0.12259208410978317, "learning_rate": 3.100293803418804e-05, "loss": 0.0181, "step": 51710 }, { "epoch": 0.38230685077318827, "grad_norm": 0.06090375408530235, "learning_rate": 3.099922839506173e-05, "loss": 0.0198, "step": 51720 }, { "epoch": 0.3823807693444901, "grad_norm": 0.09268014878034592, "learning_rate": 3.099551875593543e-05, "loss": 0.0191, "step": 51730 }, { "epoch": 0.38245468791579196, "grad_norm": 0.09483334422111511, "learning_rate": 3.099180911680912e-05, "loss": 0.0176, "step": 51740 }, { "epoch": 0.3825286064870938, "grad_norm": 0.07935747504234314, "learning_rate": 3.0988099477682814e-05, "loss": 0.0184, "step": 51750 }, { "epoch": 0.38260252505839565, "grad_norm": 0.08421669155359268, "learning_rate": 3.09843898385565e-05, "loss": 0.0192, "step": 51760 }, { "epoch": 0.38267644362969755, "grad_norm": 0.06987878680229187, "learning_rate": 3.09806801994302e-05, "loss": 0.0177, "step": 51770 }, { "epoch": 0.3827503622009994, "grad_norm": 0.09059000015258789, "learning_rate": 3.0976970560303895e-05, "loss": 0.0181, "step": 51780 }, { "epoch": 0.38282428077230124, "grad_norm": 0.12444675713777542, "learning_rate": 3.097326092117759e-05, "loss": 0.0189, "step": 51790 }, { "epoch": 0.3828981993436031, "grad_norm": 0.08249559253454208, "learning_rate": 3.096955128205129e-05, "loss": 0.0184, "step": 51800 }, { "epoch": 0.38297211791490493, "grad_norm": 0.056822020560503006, "learning_rate": 3.0965841642924976e-05, "loss": 0.0166, "step": 51810 }, { "epoch": 0.3830460364862068, "grad_norm": 0.092983677983284, "learning_rate": 3.096213200379867e-05, "loss": 0.0194, "step": 51820 }, { "epoch": 0.3831199550575086, "grad_norm": 0.10161788016557693, "learning_rate": 3.095842236467237e-05, "loss": 0.0178, "step": 51830 }, { "epoch": 0.3831938736288105, "grad_norm": 0.09972761571407318, "learning_rate": 3.095471272554606e-05, "loss": 0.018, "step": 51840 }, { "epoch": 0.38326779220011237, "grad_norm": 0.05881828814744949, "learning_rate": 3.095100308641975e-05, "loss": 0.018, "step": 51850 }, { "epoch": 0.3833417107714142, "grad_norm": 0.1028822734951973, "learning_rate": 3.094729344729345e-05, "loss": 0.0179, "step": 51860 }, { "epoch": 0.38341562934271606, "grad_norm": 0.07917283475399017, "learning_rate": 3.0943583808167145e-05, "loss": 0.0198, "step": 51870 }, { "epoch": 0.3834895479140179, "grad_norm": 0.08172168582677841, "learning_rate": 3.093987416904084e-05, "loss": 0.0183, "step": 51880 }, { "epoch": 0.38356346648531975, "grad_norm": 0.06272972375154495, "learning_rate": 3.093616452991453e-05, "loss": 0.0188, "step": 51890 }, { "epoch": 0.38363738505662165, "grad_norm": 0.093136265873909, "learning_rate": 3.0932454890788226e-05, "loss": 0.0203, "step": 51900 }, { "epoch": 0.3837113036279235, "grad_norm": 0.0781591534614563, "learning_rate": 3.092874525166192e-05, "loss": 0.0197, "step": 51910 }, { "epoch": 0.38378522219922534, "grad_norm": 0.08194848895072937, "learning_rate": 3.092503561253561e-05, "loss": 0.0194, "step": 51920 }, { "epoch": 0.3838591407705272, "grad_norm": 0.06483061611652374, "learning_rate": 3.092132597340931e-05, "loss": 0.0175, "step": 51930 }, { "epoch": 0.38393305934182903, "grad_norm": 0.09434029459953308, "learning_rate": 3.0917616334283003e-05, "loss": 0.0216, "step": 51940 }, { "epoch": 0.3840069779131309, "grad_norm": 0.09067126363515854, "learning_rate": 3.09139066951567e-05, "loss": 0.0207, "step": 51950 }, { "epoch": 0.3840808964844328, "grad_norm": 0.10259506106376648, "learning_rate": 3.0910197056030395e-05, "loss": 0.0221, "step": 51960 }, { "epoch": 0.3841548150557346, "grad_norm": 0.11004582047462463, "learning_rate": 3.0906487416904085e-05, "loss": 0.0194, "step": 51970 }, { "epoch": 0.38422873362703647, "grad_norm": 0.09257286041975021, "learning_rate": 3.090277777777778e-05, "loss": 0.0182, "step": 51980 }, { "epoch": 0.3843026521983383, "grad_norm": 0.07792217284440994, "learning_rate": 3.089906813865147e-05, "loss": 0.0188, "step": 51990 }, { "epoch": 0.38437657076964016, "grad_norm": 0.0765538364648819, "learning_rate": 3.0895358499525166e-05, "loss": 0.0185, "step": 52000 }, { "epoch": 0.384450489340942, "grad_norm": 0.09007059782743454, "learning_rate": 3.089164886039886e-05, "loss": 0.0203, "step": 52010 }, { "epoch": 0.38452440791224385, "grad_norm": 0.08688057959079742, "learning_rate": 3.088793922127256e-05, "loss": 0.0184, "step": 52020 }, { "epoch": 0.38459832648354575, "grad_norm": 0.08209887892007828, "learning_rate": 3.0884229582146254e-05, "loss": 0.0177, "step": 52030 }, { "epoch": 0.3846722450548476, "grad_norm": 0.07236919552087784, "learning_rate": 3.088051994301994e-05, "loss": 0.0171, "step": 52040 }, { "epoch": 0.38474616362614944, "grad_norm": 0.08763102442026138, "learning_rate": 3.087681030389364e-05, "loss": 0.018, "step": 52050 }, { "epoch": 0.3848200821974513, "grad_norm": 0.07670623809099197, "learning_rate": 3.0873100664767335e-05, "loss": 0.0179, "step": 52060 }, { "epoch": 0.38489400076875313, "grad_norm": 0.10598905384540558, "learning_rate": 3.0869391025641024e-05, "loss": 0.0188, "step": 52070 }, { "epoch": 0.384967919340055, "grad_norm": 0.08035948127508163, "learning_rate": 3.086568138651472e-05, "loss": 0.0157, "step": 52080 }, { "epoch": 0.3850418379113569, "grad_norm": 0.09968321025371552, "learning_rate": 3.0861971747388416e-05, "loss": 0.0205, "step": 52090 }, { "epoch": 0.3851157564826587, "grad_norm": 0.08303764462471008, "learning_rate": 3.085826210826211e-05, "loss": 0.0196, "step": 52100 }, { "epoch": 0.38518967505396057, "grad_norm": 0.09184552729129791, "learning_rate": 3.085455246913581e-05, "loss": 0.0189, "step": 52110 }, { "epoch": 0.3852635936252624, "grad_norm": 0.07583629339933395, "learning_rate": 3.08508428300095e-05, "loss": 0.0219, "step": 52120 }, { "epoch": 0.38533751219656426, "grad_norm": 0.06147291883826256, "learning_rate": 3.084713319088319e-05, "loss": 0.0174, "step": 52130 }, { "epoch": 0.3854114307678661, "grad_norm": 0.09958159178495407, "learning_rate": 3.084342355175689e-05, "loss": 0.0172, "step": 52140 }, { "epoch": 0.38548534933916795, "grad_norm": 0.11025216430425644, "learning_rate": 3.083971391263058e-05, "loss": 0.019, "step": 52150 }, { "epoch": 0.38555926791046985, "grad_norm": 0.08332915604114532, "learning_rate": 3.0836004273504274e-05, "loss": 0.0196, "step": 52160 }, { "epoch": 0.3856331864817717, "grad_norm": 0.10837563872337341, "learning_rate": 3.083229463437797e-05, "loss": 0.0192, "step": 52170 }, { "epoch": 0.38570710505307354, "grad_norm": 0.08369705826044083, "learning_rate": 3.0828584995251666e-05, "loss": 0.0158, "step": 52180 }, { "epoch": 0.3857810236243754, "grad_norm": 0.09527096152305603, "learning_rate": 3.082487535612536e-05, "loss": 0.0188, "step": 52190 }, { "epoch": 0.38585494219567723, "grad_norm": 0.06685808300971985, "learning_rate": 3.082116571699905e-05, "loss": 0.0173, "step": 52200 }, { "epoch": 0.3859288607669791, "grad_norm": 0.07597069442272186, "learning_rate": 3.081745607787275e-05, "loss": 0.0208, "step": 52210 }, { "epoch": 0.386002779338281, "grad_norm": 0.06681588292121887, "learning_rate": 3.0813746438746436e-05, "loss": 0.018, "step": 52220 }, { "epoch": 0.3860766979095828, "grad_norm": 0.07467056810855865, "learning_rate": 3.081003679962013e-05, "loss": 0.0224, "step": 52230 }, { "epoch": 0.38615061648088467, "grad_norm": 0.0811832919716835, "learning_rate": 3.080632716049383e-05, "loss": 0.0204, "step": 52240 }, { "epoch": 0.3862245350521865, "grad_norm": 0.09243257343769073, "learning_rate": 3.0802617521367524e-05, "loss": 0.0188, "step": 52250 }, { "epoch": 0.38629845362348836, "grad_norm": 0.08218041807413101, "learning_rate": 3.079890788224122e-05, "loss": 0.0175, "step": 52260 }, { "epoch": 0.3863723721947902, "grad_norm": 0.07658617943525314, "learning_rate": 3.079519824311491e-05, "loss": 0.018, "step": 52270 }, { "epoch": 0.38644629076609205, "grad_norm": 0.08019266277551651, "learning_rate": 3.0791488603988605e-05, "loss": 0.019, "step": 52280 }, { "epoch": 0.38652020933739395, "grad_norm": 0.0836150050163269, "learning_rate": 3.07877789648623e-05, "loss": 0.0198, "step": 52290 }, { "epoch": 0.3865941279086958, "grad_norm": 0.07567259669303894, "learning_rate": 3.078406932573599e-05, "loss": 0.021, "step": 52300 }, { "epoch": 0.38666804647999764, "grad_norm": 0.07380937784910202, "learning_rate": 3.0780359686609686e-05, "loss": 0.0185, "step": 52310 }, { "epoch": 0.3867419650512995, "grad_norm": 0.07615092396736145, "learning_rate": 3.077665004748338e-05, "loss": 0.0176, "step": 52320 }, { "epoch": 0.38681588362260133, "grad_norm": 0.08575232326984406, "learning_rate": 3.077294040835708e-05, "loss": 0.0175, "step": 52330 }, { "epoch": 0.3868898021939032, "grad_norm": 0.08019275218248367, "learning_rate": 3.0769230769230774e-05, "loss": 0.0184, "step": 52340 }, { "epoch": 0.3869637207652051, "grad_norm": 0.08791986107826233, "learning_rate": 3.0765521130104464e-05, "loss": 0.0184, "step": 52350 }, { "epoch": 0.3870376393365069, "grad_norm": 0.08571562170982361, "learning_rate": 3.076181149097816e-05, "loss": 0.0181, "step": 52360 }, { "epoch": 0.38711155790780877, "grad_norm": 0.13191141188144684, "learning_rate": 3.0758101851851855e-05, "loss": 0.0197, "step": 52370 }, { "epoch": 0.3871854764791106, "grad_norm": 0.07944611459970474, "learning_rate": 3.0754392212725545e-05, "loss": 0.0192, "step": 52380 }, { "epoch": 0.38725939505041246, "grad_norm": 0.06848800927400589, "learning_rate": 3.075068257359924e-05, "loss": 0.0166, "step": 52390 }, { "epoch": 0.3873333136217143, "grad_norm": 0.07772406935691833, "learning_rate": 3.0746972934472937e-05, "loss": 0.0179, "step": 52400 }, { "epoch": 0.38740723219301615, "grad_norm": 0.08584585040807724, "learning_rate": 3.074326329534663e-05, "loss": 0.0176, "step": 52410 }, { "epoch": 0.38748115076431805, "grad_norm": 0.08768728375434875, "learning_rate": 3.073955365622033e-05, "loss": 0.0169, "step": 52420 }, { "epoch": 0.3875550693356199, "grad_norm": 0.06692370772361755, "learning_rate": 3.073584401709402e-05, "loss": 0.0179, "step": 52430 }, { "epoch": 0.38762898790692174, "grad_norm": 0.11034579575061798, "learning_rate": 3.0732134377967714e-05, "loss": 0.0178, "step": 52440 }, { "epoch": 0.3877029064782236, "grad_norm": 0.08563010394573212, "learning_rate": 3.07284247388414e-05, "loss": 0.0193, "step": 52450 }, { "epoch": 0.38777682504952543, "grad_norm": 0.0796736404299736, "learning_rate": 3.07247150997151e-05, "loss": 0.0177, "step": 52460 }, { "epoch": 0.3878507436208273, "grad_norm": 0.10029114782810211, "learning_rate": 3.0721005460588795e-05, "loss": 0.0157, "step": 52470 }, { "epoch": 0.3879246621921292, "grad_norm": 0.07146019488573074, "learning_rate": 3.071729582146249e-05, "loss": 0.0209, "step": 52480 }, { "epoch": 0.387998580763431, "grad_norm": 0.07550966739654541, "learning_rate": 3.071358618233619e-05, "loss": 0.0204, "step": 52490 }, { "epoch": 0.38807249933473287, "grad_norm": 0.08018456399440765, "learning_rate": 3.0709876543209876e-05, "loss": 0.0173, "step": 52500 }, { "epoch": 0.3881464179060347, "grad_norm": 0.12307467311620712, "learning_rate": 3.070616690408357e-05, "loss": 0.0196, "step": 52510 }, { "epoch": 0.38822033647733656, "grad_norm": 0.10490459203720093, "learning_rate": 3.070245726495727e-05, "loss": 0.0182, "step": 52520 }, { "epoch": 0.3882942550486384, "grad_norm": 0.07732898741960526, "learning_rate": 3.069874762583096e-05, "loss": 0.0204, "step": 52530 }, { "epoch": 0.38836817361994025, "grad_norm": 0.09091013669967651, "learning_rate": 3.069503798670465e-05, "loss": 0.019, "step": 52540 }, { "epoch": 0.38844209219124215, "grad_norm": 0.07816635072231293, "learning_rate": 3.069132834757835e-05, "loss": 0.0173, "step": 52550 }, { "epoch": 0.388516010762544, "grad_norm": 0.06775733083486557, "learning_rate": 3.0687618708452045e-05, "loss": 0.0186, "step": 52560 }, { "epoch": 0.38858992933384584, "grad_norm": 0.0946992039680481, "learning_rate": 3.068390906932574e-05, "loss": 0.0188, "step": 52570 }, { "epoch": 0.3886638479051477, "grad_norm": 0.06431379169225693, "learning_rate": 3.068019943019943e-05, "loss": 0.0165, "step": 52580 }, { "epoch": 0.38873776647644953, "grad_norm": 0.08713816851377487, "learning_rate": 3.0676489791073126e-05, "loss": 0.0181, "step": 52590 }, { "epoch": 0.3888116850477514, "grad_norm": 0.08138715475797653, "learning_rate": 3.067278015194682e-05, "loss": 0.0171, "step": 52600 }, { "epoch": 0.3888856036190533, "grad_norm": 0.06727848201990128, "learning_rate": 3.066907051282051e-05, "loss": 0.0196, "step": 52610 }, { "epoch": 0.3889595221903551, "grad_norm": 0.09258706122636795, "learning_rate": 3.066536087369421e-05, "loss": 0.0193, "step": 52620 }, { "epoch": 0.38903344076165697, "grad_norm": 0.05659174174070358, "learning_rate": 3.06616512345679e-05, "loss": 0.0173, "step": 52630 }, { "epoch": 0.3891073593329588, "grad_norm": 0.09105684608221054, "learning_rate": 3.06579415954416e-05, "loss": 0.0191, "step": 52640 }, { "epoch": 0.38918127790426066, "grad_norm": 0.08389654755592346, "learning_rate": 3.0654231956315295e-05, "loss": 0.0193, "step": 52650 }, { "epoch": 0.3892551964755625, "grad_norm": 0.06399132311344147, "learning_rate": 3.0650522317188984e-05, "loss": 0.02, "step": 52660 }, { "epoch": 0.38932911504686435, "grad_norm": 0.06903476268053055, "learning_rate": 3.064681267806268e-05, "loss": 0.0171, "step": 52670 }, { "epoch": 0.38940303361816625, "grad_norm": 0.10140169411897659, "learning_rate": 3.064310303893637e-05, "loss": 0.0177, "step": 52680 }, { "epoch": 0.3894769521894681, "grad_norm": 0.09307069331407547, "learning_rate": 3.0639393399810065e-05, "loss": 0.0168, "step": 52690 }, { "epoch": 0.38955087076076994, "grad_norm": 0.08971722424030304, "learning_rate": 3.063568376068376e-05, "loss": 0.0181, "step": 52700 }, { "epoch": 0.3896247893320718, "grad_norm": 0.06755110621452332, "learning_rate": 3.063197412155746e-05, "loss": 0.018, "step": 52710 }, { "epoch": 0.38969870790337363, "grad_norm": 0.07892455160617828, "learning_rate": 3.062826448243115e-05, "loss": 0.0206, "step": 52720 }, { "epoch": 0.3897726264746755, "grad_norm": 0.09181396663188934, "learning_rate": 3.062455484330484e-05, "loss": 0.0182, "step": 52730 }, { "epoch": 0.3898465450459774, "grad_norm": 0.07258135080337524, "learning_rate": 3.062084520417854e-05, "loss": 0.0201, "step": 52740 }, { "epoch": 0.3899204636172792, "grad_norm": 0.1641390025615692, "learning_rate": 3.0617135565052234e-05, "loss": 0.0193, "step": 52750 }, { "epoch": 0.38999438218858107, "grad_norm": 0.05282848700881004, "learning_rate": 3.0613425925925924e-05, "loss": 0.0176, "step": 52760 }, { "epoch": 0.3900683007598829, "grad_norm": 0.10620077699422836, "learning_rate": 3.060971628679962e-05, "loss": 0.0164, "step": 52770 }, { "epoch": 0.39014221933118476, "grad_norm": 0.07189995795488358, "learning_rate": 3.0606006647673316e-05, "loss": 0.019, "step": 52780 }, { "epoch": 0.3902161379024866, "grad_norm": 0.06073181703686714, "learning_rate": 3.060229700854701e-05, "loss": 0.016, "step": 52790 }, { "epoch": 0.39029005647378845, "grad_norm": 0.10170228779315948, "learning_rate": 3.059858736942071e-05, "loss": 0.021, "step": 52800 }, { "epoch": 0.39036397504509035, "grad_norm": 0.07040001451969147, "learning_rate": 3.05948777302944e-05, "loss": 0.0186, "step": 52810 }, { "epoch": 0.3904378936163922, "grad_norm": 0.10885073989629745, "learning_rate": 3.059116809116809e-05, "loss": 0.0195, "step": 52820 }, { "epoch": 0.39051181218769404, "grad_norm": 0.09115403145551682, "learning_rate": 3.058745845204179e-05, "loss": 0.0187, "step": 52830 }, { "epoch": 0.3905857307589959, "grad_norm": 0.07751933485269547, "learning_rate": 3.058374881291548e-05, "loss": 0.0186, "step": 52840 }, { "epoch": 0.39065964933029773, "grad_norm": 0.0606808103621006, "learning_rate": 3.0580039173789174e-05, "loss": 0.018, "step": 52850 }, { "epoch": 0.3907335679015996, "grad_norm": 0.0965239480137825, "learning_rate": 3.057632953466287e-05, "loss": 0.0188, "step": 52860 }, { "epoch": 0.3908074864729015, "grad_norm": 0.11499795317649841, "learning_rate": 3.0572619895536566e-05, "loss": 0.0172, "step": 52870 }, { "epoch": 0.3908814050442033, "grad_norm": 0.08791990578174591, "learning_rate": 3.056891025641026e-05, "loss": 0.0185, "step": 52880 }, { "epoch": 0.39095532361550517, "grad_norm": 0.09780432283878326, "learning_rate": 3.056520061728395e-05, "loss": 0.0198, "step": 52890 }, { "epoch": 0.391029242186807, "grad_norm": 0.1021294891834259, "learning_rate": 3.056149097815765e-05, "loss": 0.0165, "step": 52900 }, { "epoch": 0.39110316075810886, "grad_norm": 0.0787302777171135, "learning_rate": 3.0557781339031336e-05, "loss": 0.0197, "step": 52910 }, { "epoch": 0.3911770793294107, "grad_norm": 0.08401231467723846, "learning_rate": 3.055407169990503e-05, "loss": 0.0187, "step": 52920 }, { "epoch": 0.39125099790071255, "grad_norm": 0.09179053455591202, "learning_rate": 3.055036206077873e-05, "loss": 0.0179, "step": 52930 }, { "epoch": 0.39132491647201445, "grad_norm": 0.09800249338150024, "learning_rate": 3.0546652421652424e-05, "loss": 0.0184, "step": 52940 }, { "epoch": 0.3913988350433163, "grad_norm": 0.07815613597631454, "learning_rate": 3.054294278252612e-05, "loss": 0.0183, "step": 52950 }, { "epoch": 0.39147275361461814, "grad_norm": 0.08728394657373428, "learning_rate": 3.053923314339981e-05, "loss": 0.017, "step": 52960 }, { "epoch": 0.39154667218592, "grad_norm": 0.11514657735824585, "learning_rate": 3.0535523504273505e-05, "loss": 0.0173, "step": 52970 }, { "epoch": 0.39162059075722183, "grad_norm": 0.08455294370651245, "learning_rate": 3.05318138651472e-05, "loss": 0.0199, "step": 52980 }, { "epoch": 0.3916945093285237, "grad_norm": 0.06525518745183945, "learning_rate": 3.052810422602089e-05, "loss": 0.017, "step": 52990 }, { "epoch": 0.3917684278998256, "grad_norm": 0.06673488020896912, "learning_rate": 3.0524394586894586e-05, "loss": 0.0182, "step": 53000 }, { "epoch": 0.3918423464711274, "grad_norm": 0.09368862211704254, "learning_rate": 3.052068494776828e-05, "loss": 0.0175, "step": 53010 }, { "epoch": 0.39191626504242927, "grad_norm": 0.09598971903324127, "learning_rate": 3.0516975308641975e-05, "loss": 0.0188, "step": 53020 }, { "epoch": 0.3919901836137311, "grad_norm": 0.06615760177373886, "learning_rate": 3.0513265669515674e-05, "loss": 0.0186, "step": 53030 }, { "epoch": 0.39206410218503296, "grad_norm": 0.08294472843408585, "learning_rate": 3.0509556030389363e-05, "loss": 0.0198, "step": 53040 }, { "epoch": 0.3921380207563348, "grad_norm": 0.10596353560686111, "learning_rate": 3.050584639126306e-05, "loss": 0.0186, "step": 53050 }, { "epoch": 0.39221193932763665, "grad_norm": 0.07891129702329636, "learning_rate": 3.0502136752136755e-05, "loss": 0.0185, "step": 53060 }, { "epoch": 0.39228585789893855, "grad_norm": 0.08801103383302689, "learning_rate": 3.0498427113010448e-05, "loss": 0.0199, "step": 53070 }, { "epoch": 0.3923597764702404, "grad_norm": 0.06925628334283829, "learning_rate": 3.0494717473884144e-05, "loss": 0.0202, "step": 53080 }, { "epoch": 0.39243369504154224, "grad_norm": 0.08155371993780136, "learning_rate": 3.0491007834757833e-05, "loss": 0.0207, "step": 53090 }, { "epoch": 0.3925076136128441, "grad_norm": 0.07738133519887924, "learning_rate": 3.0487298195631532e-05, "loss": 0.0202, "step": 53100 }, { "epoch": 0.39258153218414593, "grad_norm": 0.06856966018676758, "learning_rate": 3.0483588556505228e-05, "loss": 0.0183, "step": 53110 }, { "epoch": 0.3926554507554478, "grad_norm": 0.07440409064292908, "learning_rate": 3.0479878917378917e-05, "loss": 0.0197, "step": 53120 }, { "epoch": 0.3927293693267497, "grad_norm": 0.07958266139030457, "learning_rate": 3.0476169278252613e-05, "loss": 0.0183, "step": 53130 }, { "epoch": 0.3928032878980515, "grad_norm": 0.05523325875401497, "learning_rate": 3.0472459639126306e-05, "loss": 0.0184, "step": 53140 }, { "epoch": 0.39287720646935337, "grad_norm": 0.07466422021389008, "learning_rate": 3.0468750000000002e-05, "loss": 0.0183, "step": 53150 }, { "epoch": 0.3929511250406552, "grad_norm": 0.10524232685565948, "learning_rate": 3.0465040360873698e-05, "loss": 0.0184, "step": 53160 }, { "epoch": 0.39302504361195706, "grad_norm": 0.06600213795900345, "learning_rate": 3.0461330721747387e-05, "loss": 0.02, "step": 53170 }, { "epoch": 0.3930989621832589, "grad_norm": 0.08506739139556885, "learning_rate": 3.0457621082621087e-05, "loss": 0.0179, "step": 53180 }, { "epoch": 0.39317288075456075, "grad_norm": 0.07443349063396454, "learning_rate": 3.0453911443494776e-05, "loss": 0.0167, "step": 53190 }, { "epoch": 0.39324679932586265, "grad_norm": 0.07037489116191864, "learning_rate": 3.045020180436847e-05, "loss": 0.0183, "step": 53200 }, { "epoch": 0.3933207178971645, "grad_norm": 0.08329469710588455, "learning_rate": 3.0446492165242168e-05, "loss": 0.0151, "step": 53210 }, { "epoch": 0.39339463646846634, "grad_norm": 0.0837264209985733, "learning_rate": 3.044278252611586e-05, "loss": 0.0157, "step": 53220 }, { "epoch": 0.3934685550397682, "grad_norm": 0.0812053307890892, "learning_rate": 3.0439072886989556e-05, "loss": 0.0194, "step": 53230 }, { "epoch": 0.39354247361107003, "grad_norm": 0.09882737696170807, "learning_rate": 3.0435363247863245e-05, "loss": 0.0186, "step": 53240 }, { "epoch": 0.3936163921823719, "grad_norm": 0.08482872694730759, "learning_rate": 3.0431653608736945e-05, "loss": 0.0196, "step": 53250 }, { "epoch": 0.3936903107536738, "grad_norm": 0.07293414324522018, "learning_rate": 3.042794396961064e-05, "loss": 0.0189, "step": 53260 }, { "epoch": 0.3937642293249756, "grad_norm": 0.08307154476642609, "learning_rate": 3.042423433048433e-05, "loss": 0.0218, "step": 53270 }, { "epoch": 0.39383814789627747, "grad_norm": 0.10339733213186264, "learning_rate": 3.0420524691358026e-05, "loss": 0.0188, "step": 53280 }, { "epoch": 0.3939120664675793, "grad_norm": 0.056059882044792175, "learning_rate": 3.0416815052231722e-05, "loss": 0.0169, "step": 53290 }, { "epoch": 0.39398598503888116, "grad_norm": 0.08329902589321136, "learning_rate": 3.0413105413105414e-05, "loss": 0.0173, "step": 53300 }, { "epoch": 0.394059903610183, "grad_norm": 0.06202562153339386, "learning_rate": 3.040939577397911e-05, "loss": 0.0187, "step": 53310 }, { "epoch": 0.39413382218148485, "grad_norm": 0.07509329169988632, "learning_rate": 3.04056861348528e-05, "loss": 0.0164, "step": 53320 }, { "epoch": 0.39420774075278675, "grad_norm": 0.09368224442005157, "learning_rate": 3.04019764957265e-05, "loss": 0.0208, "step": 53330 }, { "epoch": 0.3942816593240886, "grad_norm": 0.06418855488300323, "learning_rate": 3.0398266856600195e-05, "loss": 0.0185, "step": 53340 }, { "epoch": 0.39435557789539044, "grad_norm": 0.07906321436166763, "learning_rate": 3.0394557217473884e-05, "loss": 0.021, "step": 53350 }, { "epoch": 0.3944294964666923, "grad_norm": 0.09090852737426758, "learning_rate": 3.039084757834758e-05, "loss": 0.018, "step": 53360 }, { "epoch": 0.39450341503799413, "grad_norm": 0.12433502078056335, "learning_rate": 3.0387137939221273e-05, "loss": 0.0155, "step": 53370 }, { "epoch": 0.394577333609296, "grad_norm": 0.08517279475927353, "learning_rate": 3.038342830009497e-05, "loss": 0.0201, "step": 53380 }, { "epoch": 0.3946512521805979, "grad_norm": 0.0872577503323555, "learning_rate": 3.0379718660968665e-05, "loss": 0.0205, "step": 53390 }, { "epoch": 0.3947251707518997, "grad_norm": 0.11957985907793045, "learning_rate": 3.0376009021842354e-05, "loss": 0.0175, "step": 53400 }, { "epoch": 0.39479908932320157, "grad_norm": 0.07976693660020828, "learning_rate": 3.0372299382716053e-05, "loss": 0.0185, "step": 53410 }, { "epoch": 0.3948730078945034, "grad_norm": 0.08289938420057297, "learning_rate": 3.0368589743589742e-05, "loss": 0.018, "step": 53420 }, { "epoch": 0.39494692646580526, "grad_norm": 0.10434596985578537, "learning_rate": 3.0364880104463438e-05, "loss": 0.0184, "step": 53430 }, { "epoch": 0.3950208450371071, "grad_norm": 0.08924178779125214, "learning_rate": 3.0361170465337134e-05, "loss": 0.0202, "step": 53440 }, { "epoch": 0.39509476360840895, "grad_norm": 0.05690544471144676, "learning_rate": 3.0357460826210827e-05, "loss": 0.0178, "step": 53450 }, { "epoch": 0.39516868217971085, "grad_norm": 0.11244209110736847, "learning_rate": 3.0353751187084523e-05, "loss": 0.0216, "step": 53460 }, { "epoch": 0.3952426007510127, "grad_norm": 0.08175085484981537, "learning_rate": 3.0350041547958212e-05, "loss": 0.0172, "step": 53470 }, { "epoch": 0.39531651932231454, "grad_norm": 0.10606854408979416, "learning_rate": 3.034633190883191e-05, "loss": 0.0195, "step": 53480 }, { "epoch": 0.3953904378936164, "grad_norm": 0.07251104712486267, "learning_rate": 3.0342622269705607e-05, "loss": 0.0247, "step": 53490 }, { "epoch": 0.39546435646491823, "grad_norm": 0.07370023429393768, "learning_rate": 3.0338912630579296e-05, "loss": 0.0215, "step": 53500 }, { "epoch": 0.3955382750362201, "grad_norm": 0.08910705149173737, "learning_rate": 3.0335202991452992e-05, "loss": 0.0189, "step": 53510 }, { "epoch": 0.395612193607522, "grad_norm": 0.06901726871728897, "learning_rate": 3.033149335232669e-05, "loss": 0.0195, "step": 53520 }, { "epoch": 0.3956861121788238, "grad_norm": 0.08980687707662582, "learning_rate": 3.032778371320038e-05, "loss": 0.0193, "step": 53530 }, { "epoch": 0.39576003075012567, "grad_norm": 0.0794142335653305, "learning_rate": 3.0324074074074077e-05, "loss": 0.0198, "step": 53540 }, { "epoch": 0.3958339493214275, "grad_norm": 0.07814609259366989, "learning_rate": 3.0320364434947766e-05, "loss": 0.0205, "step": 53550 }, { "epoch": 0.39590786789272936, "grad_norm": 0.07413819432258606, "learning_rate": 3.0316654795821465e-05, "loss": 0.018, "step": 53560 }, { "epoch": 0.3959817864640312, "grad_norm": 0.08249972015619278, "learning_rate": 3.031294515669516e-05, "loss": 0.016, "step": 53570 }, { "epoch": 0.39605570503533305, "grad_norm": 0.08316448330879211, "learning_rate": 3.030923551756885e-05, "loss": 0.0181, "step": 53580 }, { "epoch": 0.39612962360663495, "grad_norm": 0.055324457585811615, "learning_rate": 3.0305525878442547e-05, "loss": 0.0165, "step": 53590 }, { "epoch": 0.3962035421779368, "grad_norm": 0.07442904263734818, "learning_rate": 3.030181623931624e-05, "loss": 0.0186, "step": 53600 }, { "epoch": 0.39627746074923864, "grad_norm": 0.09970244020223618, "learning_rate": 3.0298106600189935e-05, "loss": 0.0171, "step": 53610 }, { "epoch": 0.3963513793205405, "grad_norm": 0.06909671425819397, "learning_rate": 3.029439696106363e-05, "loss": 0.0188, "step": 53620 }, { "epoch": 0.39642529789184233, "grad_norm": 0.08010876178741455, "learning_rate": 3.0290687321937324e-05, "loss": 0.0168, "step": 53630 }, { "epoch": 0.3964992164631442, "grad_norm": 0.072038933634758, "learning_rate": 3.028697768281102e-05, "loss": 0.0174, "step": 53640 }, { "epoch": 0.3965731350344461, "grad_norm": 0.04955233260989189, "learning_rate": 3.028326804368471e-05, "loss": 0.0167, "step": 53650 }, { "epoch": 0.3966470536057479, "grad_norm": 0.09004979580640793, "learning_rate": 3.0279558404558405e-05, "loss": 0.0192, "step": 53660 }, { "epoch": 0.39672097217704977, "grad_norm": 0.07755633443593979, "learning_rate": 3.02758487654321e-05, "loss": 0.0184, "step": 53670 }, { "epoch": 0.3967948907483516, "grad_norm": 0.06454595178365707, "learning_rate": 3.0272139126305793e-05, "loss": 0.0165, "step": 53680 }, { "epoch": 0.39686880931965346, "grad_norm": 0.09947235137224197, "learning_rate": 3.026842948717949e-05, "loss": 0.0174, "step": 53690 }, { "epoch": 0.3969427278909553, "grad_norm": 0.10691077262163162, "learning_rate": 3.026471984805318e-05, "loss": 0.0183, "step": 53700 }, { "epoch": 0.39701664646225715, "grad_norm": 0.1857234388589859, "learning_rate": 3.0261010208926878e-05, "loss": 0.0171, "step": 53710 }, { "epoch": 0.39709056503355905, "grad_norm": 0.07437913864850998, "learning_rate": 3.0257300569800574e-05, "loss": 0.0187, "step": 53720 }, { "epoch": 0.3971644836048609, "grad_norm": 0.09386677294969559, "learning_rate": 3.0253590930674263e-05, "loss": 0.0193, "step": 53730 }, { "epoch": 0.39723840217616274, "grad_norm": 0.06534472852945328, "learning_rate": 3.024988129154796e-05, "loss": 0.0173, "step": 53740 }, { "epoch": 0.3973123207474646, "grad_norm": 0.09258140623569489, "learning_rate": 3.0246171652421655e-05, "loss": 0.0181, "step": 53750 }, { "epoch": 0.39738623931876643, "grad_norm": 0.09052963554859161, "learning_rate": 3.0242462013295348e-05, "loss": 0.0187, "step": 53760 }, { "epoch": 0.3974601578900683, "grad_norm": 0.06417059898376465, "learning_rate": 3.0238752374169044e-05, "loss": 0.0168, "step": 53770 }, { "epoch": 0.3975340764613702, "grad_norm": 0.10106581449508667, "learning_rate": 3.0235042735042736e-05, "loss": 0.0211, "step": 53780 }, { "epoch": 0.397607995032672, "grad_norm": 0.10131117701530457, "learning_rate": 3.0231333095916432e-05, "loss": 0.0192, "step": 53790 }, { "epoch": 0.39768191360397387, "grad_norm": 0.1002507209777832, "learning_rate": 3.0227623456790128e-05, "loss": 0.0202, "step": 53800 }, { "epoch": 0.3977558321752757, "grad_norm": 0.09136881679296494, "learning_rate": 3.0223913817663817e-05, "loss": 0.0182, "step": 53810 }, { "epoch": 0.39782975074657756, "grad_norm": 0.09476058185100555, "learning_rate": 3.0220204178537513e-05, "loss": 0.0183, "step": 53820 }, { "epoch": 0.3979036693178794, "grad_norm": 0.08330568671226501, "learning_rate": 3.0216494539411206e-05, "loss": 0.0202, "step": 53830 }, { "epoch": 0.39797758788918125, "grad_norm": 0.06252734363079071, "learning_rate": 3.0212784900284902e-05, "loss": 0.0162, "step": 53840 }, { "epoch": 0.39805150646048315, "grad_norm": 0.11372721940279007, "learning_rate": 3.0209075261158598e-05, "loss": 0.0187, "step": 53850 }, { "epoch": 0.398125425031785, "grad_norm": 0.09041762351989746, "learning_rate": 3.020536562203229e-05, "loss": 0.0239, "step": 53860 }, { "epoch": 0.39819934360308684, "grad_norm": 0.07595834136009216, "learning_rate": 3.0201655982905986e-05, "loss": 0.0175, "step": 53870 }, { "epoch": 0.3982732621743887, "grad_norm": 0.10471068322658539, "learning_rate": 3.0197946343779675e-05, "loss": 0.0181, "step": 53880 }, { "epoch": 0.39834718074569053, "grad_norm": 0.08905491232872009, "learning_rate": 3.019423670465337e-05, "loss": 0.0169, "step": 53890 }, { "epoch": 0.3984210993169924, "grad_norm": 0.10129161924123764, "learning_rate": 3.0190527065527067e-05, "loss": 0.0203, "step": 53900 }, { "epoch": 0.3984950178882943, "grad_norm": 0.08781618624925613, "learning_rate": 3.018681742640076e-05, "loss": 0.0177, "step": 53910 }, { "epoch": 0.3985689364595961, "grad_norm": 0.05924617126584053, "learning_rate": 3.0183107787274456e-05, "loss": 0.0184, "step": 53920 }, { "epoch": 0.39864285503089797, "grad_norm": 0.10525096952915192, "learning_rate": 3.017939814814815e-05, "loss": 0.0204, "step": 53930 }, { "epoch": 0.3987167736021998, "grad_norm": 0.09503110498189926, "learning_rate": 3.0175688509021844e-05, "loss": 0.019, "step": 53940 }, { "epoch": 0.39879069217350166, "grad_norm": 0.0813848227262497, "learning_rate": 3.017197886989554e-05, "loss": 0.0194, "step": 53950 }, { "epoch": 0.3988646107448035, "grad_norm": 0.10762012004852295, "learning_rate": 3.016826923076923e-05, "loss": 0.0177, "step": 53960 }, { "epoch": 0.3989385293161054, "grad_norm": 0.13751506805419922, "learning_rate": 3.0164559591642926e-05, "loss": 0.0177, "step": 53970 }, { "epoch": 0.39901244788740725, "grad_norm": 0.0912880152463913, "learning_rate": 3.0160849952516625e-05, "loss": 0.0198, "step": 53980 }, { "epoch": 0.3990863664587091, "grad_norm": 0.06373404711484909, "learning_rate": 3.0157140313390314e-05, "loss": 0.0175, "step": 53990 }, { "epoch": 0.39916028503001094, "grad_norm": 0.08119054138660431, "learning_rate": 3.015343067426401e-05, "loss": 0.0155, "step": 54000 }, { "epoch": 0.3992342036013128, "grad_norm": 0.07526402920484543, "learning_rate": 3.0149721035137703e-05, "loss": 0.0175, "step": 54010 }, { "epoch": 0.39930812217261463, "grad_norm": 0.0822368636727333, "learning_rate": 3.01460113960114e-05, "loss": 0.0165, "step": 54020 }, { "epoch": 0.3993820407439165, "grad_norm": 0.07308053225278854, "learning_rate": 3.0142301756885095e-05, "loss": 0.0176, "step": 54030 }, { "epoch": 0.3994559593152184, "grad_norm": 0.11768469959497452, "learning_rate": 3.0138592117758784e-05, "loss": 0.0206, "step": 54040 }, { "epoch": 0.3995298778865202, "grad_norm": 0.1068728119134903, "learning_rate": 3.013488247863248e-05, "loss": 0.0204, "step": 54050 }, { "epoch": 0.39960379645782207, "grad_norm": 0.08505821973085403, "learning_rate": 3.0131172839506172e-05, "loss": 0.0184, "step": 54060 }, { "epoch": 0.3996777150291239, "grad_norm": 0.10482881218194962, "learning_rate": 3.012746320037987e-05, "loss": 0.0197, "step": 54070 }, { "epoch": 0.39975163360042576, "grad_norm": 0.07285966724157333, "learning_rate": 3.0123753561253564e-05, "loss": 0.0171, "step": 54080 }, { "epoch": 0.3998255521717276, "grad_norm": 0.0907202884554863, "learning_rate": 3.0120043922127257e-05, "loss": 0.0174, "step": 54090 }, { "epoch": 0.3998994707430295, "grad_norm": 0.07394234091043472, "learning_rate": 3.0116334283000953e-05, "loss": 0.0185, "step": 54100 }, { "epoch": 0.39997338931433135, "grad_norm": 0.06811315566301346, "learning_rate": 3.0112624643874642e-05, "loss": 0.0167, "step": 54110 }, { "epoch": 0.4000473078856332, "grad_norm": 0.0763428807258606, "learning_rate": 3.0108915004748338e-05, "loss": 0.017, "step": 54120 }, { "epoch": 0.40012122645693504, "grad_norm": 0.07965312153100967, "learning_rate": 3.0105205365622034e-05, "loss": 0.0203, "step": 54130 }, { "epoch": 0.4001951450282369, "grad_norm": 0.1002349779009819, "learning_rate": 3.0101495726495727e-05, "loss": 0.0171, "step": 54140 }, { "epoch": 0.40026906359953873, "grad_norm": 0.07759466022253036, "learning_rate": 3.0097786087369423e-05, "loss": 0.0169, "step": 54150 }, { "epoch": 0.4003429821708406, "grad_norm": 0.06216445192694664, "learning_rate": 3.0094076448243115e-05, "loss": 0.0176, "step": 54160 }, { "epoch": 0.4004169007421425, "grad_norm": 0.07447425276041031, "learning_rate": 3.009036680911681e-05, "loss": 0.0164, "step": 54170 }, { "epoch": 0.4004908193134443, "grad_norm": 0.06533452123403549, "learning_rate": 3.0086657169990507e-05, "loss": 0.0177, "step": 54180 }, { "epoch": 0.40056473788474617, "grad_norm": 0.07110826671123505, "learning_rate": 3.0082947530864196e-05, "loss": 0.017, "step": 54190 }, { "epoch": 0.400638656456048, "grad_norm": 0.08527481555938721, "learning_rate": 3.0079237891737892e-05, "loss": 0.0182, "step": 54200 }, { "epoch": 0.40071257502734986, "grad_norm": 0.07174352556467056, "learning_rate": 3.007552825261159e-05, "loss": 0.0194, "step": 54210 }, { "epoch": 0.4007864935986517, "grad_norm": 0.09062623977661133, "learning_rate": 3.007181861348528e-05, "loss": 0.0188, "step": 54220 }, { "epoch": 0.4008604121699536, "grad_norm": 0.08902092278003693, "learning_rate": 3.0068108974358977e-05, "loss": 0.017, "step": 54230 }, { "epoch": 0.40093433074125545, "grad_norm": 0.11530375480651855, "learning_rate": 3.006439933523267e-05, "loss": 0.0204, "step": 54240 }, { "epoch": 0.4010082493125573, "grad_norm": 0.06766349822282791, "learning_rate": 3.0060689696106365e-05, "loss": 0.0155, "step": 54250 }, { "epoch": 0.40108216788385914, "grad_norm": 0.07901377230882645, "learning_rate": 3.005698005698006e-05, "loss": 0.0159, "step": 54260 }, { "epoch": 0.401156086455161, "grad_norm": 0.11029311269521713, "learning_rate": 3.005327041785375e-05, "loss": 0.0182, "step": 54270 }, { "epoch": 0.40123000502646283, "grad_norm": 0.09365560859441757, "learning_rate": 3.0049560778727446e-05, "loss": 0.0207, "step": 54280 }, { "epoch": 0.4013039235977647, "grad_norm": 0.06550610065460205, "learning_rate": 3.004585113960114e-05, "loss": 0.0153, "step": 54290 }, { "epoch": 0.4013778421690666, "grad_norm": 0.06464651226997375, "learning_rate": 3.0042141500474835e-05, "loss": 0.0171, "step": 54300 }, { "epoch": 0.4014517607403684, "grad_norm": 0.0901111364364624, "learning_rate": 3.003843186134853e-05, "loss": 0.0199, "step": 54310 }, { "epoch": 0.40152567931167027, "grad_norm": 0.097609743475914, "learning_rate": 3.0034722222222223e-05, "loss": 0.0182, "step": 54320 }, { "epoch": 0.4015995978829721, "grad_norm": 0.0751517117023468, "learning_rate": 3.003101258309592e-05, "loss": 0.0202, "step": 54330 }, { "epoch": 0.40167351645427396, "grad_norm": 0.08785123378038406, "learning_rate": 3.002730294396961e-05, "loss": 0.0205, "step": 54340 }, { "epoch": 0.4017474350255758, "grad_norm": 0.07688210904598236, "learning_rate": 3.0023593304843305e-05, "loss": 0.019, "step": 54350 }, { "epoch": 0.4018213535968777, "grad_norm": 0.0797678530216217, "learning_rate": 3.0019883665717004e-05, "loss": 0.0177, "step": 54360 }, { "epoch": 0.40189527216817955, "grad_norm": 0.08396304398775101, "learning_rate": 3.0016174026590693e-05, "loss": 0.0165, "step": 54370 }, { "epoch": 0.4019691907394814, "grad_norm": 0.07126887887716293, "learning_rate": 3.001246438746439e-05, "loss": 0.0161, "step": 54380 }, { "epoch": 0.40204310931078324, "grad_norm": 0.09733361005783081, "learning_rate": 3.000875474833808e-05, "loss": 0.021, "step": 54390 }, { "epoch": 0.4021170278820851, "grad_norm": 0.07474087178707123, "learning_rate": 3.0005045109211778e-05, "loss": 0.0205, "step": 54400 }, { "epoch": 0.40219094645338693, "grad_norm": 0.08255986124277115, "learning_rate": 3.0001335470085474e-05, "loss": 0.018, "step": 54410 }, { "epoch": 0.4022648650246888, "grad_norm": 0.0718206912279129, "learning_rate": 2.9997625830959163e-05, "loss": 0.0185, "step": 54420 }, { "epoch": 0.4023387835959907, "grad_norm": 0.07734379172325134, "learning_rate": 2.999391619183286e-05, "loss": 0.0165, "step": 54430 }, { "epoch": 0.4024127021672925, "grad_norm": 0.06918898969888687, "learning_rate": 2.9990206552706558e-05, "loss": 0.0174, "step": 54440 }, { "epoch": 0.40248662073859437, "grad_norm": 0.08389578759670258, "learning_rate": 2.9986496913580247e-05, "loss": 0.0161, "step": 54450 }, { "epoch": 0.4025605393098962, "grad_norm": 0.0872507318854332, "learning_rate": 2.9982787274453943e-05, "loss": 0.0177, "step": 54460 }, { "epoch": 0.40263445788119806, "grad_norm": 0.09838316589593887, "learning_rate": 2.9979077635327636e-05, "loss": 0.0189, "step": 54470 }, { "epoch": 0.4027083764524999, "grad_norm": 0.06993544846773148, "learning_rate": 2.9975367996201332e-05, "loss": 0.0193, "step": 54480 }, { "epoch": 0.4027822950238018, "grad_norm": 0.07519206404685974, "learning_rate": 2.9971658357075028e-05, "loss": 0.018, "step": 54490 }, { "epoch": 0.40285621359510365, "grad_norm": 0.09367363154888153, "learning_rate": 2.9967948717948717e-05, "loss": 0.0169, "step": 54500 }, { "epoch": 0.4029301321664055, "grad_norm": 0.09509658068418503, "learning_rate": 2.9964239078822416e-05, "loss": 0.017, "step": 54510 }, { "epoch": 0.40300405073770734, "grad_norm": 0.0789688378572464, "learning_rate": 2.9960529439696106e-05, "loss": 0.0172, "step": 54520 }, { "epoch": 0.4030779693090092, "grad_norm": 0.07211098819971085, "learning_rate": 2.99568198005698e-05, "loss": 0.0155, "step": 54530 }, { "epoch": 0.40315188788031103, "grad_norm": 0.0857715830206871, "learning_rate": 2.9953110161443497e-05, "loss": 0.0183, "step": 54540 }, { "epoch": 0.4032258064516129, "grad_norm": 0.10314106941223145, "learning_rate": 2.994940052231719e-05, "loss": 0.0178, "step": 54550 }, { "epoch": 0.4032997250229148, "grad_norm": 0.08754850178956985, "learning_rate": 2.9945690883190886e-05, "loss": 0.0176, "step": 54560 }, { "epoch": 0.4033736435942166, "grad_norm": 0.10044217109680176, "learning_rate": 2.9941981244064575e-05, "loss": 0.0192, "step": 54570 }, { "epoch": 0.40344756216551847, "grad_norm": 0.09112729877233505, "learning_rate": 2.993827160493827e-05, "loss": 0.0174, "step": 54580 }, { "epoch": 0.4035214807368203, "grad_norm": 0.08507723361253738, "learning_rate": 2.993456196581197e-05, "loss": 0.0188, "step": 54590 }, { "epoch": 0.40359539930812216, "grad_norm": 0.08809395879507065, "learning_rate": 2.993085232668566e-05, "loss": 0.017, "step": 54600 }, { "epoch": 0.403669317879424, "grad_norm": 0.09437878429889679, "learning_rate": 2.9927142687559356e-05, "loss": 0.0178, "step": 54610 }, { "epoch": 0.4037432364507259, "grad_norm": 0.09309881180524826, "learning_rate": 2.9923433048433048e-05, "loss": 0.0197, "step": 54620 }, { "epoch": 0.40381715502202775, "grad_norm": 0.07976792752742767, "learning_rate": 2.9919723409306744e-05, "loss": 0.0184, "step": 54630 }, { "epoch": 0.4038910735933296, "grad_norm": 0.05811803787946701, "learning_rate": 2.991601377018044e-05, "loss": 0.0184, "step": 54640 }, { "epoch": 0.40396499216463144, "grad_norm": 0.07902861386537552, "learning_rate": 2.991230413105413e-05, "loss": 0.0185, "step": 54650 }, { "epoch": 0.4040389107359333, "grad_norm": 0.0864984542131424, "learning_rate": 2.990859449192783e-05, "loss": 0.0174, "step": 54660 }, { "epoch": 0.40411282930723513, "grad_norm": 0.08877560496330261, "learning_rate": 2.9904884852801525e-05, "loss": 0.0161, "step": 54670 }, { "epoch": 0.404186747878537, "grad_norm": 0.10576141625642776, "learning_rate": 2.9901175213675214e-05, "loss": 0.018, "step": 54680 }, { "epoch": 0.4042606664498389, "grad_norm": 0.10026395320892334, "learning_rate": 2.989746557454891e-05, "loss": 0.0186, "step": 54690 }, { "epoch": 0.4043345850211407, "grad_norm": 0.06324445456266403, "learning_rate": 2.9893755935422602e-05, "loss": 0.0189, "step": 54700 }, { "epoch": 0.40440850359244257, "grad_norm": 0.09299612790346146, "learning_rate": 2.98900462962963e-05, "loss": 0.0184, "step": 54710 }, { "epoch": 0.4044824221637444, "grad_norm": 0.07469546794891357, "learning_rate": 2.9886336657169994e-05, "loss": 0.0172, "step": 54720 }, { "epoch": 0.40455634073504626, "grad_norm": 0.11556144058704376, "learning_rate": 2.9882627018043684e-05, "loss": 0.022, "step": 54730 }, { "epoch": 0.4046302593063481, "grad_norm": 0.08120883256196976, "learning_rate": 2.9878917378917383e-05, "loss": 0.0181, "step": 54740 }, { "epoch": 0.40470417787765, "grad_norm": 0.08154403418302536, "learning_rate": 2.9875207739791072e-05, "loss": 0.0176, "step": 54750 }, { "epoch": 0.40477809644895185, "grad_norm": 0.09956072270870209, "learning_rate": 2.9871498100664768e-05, "loss": 0.0165, "step": 54760 }, { "epoch": 0.4048520150202537, "grad_norm": 0.09262163192033768, "learning_rate": 2.9867788461538464e-05, "loss": 0.0161, "step": 54770 }, { "epoch": 0.40492593359155554, "grad_norm": 0.09680727869272232, "learning_rate": 2.9864078822412157e-05, "loss": 0.0196, "step": 54780 }, { "epoch": 0.4049998521628574, "grad_norm": 0.0783548429608345, "learning_rate": 2.9860369183285853e-05, "loss": 0.0208, "step": 54790 }, { "epoch": 0.40507377073415923, "grad_norm": 0.06337272375822067, "learning_rate": 2.9856659544159542e-05, "loss": 0.0171, "step": 54800 }, { "epoch": 0.4051476893054611, "grad_norm": 0.13881878554821014, "learning_rate": 2.985294990503324e-05, "loss": 0.0201, "step": 54810 }, { "epoch": 0.405221607876763, "grad_norm": 0.0798603743314743, "learning_rate": 2.9849240265906937e-05, "loss": 0.0203, "step": 54820 }, { "epoch": 0.4052955264480648, "grad_norm": 0.13624916970729828, "learning_rate": 2.9845530626780626e-05, "loss": 0.0183, "step": 54830 }, { "epoch": 0.40536944501936667, "grad_norm": 0.09620372951030731, "learning_rate": 2.9841820987654322e-05, "loss": 0.0171, "step": 54840 }, { "epoch": 0.4054433635906685, "grad_norm": 0.15520921349525452, "learning_rate": 2.9838111348528015e-05, "loss": 0.0192, "step": 54850 }, { "epoch": 0.40551728216197036, "grad_norm": 0.06886200606822968, "learning_rate": 2.983440170940171e-05, "loss": 0.018, "step": 54860 }, { "epoch": 0.4055912007332722, "grad_norm": 0.07065249234437943, "learning_rate": 2.9830692070275407e-05, "loss": 0.0208, "step": 54870 }, { "epoch": 0.4056651193045741, "grad_norm": 0.09980648010969162, "learning_rate": 2.9826982431149096e-05, "loss": 0.0208, "step": 54880 }, { "epoch": 0.40573903787587595, "grad_norm": 0.07251753658056259, "learning_rate": 2.9823272792022795e-05, "loss": 0.018, "step": 54890 }, { "epoch": 0.4058129564471778, "grad_norm": 0.08873682469129562, "learning_rate": 2.981956315289649e-05, "loss": 0.0193, "step": 54900 }, { "epoch": 0.40588687501847964, "grad_norm": 0.09693753719329834, "learning_rate": 2.981585351377018e-05, "loss": 0.0185, "step": 54910 }, { "epoch": 0.4059607935897815, "grad_norm": 0.08978056907653809, "learning_rate": 2.9812143874643876e-05, "loss": 0.0185, "step": 54920 }, { "epoch": 0.40603471216108333, "grad_norm": 0.0747421607375145, "learning_rate": 2.980843423551757e-05, "loss": 0.0184, "step": 54930 }, { "epoch": 0.4061086307323852, "grad_norm": 0.11142931133508682, "learning_rate": 2.9804724596391265e-05, "loss": 0.0195, "step": 54940 }, { "epoch": 0.4061825493036871, "grad_norm": 0.07661478221416473, "learning_rate": 2.980101495726496e-05, "loss": 0.0201, "step": 54950 }, { "epoch": 0.4062564678749889, "grad_norm": 0.04711652547121048, "learning_rate": 2.9797305318138654e-05, "loss": 0.0165, "step": 54960 }, { "epoch": 0.40633038644629077, "grad_norm": 0.11913223564624786, "learning_rate": 2.979359567901235e-05, "loss": 0.0183, "step": 54970 }, { "epoch": 0.4064043050175926, "grad_norm": 0.08878781646490097, "learning_rate": 2.978988603988604e-05, "loss": 0.0164, "step": 54980 }, { "epoch": 0.40647822358889446, "grad_norm": 0.07752066105604172, "learning_rate": 2.9786176400759735e-05, "loss": 0.0175, "step": 54990 }, { "epoch": 0.4065521421601963, "grad_norm": 0.08861127495765686, "learning_rate": 2.978246676163343e-05, "loss": 0.02, "step": 55000 }, { "epoch": 0.4066260607314982, "grad_norm": 0.0983710065484047, "learning_rate": 2.9778757122507123e-05, "loss": 0.0206, "step": 55010 }, { "epoch": 0.40669997930280005, "grad_norm": 0.10089907050132751, "learning_rate": 2.977504748338082e-05, "loss": 0.0187, "step": 55020 }, { "epoch": 0.4067738978741019, "grad_norm": 0.0748949870467186, "learning_rate": 2.977133784425451e-05, "loss": 0.0198, "step": 55030 }, { "epoch": 0.40684781644540374, "grad_norm": 0.09311806410551071, "learning_rate": 2.9767628205128208e-05, "loss": 0.0178, "step": 55040 }, { "epoch": 0.4069217350167056, "grad_norm": 0.09688904881477356, "learning_rate": 2.9763918566001904e-05, "loss": 0.0174, "step": 55050 }, { "epoch": 0.40699565358800743, "grad_norm": 0.0686623826622963, "learning_rate": 2.9760208926875593e-05, "loss": 0.0165, "step": 55060 }, { "epoch": 0.4070695721593093, "grad_norm": 0.08978747576475143, "learning_rate": 2.975649928774929e-05, "loss": 0.015, "step": 55070 }, { "epoch": 0.4071434907306112, "grad_norm": 0.0859551951289177, "learning_rate": 2.975278964862298e-05, "loss": 0.0168, "step": 55080 }, { "epoch": 0.407217409301913, "grad_norm": 0.06242772564291954, "learning_rate": 2.9749080009496677e-05, "loss": 0.0171, "step": 55090 }, { "epoch": 0.40729132787321487, "grad_norm": 0.06802783161401749, "learning_rate": 2.9745370370370373e-05, "loss": 0.0168, "step": 55100 }, { "epoch": 0.4073652464445167, "grad_norm": 0.06960264593362808, "learning_rate": 2.9741660731244066e-05, "loss": 0.0172, "step": 55110 }, { "epoch": 0.40743916501581856, "grad_norm": 0.07149052619934082, "learning_rate": 2.9737951092117762e-05, "loss": 0.0189, "step": 55120 }, { "epoch": 0.4075130835871204, "grad_norm": 0.0835036188364029, "learning_rate": 2.9734241452991458e-05, "loss": 0.0165, "step": 55130 }, { "epoch": 0.4075870021584223, "grad_norm": 0.09829945862293243, "learning_rate": 2.9730531813865147e-05, "loss": 0.0189, "step": 55140 }, { "epoch": 0.40766092072972415, "grad_norm": 0.2645808458328247, "learning_rate": 2.9726822174738843e-05, "loss": 0.0192, "step": 55150 }, { "epoch": 0.407734839301026, "grad_norm": 0.08904475718736649, "learning_rate": 2.9723112535612536e-05, "loss": 0.0181, "step": 55160 }, { "epoch": 0.40780875787232784, "grad_norm": 0.0872875228524208, "learning_rate": 2.971940289648623e-05, "loss": 0.0202, "step": 55170 }, { "epoch": 0.4078826764436297, "grad_norm": 0.0831475630402565, "learning_rate": 2.9715693257359928e-05, "loss": 0.0196, "step": 55180 }, { "epoch": 0.40795659501493153, "grad_norm": 0.0917244628071785, "learning_rate": 2.971198361823362e-05, "loss": 0.0208, "step": 55190 }, { "epoch": 0.4080305135862334, "grad_norm": 0.07903681695461273, "learning_rate": 2.9708273979107316e-05, "loss": 0.0183, "step": 55200 }, { "epoch": 0.4081044321575353, "grad_norm": 0.0862165316939354, "learning_rate": 2.9704564339981005e-05, "loss": 0.0189, "step": 55210 }, { "epoch": 0.4081783507288371, "grad_norm": 0.09082507342100143, "learning_rate": 2.97008547008547e-05, "loss": 0.0176, "step": 55220 }, { "epoch": 0.40825226930013897, "grad_norm": 0.07527061551809311, "learning_rate": 2.9697145061728397e-05, "loss": 0.0149, "step": 55230 }, { "epoch": 0.4083261878714408, "grad_norm": 0.11855072528123856, "learning_rate": 2.969343542260209e-05, "loss": 0.02, "step": 55240 }, { "epoch": 0.40840010644274266, "grad_norm": 0.07656430453062057, "learning_rate": 2.9689725783475786e-05, "loss": 0.0188, "step": 55250 }, { "epoch": 0.4084740250140445, "grad_norm": 0.11509408801794052, "learning_rate": 2.968601614434948e-05, "loss": 0.0196, "step": 55260 }, { "epoch": 0.4085479435853464, "grad_norm": 0.0942547544836998, "learning_rate": 2.9682306505223174e-05, "loss": 0.0181, "step": 55270 }, { "epoch": 0.40862186215664825, "grad_norm": 0.09625694900751114, "learning_rate": 2.967859686609687e-05, "loss": 0.0189, "step": 55280 }, { "epoch": 0.4086957807279501, "grad_norm": 0.07003076374530792, "learning_rate": 2.967488722697056e-05, "loss": 0.0166, "step": 55290 }, { "epoch": 0.40876969929925194, "grad_norm": 0.091498002409935, "learning_rate": 2.9671177587844255e-05, "loss": 0.0187, "step": 55300 }, { "epoch": 0.4088436178705538, "grad_norm": 0.11632296442985535, "learning_rate": 2.9667467948717948e-05, "loss": 0.0199, "step": 55310 }, { "epoch": 0.40891753644185563, "grad_norm": 0.0970294401049614, "learning_rate": 2.9663758309591644e-05, "loss": 0.0198, "step": 55320 }, { "epoch": 0.4089914550131575, "grad_norm": 0.11141879856586456, "learning_rate": 2.966004867046534e-05, "loss": 0.017, "step": 55330 }, { "epoch": 0.4090653735844594, "grad_norm": 0.07932562381029129, "learning_rate": 2.9656339031339033e-05, "loss": 0.0171, "step": 55340 }, { "epoch": 0.4091392921557612, "grad_norm": 0.07699783146381378, "learning_rate": 2.965262939221273e-05, "loss": 0.0161, "step": 55350 }, { "epoch": 0.40921321072706307, "grad_norm": 0.09021428227424622, "learning_rate": 2.9648919753086424e-05, "loss": 0.017, "step": 55360 }, { "epoch": 0.4092871292983649, "grad_norm": 0.09824664145708084, "learning_rate": 2.9645210113960114e-05, "loss": 0.0192, "step": 55370 }, { "epoch": 0.40936104786966676, "grad_norm": 0.0917426124215126, "learning_rate": 2.964150047483381e-05, "loss": 0.0203, "step": 55380 }, { "epoch": 0.4094349664409686, "grad_norm": 0.0881839171051979, "learning_rate": 2.9637790835707502e-05, "loss": 0.0191, "step": 55390 }, { "epoch": 0.4095088850122705, "grad_norm": 0.07064758241176605, "learning_rate": 2.9634081196581198e-05, "loss": 0.0179, "step": 55400 }, { "epoch": 0.40958280358357235, "grad_norm": 0.07534152269363403, "learning_rate": 2.9630371557454894e-05, "loss": 0.0177, "step": 55410 }, { "epoch": 0.4096567221548742, "grad_norm": 0.09132669866085052, "learning_rate": 2.9626661918328587e-05, "loss": 0.0168, "step": 55420 }, { "epoch": 0.40973064072617604, "grad_norm": 0.08473557233810425, "learning_rate": 2.9622952279202283e-05, "loss": 0.0192, "step": 55430 }, { "epoch": 0.4098045592974779, "grad_norm": 0.07497388124465942, "learning_rate": 2.9619242640075972e-05, "loss": 0.018, "step": 55440 }, { "epoch": 0.40987847786877973, "grad_norm": 0.07634835690259933, "learning_rate": 2.9615533000949668e-05, "loss": 0.0169, "step": 55450 }, { "epoch": 0.4099523964400816, "grad_norm": 0.1072976365685463, "learning_rate": 2.9611823361823364e-05, "loss": 0.0183, "step": 55460 }, { "epoch": 0.4100263150113835, "grad_norm": 0.08737269788980484, "learning_rate": 2.9608113722697056e-05, "loss": 0.0168, "step": 55470 }, { "epoch": 0.4101002335826853, "grad_norm": 0.08957663178443909, "learning_rate": 2.9604404083570752e-05, "loss": 0.0191, "step": 55480 }, { "epoch": 0.41017415215398717, "grad_norm": 0.0685604140162468, "learning_rate": 2.9600694444444445e-05, "loss": 0.0159, "step": 55490 }, { "epoch": 0.410248070725289, "grad_norm": 0.08200903236865997, "learning_rate": 2.959698480531814e-05, "loss": 0.0187, "step": 55500 }, { "epoch": 0.41032198929659086, "grad_norm": 0.08628015220165253, "learning_rate": 2.9593275166191837e-05, "loss": 0.0196, "step": 55510 }, { "epoch": 0.4103959078678927, "grad_norm": 0.0769340842962265, "learning_rate": 2.9589565527065526e-05, "loss": 0.0189, "step": 55520 }, { "epoch": 0.4104698264391946, "grad_norm": 0.08223969489336014, "learning_rate": 2.9585855887939222e-05, "loss": 0.0169, "step": 55530 }, { "epoch": 0.41054374501049645, "grad_norm": 0.0858936756849289, "learning_rate": 2.9582146248812915e-05, "loss": 0.0185, "step": 55540 }, { "epoch": 0.4106176635817983, "grad_norm": 0.0734199583530426, "learning_rate": 2.957843660968661e-05, "loss": 0.019, "step": 55550 }, { "epoch": 0.41069158215310014, "grad_norm": 0.08149746805429459, "learning_rate": 2.9574726970560307e-05, "loss": 0.0144, "step": 55560 }, { "epoch": 0.410765500724402, "grad_norm": 0.1099155992269516, "learning_rate": 2.9571017331434e-05, "loss": 0.0166, "step": 55570 }, { "epoch": 0.41083941929570383, "grad_norm": 0.11669523268938065, "learning_rate": 2.9567307692307695e-05, "loss": 0.018, "step": 55580 }, { "epoch": 0.4109133378670057, "grad_norm": 0.11657500267028809, "learning_rate": 2.956359805318139e-05, "loss": 0.0214, "step": 55590 }, { "epoch": 0.4109872564383076, "grad_norm": 0.08685796707868576, "learning_rate": 2.955988841405508e-05, "loss": 0.0181, "step": 55600 }, { "epoch": 0.4110611750096094, "grad_norm": 0.06064879149198532, "learning_rate": 2.9556178774928776e-05, "loss": 0.0199, "step": 55610 }, { "epoch": 0.41113509358091127, "grad_norm": 0.0598427839577198, "learning_rate": 2.955246913580247e-05, "loss": 0.0181, "step": 55620 }, { "epoch": 0.4112090121522131, "grad_norm": 0.06348814815282822, "learning_rate": 2.9548759496676165e-05, "loss": 0.0181, "step": 55630 }, { "epoch": 0.41128293072351496, "grad_norm": 0.07413514703512192, "learning_rate": 2.954504985754986e-05, "loss": 0.0192, "step": 55640 }, { "epoch": 0.4113568492948168, "grad_norm": 0.0760175809264183, "learning_rate": 2.9541340218423553e-05, "loss": 0.0164, "step": 55650 }, { "epoch": 0.4114307678661187, "grad_norm": 0.05769358202815056, "learning_rate": 2.953763057929725e-05, "loss": 0.0184, "step": 55660 }, { "epoch": 0.41150468643742055, "grad_norm": 0.08280821144580841, "learning_rate": 2.953392094017094e-05, "loss": 0.0168, "step": 55670 }, { "epoch": 0.4115786050087224, "grad_norm": 0.07674267143011093, "learning_rate": 2.9530211301044634e-05, "loss": 0.0162, "step": 55680 }, { "epoch": 0.41165252358002424, "grad_norm": 0.1518862247467041, "learning_rate": 2.9526501661918334e-05, "loss": 0.0197, "step": 55690 }, { "epoch": 0.4117264421513261, "grad_norm": 0.048735495656728745, "learning_rate": 2.9522792022792023e-05, "loss": 0.0181, "step": 55700 }, { "epoch": 0.41180036072262793, "grad_norm": 0.10562967509031296, "learning_rate": 2.951908238366572e-05, "loss": 0.0164, "step": 55710 }, { "epoch": 0.4118742792939298, "grad_norm": 0.1096380278468132, "learning_rate": 2.951537274453941e-05, "loss": 0.0205, "step": 55720 }, { "epoch": 0.4119481978652317, "grad_norm": 0.07708892971277237, "learning_rate": 2.9511663105413107e-05, "loss": 0.0177, "step": 55730 }, { "epoch": 0.4120221164365335, "grad_norm": 0.10286412388086319, "learning_rate": 2.9507953466286803e-05, "loss": 0.0177, "step": 55740 }, { "epoch": 0.41209603500783537, "grad_norm": 0.06790533661842346, "learning_rate": 2.9504243827160493e-05, "loss": 0.0171, "step": 55750 }, { "epoch": 0.4121699535791372, "grad_norm": 0.08822519332170486, "learning_rate": 2.950053418803419e-05, "loss": 0.0166, "step": 55760 }, { "epoch": 0.41224387215043906, "grad_norm": 0.08776558935642242, "learning_rate": 2.949682454890788e-05, "loss": 0.018, "step": 55770 }, { "epoch": 0.4123177907217409, "grad_norm": 0.12816807627677917, "learning_rate": 2.9493114909781577e-05, "loss": 0.0217, "step": 55780 }, { "epoch": 0.4123917092930428, "grad_norm": 0.07932336628437042, "learning_rate": 2.9489405270655273e-05, "loss": 0.0166, "step": 55790 }, { "epoch": 0.41246562786434465, "grad_norm": 0.0879678800702095, "learning_rate": 2.9485695631528966e-05, "loss": 0.0181, "step": 55800 }, { "epoch": 0.4125395464356465, "grad_norm": 0.20386385917663574, "learning_rate": 2.948198599240266e-05, "loss": 0.0167, "step": 55810 }, { "epoch": 0.41261346500694834, "grad_norm": 0.1288457214832306, "learning_rate": 2.9478276353276358e-05, "loss": 0.0188, "step": 55820 }, { "epoch": 0.4126873835782502, "grad_norm": 0.06845947355031967, "learning_rate": 2.9474566714150047e-05, "loss": 0.02, "step": 55830 }, { "epoch": 0.41276130214955203, "grad_norm": 0.08572788536548615, "learning_rate": 2.9470857075023746e-05, "loss": 0.0172, "step": 55840 }, { "epoch": 0.41283522072085393, "grad_norm": 0.0841023325920105, "learning_rate": 2.9467147435897435e-05, "loss": 0.0196, "step": 55850 }, { "epoch": 0.4129091392921558, "grad_norm": 0.0607466846704483, "learning_rate": 2.946343779677113e-05, "loss": 0.0181, "step": 55860 }, { "epoch": 0.4129830578634576, "grad_norm": 0.10207097232341766, "learning_rate": 2.9459728157644827e-05, "loss": 0.0186, "step": 55870 }, { "epoch": 0.41305697643475947, "grad_norm": 0.08442854881286621, "learning_rate": 2.945601851851852e-05, "loss": 0.0156, "step": 55880 }, { "epoch": 0.4131308950060613, "grad_norm": 0.06380892544984818, "learning_rate": 2.9452308879392216e-05, "loss": 0.0185, "step": 55890 }, { "epoch": 0.41320481357736316, "grad_norm": 0.07364775240421295, "learning_rate": 2.9448599240265905e-05, "loss": 0.0184, "step": 55900 }, { "epoch": 0.413278732148665, "grad_norm": 0.04778318852186203, "learning_rate": 2.94448896011396e-05, "loss": 0.0153, "step": 55910 }, { "epoch": 0.4133526507199669, "grad_norm": 0.10692009329795837, "learning_rate": 2.94411799620133e-05, "loss": 0.0191, "step": 55920 }, { "epoch": 0.41342656929126875, "grad_norm": 0.10312798619270325, "learning_rate": 2.943747032288699e-05, "loss": 0.0179, "step": 55930 }, { "epoch": 0.4135004878625706, "grad_norm": 0.07171052694320679, "learning_rate": 2.9433760683760685e-05, "loss": 0.0219, "step": 55940 }, { "epoch": 0.41357440643387244, "grad_norm": 0.09593594819307327, "learning_rate": 2.9430051044634378e-05, "loss": 0.0183, "step": 55950 }, { "epoch": 0.4136483250051743, "grad_norm": 0.0906202644109726, "learning_rate": 2.9426341405508074e-05, "loss": 0.0154, "step": 55960 }, { "epoch": 0.41372224357647613, "grad_norm": 0.06475282460451126, "learning_rate": 2.942263176638177e-05, "loss": 0.0177, "step": 55970 }, { "epoch": 0.41379616214777803, "grad_norm": 0.08594939112663269, "learning_rate": 2.941892212725546e-05, "loss": 0.0175, "step": 55980 }, { "epoch": 0.4138700807190799, "grad_norm": 0.0895848423242569, "learning_rate": 2.941521248812916e-05, "loss": 0.0176, "step": 55990 }, { "epoch": 0.4139439992903817, "grad_norm": 0.07915901392698288, "learning_rate": 2.9411502849002848e-05, "loss": 0.0164, "step": 56000 }, { "epoch": 0.41401791786168357, "grad_norm": 0.07280030101537704, "learning_rate": 2.9407793209876544e-05, "loss": 0.0181, "step": 56010 }, { "epoch": 0.4140918364329854, "grad_norm": 0.09190954267978668, "learning_rate": 2.940408357075024e-05, "loss": 0.0181, "step": 56020 }, { "epoch": 0.41416575500428726, "grad_norm": 0.1099943295121193, "learning_rate": 2.9400373931623932e-05, "loss": 0.019, "step": 56030 }, { "epoch": 0.4142396735755891, "grad_norm": 0.10087905079126358, "learning_rate": 2.9396664292497628e-05, "loss": 0.0195, "step": 56040 }, { "epoch": 0.414313592146891, "grad_norm": 0.061354316771030426, "learning_rate": 2.9392954653371324e-05, "loss": 0.0173, "step": 56050 }, { "epoch": 0.41438751071819285, "grad_norm": 0.08477345108985901, "learning_rate": 2.9389245014245013e-05, "loss": 0.0156, "step": 56060 }, { "epoch": 0.4144614292894947, "grad_norm": 0.08678663522005081, "learning_rate": 2.9385535375118713e-05, "loss": 0.0182, "step": 56070 }, { "epoch": 0.41453534786079654, "grad_norm": 0.07783781737089157, "learning_rate": 2.9381825735992402e-05, "loss": 0.0168, "step": 56080 }, { "epoch": 0.4146092664320984, "grad_norm": 0.11021009087562561, "learning_rate": 2.9378116096866098e-05, "loss": 0.0183, "step": 56090 }, { "epoch": 0.41468318500340023, "grad_norm": 0.0836622565984726, "learning_rate": 2.9374406457739794e-05, "loss": 0.0159, "step": 56100 }, { "epoch": 0.41475710357470214, "grad_norm": 0.1709643006324768, "learning_rate": 2.9370696818613486e-05, "loss": 0.0182, "step": 56110 }, { "epoch": 0.414831022146004, "grad_norm": 0.12158837914466858, "learning_rate": 2.9366987179487182e-05, "loss": 0.0176, "step": 56120 }, { "epoch": 0.4149049407173058, "grad_norm": 0.08476385474205017, "learning_rate": 2.936327754036087e-05, "loss": 0.0159, "step": 56130 }, { "epoch": 0.41497885928860767, "grad_norm": 0.0901803970336914, "learning_rate": 2.9359567901234568e-05, "loss": 0.0183, "step": 56140 }, { "epoch": 0.4150527778599095, "grad_norm": 0.07867557555437088, "learning_rate": 2.9355858262108267e-05, "loss": 0.0183, "step": 56150 }, { "epoch": 0.41512669643121136, "grad_norm": 0.106829933822155, "learning_rate": 2.9352148622981956e-05, "loss": 0.0178, "step": 56160 }, { "epoch": 0.4152006150025132, "grad_norm": 0.07354728132486343, "learning_rate": 2.9348438983855652e-05, "loss": 0.0176, "step": 56170 }, { "epoch": 0.4152745335738151, "grad_norm": 0.11522946506738663, "learning_rate": 2.9344729344729345e-05, "loss": 0.0177, "step": 56180 }, { "epoch": 0.41534845214511695, "grad_norm": 0.09632186591625214, "learning_rate": 2.934101970560304e-05, "loss": 0.0218, "step": 56190 }, { "epoch": 0.4154223707164188, "grad_norm": 0.06593929976224899, "learning_rate": 2.9337310066476737e-05, "loss": 0.0169, "step": 56200 }, { "epoch": 0.41549628928772064, "grad_norm": 0.08895174413919449, "learning_rate": 2.9333600427350426e-05, "loss": 0.0193, "step": 56210 }, { "epoch": 0.4155702078590225, "grad_norm": 0.0696493610739708, "learning_rate": 2.9329890788224125e-05, "loss": 0.0195, "step": 56220 }, { "epoch": 0.41564412643032433, "grad_norm": 0.08290284126996994, "learning_rate": 2.9326181149097814e-05, "loss": 0.0204, "step": 56230 }, { "epoch": 0.41571804500162624, "grad_norm": 0.09379585832357407, "learning_rate": 2.932247150997151e-05, "loss": 0.0198, "step": 56240 }, { "epoch": 0.4157919635729281, "grad_norm": 0.08410067856311798, "learning_rate": 2.9318761870845206e-05, "loss": 0.0182, "step": 56250 }, { "epoch": 0.4158658821442299, "grad_norm": 0.09510476887226105, "learning_rate": 2.93150522317189e-05, "loss": 0.0154, "step": 56260 }, { "epoch": 0.41593980071553177, "grad_norm": 0.0716487243771553, "learning_rate": 2.9311342592592595e-05, "loss": 0.0163, "step": 56270 }, { "epoch": 0.4160137192868336, "grad_norm": 0.08590196073055267, "learning_rate": 2.930763295346629e-05, "loss": 0.0196, "step": 56280 }, { "epoch": 0.41608763785813546, "grad_norm": 0.09150087088346481, "learning_rate": 2.930392331433998e-05, "loss": 0.02, "step": 56290 }, { "epoch": 0.4161615564294373, "grad_norm": 0.07709812372922897, "learning_rate": 2.930021367521368e-05, "loss": 0.0184, "step": 56300 }, { "epoch": 0.4162354750007392, "grad_norm": 0.05744616687297821, "learning_rate": 2.929650403608737e-05, "loss": 0.0174, "step": 56310 }, { "epoch": 0.41630939357204105, "grad_norm": 0.08779750764369965, "learning_rate": 2.9292794396961064e-05, "loss": 0.0163, "step": 56320 }, { "epoch": 0.4163833121433429, "grad_norm": 0.08599414676427841, "learning_rate": 2.928908475783476e-05, "loss": 0.0161, "step": 56330 }, { "epoch": 0.41645723071464474, "grad_norm": 0.06551361083984375, "learning_rate": 2.9285375118708453e-05, "loss": 0.0159, "step": 56340 }, { "epoch": 0.4165311492859466, "grad_norm": 0.0936962142586708, "learning_rate": 2.928166547958215e-05, "loss": 0.0186, "step": 56350 }, { "epoch": 0.41660506785724843, "grad_norm": 0.09517485648393631, "learning_rate": 2.9277955840455838e-05, "loss": 0.0186, "step": 56360 }, { "epoch": 0.41667898642855034, "grad_norm": 0.08648562431335449, "learning_rate": 2.9274246201329538e-05, "loss": 0.018, "step": 56370 }, { "epoch": 0.4167529049998522, "grad_norm": 0.08034750819206238, "learning_rate": 2.9270536562203234e-05, "loss": 0.0165, "step": 56380 }, { "epoch": 0.416826823571154, "grad_norm": 0.07642500102519989, "learning_rate": 2.9266826923076923e-05, "loss": 0.018, "step": 56390 }, { "epoch": 0.41690074214245587, "grad_norm": 0.08972880244255066, "learning_rate": 2.926311728395062e-05, "loss": 0.018, "step": 56400 }, { "epoch": 0.4169746607137577, "grad_norm": 0.07712242007255554, "learning_rate": 2.925940764482431e-05, "loss": 0.0184, "step": 56410 }, { "epoch": 0.41704857928505956, "grad_norm": 0.08815699815750122, "learning_rate": 2.9255698005698007e-05, "loss": 0.0193, "step": 56420 }, { "epoch": 0.4171224978563614, "grad_norm": 0.08716235309839249, "learning_rate": 2.9251988366571703e-05, "loss": 0.0204, "step": 56430 }, { "epoch": 0.4171964164276633, "grad_norm": 0.10045552253723145, "learning_rate": 2.9248278727445392e-05, "loss": 0.0182, "step": 56440 }, { "epoch": 0.41727033499896515, "grad_norm": 0.0753190815448761, "learning_rate": 2.9244569088319092e-05, "loss": 0.0201, "step": 56450 }, { "epoch": 0.417344253570267, "grad_norm": 0.07696306705474854, "learning_rate": 2.924085944919278e-05, "loss": 0.0177, "step": 56460 }, { "epoch": 0.41741817214156884, "grad_norm": 0.08237896114587784, "learning_rate": 2.9237149810066477e-05, "loss": 0.0206, "step": 56470 }, { "epoch": 0.4174920907128707, "grad_norm": 0.06589295715093613, "learning_rate": 2.9233440170940173e-05, "loss": 0.0219, "step": 56480 }, { "epoch": 0.41756600928417253, "grad_norm": 0.059493862092494965, "learning_rate": 2.9229730531813865e-05, "loss": 0.0196, "step": 56490 }, { "epoch": 0.41763992785547444, "grad_norm": 0.07635711878538132, "learning_rate": 2.922602089268756e-05, "loss": 0.019, "step": 56500 }, { "epoch": 0.4177138464267763, "grad_norm": 0.07192423194646835, "learning_rate": 2.9222311253561257e-05, "loss": 0.0174, "step": 56510 }, { "epoch": 0.4177877649980781, "grad_norm": 0.1241598054766655, "learning_rate": 2.921860161443495e-05, "loss": 0.0187, "step": 56520 }, { "epoch": 0.41786168356937997, "grad_norm": 0.0738677978515625, "learning_rate": 2.9214891975308646e-05, "loss": 0.0207, "step": 56530 }, { "epoch": 0.4179356021406818, "grad_norm": 0.09094661474227905, "learning_rate": 2.9211182336182335e-05, "loss": 0.0209, "step": 56540 }, { "epoch": 0.41800952071198366, "grad_norm": 0.07835207134485245, "learning_rate": 2.920747269705603e-05, "loss": 0.0175, "step": 56550 }, { "epoch": 0.4180834392832855, "grad_norm": 0.05614442005753517, "learning_rate": 2.9203763057929727e-05, "loss": 0.0173, "step": 56560 }, { "epoch": 0.4181573578545874, "grad_norm": 0.0806455984711647, "learning_rate": 2.920005341880342e-05, "loss": 0.0183, "step": 56570 }, { "epoch": 0.41823127642588925, "grad_norm": 0.10308822244405746, "learning_rate": 2.9196343779677116e-05, "loss": 0.0188, "step": 56580 }, { "epoch": 0.4183051949971911, "grad_norm": 0.10335249453783035, "learning_rate": 2.9192634140550805e-05, "loss": 0.0187, "step": 56590 }, { "epoch": 0.41837911356849294, "grad_norm": 0.08708662539720535, "learning_rate": 2.9188924501424504e-05, "loss": 0.0183, "step": 56600 }, { "epoch": 0.4184530321397948, "grad_norm": 0.08335819840431213, "learning_rate": 2.91852148622982e-05, "loss": 0.0191, "step": 56610 }, { "epoch": 0.41852695071109663, "grad_norm": 0.07016626000404358, "learning_rate": 2.918150522317189e-05, "loss": 0.0186, "step": 56620 }, { "epoch": 0.41860086928239854, "grad_norm": 0.11232458800077438, "learning_rate": 2.9177795584045585e-05, "loss": 0.0189, "step": 56630 }, { "epoch": 0.4186747878537004, "grad_norm": 0.07821941375732422, "learning_rate": 2.9174085944919278e-05, "loss": 0.0178, "step": 56640 }, { "epoch": 0.4187487064250022, "grad_norm": 0.07746779173612595, "learning_rate": 2.9170376305792974e-05, "loss": 0.0164, "step": 56650 }, { "epoch": 0.41882262499630407, "grad_norm": 0.10343199223279953, "learning_rate": 2.916666666666667e-05, "loss": 0.0174, "step": 56660 }, { "epoch": 0.4188965435676059, "grad_norm": 0.07732991129159927, "learning_rate": 2.9162957027540362e-05, "loss": 0.0186, "step": 56670 }, { "epoch": 0.41897046213890776, "grad_norm": 0.09331337362527847, "learning_rate": 2.9159247388414058e-05, "loss": 0.0183, "step": 56680 }, { "epoch": 0.4190443807102096, "grad_norm": 0.06747223436832428, "learning_rate": 2.9155537749287747e-05, "loss": 0.0163, "step": 56690 }, { "epoch": 0.4191182992815115, "grad_norm": 0.0804247334599495, "learning_rate": 2.9151828110161443e-05, "loss": 0.0176, "step": 56700 }, { "epoch": 0.41919221785281335, "grad_norm": 0.0986623466014862, "learning_rate": 2.914811847103514e-05, "loss": 0.0179, "step": 56710 }, { "epoch": 0.4192661364241152, "grad_norm": 0.08656472712755203, "learning_rate": 2.9144408831908832e-05, "loss": 0.0198, "step": 56720 }, { "epoch": 0.41934005499541704, "grad_norm": 0.093471959233284, "learning_rate": 2.9140699192782528e-05, "loss": 0.0188, "step": 56730 }, { "epoch": 0.4194139735667189, "grad_norm": 0.09669660776853561, "learning_rate": 2.9136989553656224e-05, "loss": 0.0178, "step": 56740 }, { "epoch": 0.41948789213802073, "grad_norm": 0.08791998028755188, "learning_rate": 2.9133279914529917e-05, "loss": 0.0193, "step": 56750 }, { "epoch": 0.41956181070932264, "grad_norm": 0.08240630477666855, "learning_rate": 2.9129570275403612e-05, "loss": 0.0167, "step": 56760 }, { "epoch": 0.4196357292806245, "grad_norm": 0.0766507163643837, "learning_rate": 2.91258606362773e-05, "loss": 0.0175, "step": 56770 }, { "epoch": 0.4197096478519263, "grad_norm": 0.08248403668403625, "learning_rate": 2.9122150997150998e-05, "loss": 0.0172, "step": 56780 }, { "epoch": 0.41978356642322817, "grad_norm": 0.07111876457929611, "learning_rate": 2.9118441358024694e-05, "loss": 0.0171, "step": 56790 }, { "epoch": 0.41985748499453, "grad_norm": 0.10291147977113724, "learning_rate": 2.9114731718898386e-05, "loss": 0.0203, "step": 56800 }, { "epoch": 0.41993140356583186, "grad_norm": 0.09333407133817673, "learning_rate": 2.9111022079772082e-05, "loss": 0.0201, "step": 56810 }, { "epoch": 0.4200053221371337, "grad_norm": 0.08243946731090546, "learning_rate": 2.9107312440645775e-05, "loss": 0.0192, "step": 56820 }, { "epoch": 0.4200792407084356, "grad_norm": 0.07780137658119202, "learning_rate": 2.910360280151947e-05, "loss": 0.0199, "step": 56830 }, { "epoch": 0.42015315927973745, "grad_norm": 0.0727728083729744, "learning_rate": 2.9099893162393167e-05, "loss": 0.0165, "step": 56840 }, { "epoch": 0.4202270778510393, "grad_norm": 0.08291462063789368, "learning_rate": 2.9096183523266856e-05, "loss": 0.0169, "step": 56850 }, { "epoch": 0.42030099642234114, "grad_norm": 0.07736489921808243, "learning_rate": 2.9092473884140552e-05, "loss": 0.0197, "step": 56860 }, { "epoch": 0.420374914993643, "grad_norm": 0.06759285926818848, "learning_rate": 2.9088764245014244e-05, "loss": 0.0158, "step": 56870 }, { "epoch": 0.42044883356494483, "grad_norm": 0.08929736167192459, "learning_rate": 2.908505460588794e-05, "loss": 0.0179, "step": 56880 }, { "epoch": 0.42052275213624674, "grad_norm": 0.07094036787748337, "learning_rate": 2.9081344966761636e-05, "loss": 0.0193, "step": 56890 }, { "epoch": 0.4205966707075486, "grad_norm": 0.09569530189037323, "learning_rate": 2.907763532763533e-05, "loss": 0.0209, "step": 56900 }, { "epoch": 0.4206705892788504, "grad_norm": 0.09825005382299423, "learning_rate": 2.9073925688509025e-05, "loss": 0.0192, "step": 56910 }, { "epoch": 0.42074450785015227, "grad_norm": 0.11080006510019302, "learning_rate": 2.9070216049382714e-05, "loss": 0.0175, "step": 56920 }, { "epoch": 0.4208184264214541, "grad_norm": 0.09301717579364777, "learning_rate": 2.906650641025641e-05, "loss": 0.0194, "step": 56930 }, { "epoch": 0.42089234499275596, "grad_norm": 0.09418229013681412, "learning_rate": 2.9062796771130106e-05, "loss": 0.0149, "step": 56940 }, { "epoch": 0.4209662635640578, "grad_norm": 0.09676161408424377, "learning_rate": 2.90590871320038e-05, "loss": 0.0179, "step": 56950 }, { "epoch": 0.4210401821353597, "grad_norm": 0.07113750278949738, "learning_rate": 2.9055377492877495e-05, "loss": 0.0183, "step": 56960 }, { "epoch": 0.42111410070666155, "grad_norm": 0.06378157436847687, "learning_rate": 2.905166785375119e-05, "loss": 0.0167, "step": 56970 }, { "epoch": 0.4211880192779634, "grad_norm": 0.06854438036680222, "learning_rate": 2.9047958214624883e-05, "loss": 0.0164, "step": 56980 }, { "epoch": 0.42126193784926524, "grad_norm": 0.08005507290363312, "learning_rate": 2.904424857549858e-05, "loss": 0.0169, "step": 56990 }, { "epoch": 0.4213358564205671, "grad_norm": 0.09289931505918503, "learning_rate": 2.9040538936372268e-05, "loss": 0.0185, "step": 57000 }, { "epoch": 0.42140977499186894, "grad_norm": 0.08229994028806686, "learning_rate": 2.9036829297245964e-05, "loss": 0.0172, "step": 57010 }, { "epoch": 0.42148369356317084, "grad_norm": 0.0777730792760849, "learning_rate": 2.903311965811966e-05, "loss": 0.0181, "step": 57020 }, { "epoch": 0.4215576121344727, "grad_norm": 0.12674884498119354, "learning_rate": 2.9029410018993353e-05, "loss": 0.0186, "step": 57030 }, { "epoch": 0.4216315307057745, "grad_norm": 0.07563526928424835, "learning_rate": 2.902570037986705e-05, "loss": 0.0182, "step": 57040 }, { "epoch": 0.42170544927707637, "grad_norm": 0.07964854687452316, "learning_rate": 2.902199074074074e-05, "loss": 0.017, "step": 57050 }, { "epoch": 0.4217793678483782, "grad_norm": 0.07769711315631866, "learning_rate": 2.9018281101614437e-05, "loss": 0.0168, "step": 57060 }, { "epoch": 0.42185328641968006, "grad_norm": 0.0685645267367363, "learning_rate": 2.9014571462488133e-05, "loss": 0.0203, "step": 57070 }, { "epoch": 0.4219272049909819, "grad_norm": 0.07990463823080063, "learning_rate": 2.9010861823361822e-05, "loss": 0.0201, "step": 57080 }, { "epoch": 0.4220011235622838, "grad_norm": 0.08478770405054092, "learning_rate": 2.900715218423552e-05, "loss": 0.0172, "step": 57090 }, { "epoch": 0.42207504213358565, "grad_norm": 0.06837252527475357, "learning_rate": 2.900344254510921e-05, "loss": 0.0237, "step": 57100 }, { "epoch": 0.4221489607048875, "grad_norm": 0.08297235518693924, "learning_rate": 2.8999732905982907e-05, "loss": 0.0204, "step": 57110 }, { "epoch": 0.42222287927618934, "grad_norm": 0.10914500057697296, "learning_rate": 2.8996023266856603e-05, "loss": 0.0172, "step": 57120 }, { "epoch": 0.4222967978474912, "grad_norm": 0.07343258708715439, "learning_rate": 2.8992313627730296e-05, "loss": 0.0165, "step": 57130 }, { "epoch": 0.42237071641879304, "grad_norm": 0.09652519226074219, "learning_rate": 2.898860398860399e-05, "loss": 0.0192, "step": 57140 }, { "epoch": 0.42244463499009494, "grad_norm": 0.072759710252285, "learning_rate": 2.898489434947768e-05, "loss": 0.0171, "step": 57150 }, { "epoch": 0.4225185535613968, "grad_norm": 0.09772517532110214, "learning_rate": 2.8981184710351377e-05, "loss": 0.0192, "step": 57160 }, { "epoch": 0.4225924721326986, "grad_norm": 0.08044306188821793, "learning_rate": 2.8977475071225073e-05, "loss": 0.0189, "step": 57170 }, { "epoch": 0.42266639070400047, "grad_norm": 0.1011492982506752, "learning_rate": 2.8973765432098765e-05, "loss": 0.0184, "step": 57180 }, { "epoch": 0.4227403092753023, "grad_norm": 0.08796268701553345, "learning_rate": 2.897005579297246e-05, "loss": 0.0209, "step": 57190 }, { "epoch": 0.42281422784660416, "grad_norm": 0.09029748290777206, "learning_rate": 2.8966346153846157e-05, "loss": 0.0181, "step": 57200 }, { "epoch": 0.422888146417906, "grad_norm": 0.09961996972560883, "learning_rate": 2.896263651471985e-05, "loss": 0.0161, "step": 57210 }, { "epoch": 0.4229620649892079, "grad_norm": 0.23429933190345764, "learning_rate": 2.8958926875593546e-05, "loss": 0.019, "step": 57220 }, { "epoch": 0.42303598356050975, "grad_norm": 0.11873859912157059, "learning_rate": 2.8955217236467235e-05, "loss": 0.0222, "step": 57230 }, { "epoch": 0.4231099021318116, "grad_norm": 0.0865563228726387, "learning_rate": 2.895150759734093e-05, "loss": 0.0181, "step": 57240 }, { "epoch": 0.42318382070311344, "grad_norm": 0.14462807774543762, "learning_rate": 2.894779795821463e-05, "loss": 0.0176, "step": 57250 }, { "epoch": 0.4232577392744153, "grad_norm": 0.0750247985124588, "learning_rate": 2.894408831908832e-05, "loss": 0.0182, "step": 57260 }, { "epoch": 0.42333165784571714, "grad_norm": 0.0788230374455452, "learning_rate": 2.8940378679962015e-05, "loss": 0.0198, "step": 57270 }, { "epoch": 0.42340557641701904, "grad_norm": 0.08862007409334183, "learning_rate": 2.8936669040835708e-05, "loss": 0.017, "step": 57280 }, { "epoch": 0.4234794949883209, "grad_norm": 0.09727243334054947, "learning_rate": 2.8932959401709404e-05, "loss": 0.0178, "step": 57290 }, { "epoch": 0.4235534135596227, "grad_norm": 0.08772630244493484, "learning_rate": 2.89292497625831e-05, "loss": 0.0182, "step": 57300 }, { "epoch": 0.42362733213092457, "grad_norm": 0.07961007952690125, "learning_rate": 2.892554012345679e-05, "loss": 0.0181, "step": 57310 }, { "epoch": 0.4237012507022264, "grad_norm": 0.07917802780866623, "learning_rate": 2.8921830484330485e-05, "loss": 0.0178, "step": 57320 }, { "epoch": 0.42377516927352826, "grad_norm": 0.08240603655576706, "learning_rate": 2.8918120845204178e-05, "loss": 0.0192, "step": 57330 }, { "epoch": 0.4238490878448301, "grad_norm": 0.11383040994405746, "learning_rate": 2.8914411206077874e-05, "loss": 0.0196, "step": 57340 }, { "epoch": 0.423923006416132, "grad_norm": 0.09868566691875458, "learning_rate": 2.891070156695157e-05, "loss": 0.0194, "step": 57350 }, { "epoch": 0.42399692498743385, "grad_norm": 0.1159425750374794, "learning_rate": 2.8906991927825262e-05, "loss": 0.0196, "step": 57360 }, { "epoch": 0.4240708435587357, "grad_norm": 0.07365076243877411, "learning_rate": 2.8903282288698958e-05, "loss": 0.016, "step": 57370 }, { "epoch": 0.42414476213003754, "grad_norm": 0.07025440782308578, "learning_rate": 2.8899572649572647e-05, "loss": 0.0173, "step": 57380 }, { "epoch": 0.4242186807013394, "grad_norm": 0.08012454211711884, "learning_rate": 2.8895863010446343e-05, "loss": 0.0183, "step": 57390 }, { "epoch": 0.42429259927264124, "grad_norm": 0.087140753865242, "learning_rate": 2.8892153371320043e-05, "loss": 0.0189, "step": 57400 }, { "epoch": 0.42436651784394314, "grad_norm": 0.1057136058807373, "learning_rate": 2.8888443732193732e-05, "loss": 0.0193, "step": 57410 }, { "epoch": 0.424440436415245, "grad_norm": 0.08255941420793533, "learning_rate": 2.8884734093067428e-05, "loss": 0.018, "step": 57420 }, { "epoch": 0.4245143549865468, "grad_norm": 0.07151252031326294, "learning_rate": 2.8881024453941124e-05, "loss": 0.0178, "step": 57430 }, { "epoch": 0.42458827355784867, "grad_norm": 0.08573801070451736, "learning_rate": 2.8877314814814816e-05, "loss": 0.0172, "step": 57440 }, { "epoch": 0.4246621921291505, "grad_norm": 0.11681819707155228, "learning_rate": 2.8873605175688512e-05, "loss": 0.0208, "step": 57450 }, { "epoch": 0.42473611070045236, "grad_norm": 0.0816139355301857, "learning_rate": 2.88698955365622e-05, "loss": 0.0222, "step": 57460 }, { "epoch": 0.4248100292717542, "grad_norm": 0.06773439049720764, "learning_rate": 2.8866185897435897e-05, "loss": 0.018, "step": 57470 }, { "epoch": 0.4248839478430561, "grad_norm": 0.06856571137905121, "learning_rate": 2.8862476258309597e-05, "loss": 0.0174, "step": 57480 }, { "epoch": 0.42495786641435795, "grad_norm": 0.09887688606977463, "learning_rate": 2.8858766619183286e-05, "loss": 0.0194, "step": 57490 }, { "epoch": 0.4250317849856598, "grad_norm": 0.09817370027303696, "learning_rate": 2.8855056980056982e-05, "loss": 0.0205, "step": 57500 }, { "epoch": 0.42510570355696164, "grad_norm": 0.09108339995145798, "learning_rate": 2.8851347340930674e-05, "loss": 0.017, "step": 57510 }, { "epoch": 0.4251796221282635, "grad_norm": 0.08016406744718552, "learning_rate": 2.884763770180437e-05, "loss": 0.0189, "step": 57520 }, { "epoch": 0.42525354069956534, "grad_norm": 0.07251052558422089, "learning_rate": 2.8843928062678066e-05, "loss": 0.0176, "step": 57530 }, { "epoch": 0.42532745927086724, "grad_norm": 0.0984114408493042, "learning_rate": 2.8840218423551756e-05, "loss": 0.0204, "step": 57540 }, { "epoch": 0.4254013778421691, "grad_norm": 0.0761522427201271, "learning_rate": 2.8836508784425455e-05, "loss": 0.0185, "step": 57550 }, { "epoch": 0.4254752964134709, "grad_norm": 0.07486771047115326, "learning_rate": 2.8832799145299144e-05, "loss": 0.0186, "step": 57560 }, { "epoch": 0.4255492149847728, "grad_norm": 0.07122237235307693, "learning_rate": 2.882908950617284e-05, "loss": 0.0168, "step": 57570 }, { "epoch": 0.4256231335560746, "grad_norm": 0.08447059243917465, "learning_rate": 2.8825379867046536e-05, "loss": 0.0209, "step": 57580 }, { "epoch": 0.42569705212737646, "grad_norm": 0.09231428802013397, "learning_rate": 2.882167022792023e-05, "loss": 0.0179, "step": 57590 }, { "epoch": 0.4257709706986783, "grad_norm": 0.09985219687223434, "learning_rate": 2.8817960588793925e-05, "loss": 0.0175, "step": 57600 }, { "epoch": 0.4258448892699802, "grad_norm": 0.08764240145683289, "learning_rate": 2.8814250949667614e-05, "loss": 0.0188, "step": 57610 }, { "epoch": 0.42591880784128205, "grad_norm": 0.1160874217748642, "learning_rate": 2.881054131054131e-05, "loss": 0.0165, "step": 57620 }, { "epoch": 0.4259927264125839, "grad_norm": 0.09844554215669632, "learning_rate": 2.880683167141501e-05, "loss": 0.0203, "step": 57630 }, { "epoch": 0.42606664498388575, "grad_norm": 0.12586788833141327, "learning_rate": 2.88031220322887e-05, "loss": 0.0173, "step": 57640 }, { "epoch": 0.4261405635551876, "grad_norm": 0.07267776131629944, "learning_rate": 2.8799412393162394e-05, "loss": 0.0197, "step": 57650 }, { "epoch": 0.42621448212648944, "grad_norm": 0.11026757210493088, "learning_rate": 2.879570275403609e-05, "loss": 0.0195, "step": 57660 }, { "epoch": 0.42628840069779134, "grad_norm": 0.08216289430856705, "learning_rate": 2.8791993114909783e-05, "loss": 0.019, "step": 57670 }, { "epoch": 0.4263623192690932, "grad_norm": 0.07248413562774658, "learning_rate": 2.878828347578348e-05, "loss": 0.0191, "step": 57680 }, { "epoch": 0.426436237840395, "grad_norm": 0.0735250785946846, "learning_rate": 2.8784573836657168e-05, "loss": 0.0158, "step": 57690 }, { "epoch": 0.4265101564116969, "grad_norm": 0.08926840871572495, "learning_rate": 2.8780864197530867e-05, "loss": 0.0174, "step": 57700 }, { "epoch": 0.4265840749829987, "grad_norm": 0.0892130509018898, "learning_rate": 2.8777154558404563e-05, "loss": 0.0167, "step": 57710 }, { "epoch": 0.42665799355430056, "grad_norm": 0.08792570978403091, "learning_rate": 2.8773444919278253e-05, "loss": 0.0197, "step": 57720 }, { "epoch": 0.42673191212560246, "grad_norm": 0.08881917595863342, "learning_rate": 2.876973528015195e-05, "loss": 0.018, "step": 57730 }, { "epoch": 0.4268058306969043, "grad_norm": 0.08950222283601761, "learning_rate": 2.876602564102564e-05, "loss": 0.0174, "step": 57740 }, { "epoch": 0.42687974926820615, "grad_norm": 0.0893772542476654, "learning_rate": 2.8762316001899337e-05, "loss": 0.0218, "step": 57750 }, { "epoch": 0.426953667839508, "grad_norm": 0.09056003391742706, "learning_rate": 2.8758606362773033e-05, "loss": 0.0193, "step": 57760 }, { "epoch": 0.42702758641080985, "grad_norm": 0.11356296390295029, "learning_rate": 2.8754896723646722e-05, "loss": 0.0212, "step": 57770 }, { "epoch": 0.4271015049821117, "grad_norm": 0.06659174710512161, "learning_rate": 2.875118708452042e-05, "loss": 0.0192, "step": 57780 }, { "epoch": 0.42717542355341354, "grad_norm": 0.09439995139837265, "learning_rate": 2.874747744539411e-05, "loss": 0.0207, "step": 57790 }, { "epoch": 0.42724934212471544, "grad_norm": 0.07848507910966873, "learning_rate": 2.8743767806267807e-05, "loss": 0.0182, "step": 57800 }, { "epoch": 0.4273232606960173, "grad_norm": 0.07541602104902267, "learning_rate": 2.8740058167141503e-05, "loss": 0.0217, "step": 57810 }, { "epoch": 0.4273971792673191, "grad_norm": 0.10256219655275345, "learning_rate": 2.8736348528015195e-05, "loss": 0.0207, "step": 57820 }, { "epoch": 0.427471097838621, "grad_norm": 0.08818720281124115, "learning_rate": 2.873263888888889e-05, "loss": 0.0173, "step": 57830 }, { "epoch": 0.4275450164099228, "grad_norm": 0.06569219380617142, "learning_rate": 2.872892924976258e-05, "loss": 0.019, "step": 57840 }, { "epoch": 0.42761893498122466, "grad_norm": 0.05568908900022507, "learning_rate": 2.872521961063628e-05, "loss": 0.0217, "step": 57850 }, { "epoch": 0.42769285355252656, "grad_norm": 0.08012406527996063, "learning_rate": 2.8721509971509976e-05, "loss": 0.0176, "step": 57860 }, { "epoch": 0.4277667721238284, "grad_norm": 0.07560218870639801, "learning_rate": 2.8717800332383665e-05, "loss": 0.0165, "step": 57870 }, { "epoch": 0.42784069069513025, "grad_norm": 0.0802362784743309, "learning_rate": 2.871409069325736e-05, "loss": 0.017, "step": 57880 }, { "epoch": 0.4279146092664321, "grad_norm": 0.0996783971786499, "learning_rate": 2.8710381054131057e-05, "loss": 0.018, "step": 57890 }, { "epoch": 0.42798852783773395, "grad_norm": 0.06278420239686966, "learning_rate": 2.870667141500475e-05, "loss": 0.0183, "step": 57900 }, { "epoch": 0.4280624464090358, "grad_norm": 0.09411557763814926, "learning_rate": 2.8702961775878445e-05, "loss": 0.0184, "step": 57910 }, { "epoch": 0.42813636498033764, "grad_norm": 0.1253633052110672, "learning_rate": 2.8699252136752135e-05, "loss": 0.0186, "step": 57920 }, { "epoch": 0.42821028355163954, "grad_norm": 0.06833294779062271, "learning_rate": 2.8695542497625834e-05, "loss": 0.0194, "step": 57930 }, { "epoch": 0.4282842021229414, "grad_norm": 0.08020038157701492, "learning_rate": 2.869183285849953e-05, "loss": 0.0196, "step": 57940 }, { "epoch": 0.4283581206942432, "grad_norm": 0.09458767622709274, "learning_rate": 2.868812321937322e-05, "loss": 0.0175, "step": 57950 }, { "epoch": 0.4284320392655451, "grad_norm": 0.07971557974815369, "learning_rate": 2.8684413580246915e-05, "loss": 0.018, "step": 57960 }, { "epoch": 0.4285059578368469, "grad_norm": 0.07327605783939362, "learning_rate": 2.8680703941120608e-05, "loss": 0.0178, "step": 57970 }, { "epoch": 0.42857987640814876, "grad_norm": 0.08138163387775421, "learning_rate": 2.8676994301994304e-05, "loss": 0.0177, "step": 57980 }, { "epoch": 0.42865379497945066, "grad_norm": 0.06313429772853851, "learning_rate": 2.8673284662868e-05, "loss": 0.0181, "step": 57990 }, { "epoch": 0.4287277135507525, "grad_norm": 0.09797652810811996, "learning_rate": 2.866957502374169e-05, "loss": 0.0179, "step": 58000 }, { "epoch": 0.42880163212205435, "grad_norm": 0.06211160123348236, "learning_rate": 2.8665865384615388e-05, "loss": 0.0162, "step": 58010 }, { "epoch": 0.4288755506933562, "grad_norm": 0.0639074519276619, "learning_rate": 2.8662155745489077e-05, "loss": 0.0171, "step": 58020 }, { "epoch": 0.42894946926465805, "grad_norm": 0.0698406845331192, "learning_rate": 2.8658446106362773e-05, "loss": 0.0185, "step": 58030 }, { "epoch": 0.4290233878359599, "grad_norm": 0.09111010283231735, "learning_rate": 2.865473646723647e-05, "loss": 0.0184, "step": 58040 }, { "epoch": 0.42909730640726174, "grad_norm": 0.057579610496759415, "learning_rate": 2.8651026828110162e-05, "loss": 0.0184, "step": 58050 }, { "epoch": 0.42917122497856364, "grad_norm": 0.08983967453241348, "learning_rate": 2.8647317188983858e-05, "loss": 0.0173, "step": 58060 }, { "epoch": 0.4292451435498655, "grad_norm": 0.07949908822774887, "learning_rate": 2.8643607549857547e-05, "loss": 0.0138, "step": 58070 }, { "epoch": 0.4293190621211673, "grad_norm": 0.11375358700752258, "learning_rate": 2.8639897910731246e-05, "loss": 0.02, "step": 58080 }, { "epoch": 0.4293929806924692, "grad_norm": 0.07009422779083252, "learning_rate": 2.8636188271604942e-05, "loss": 0.0155, "step": 58090 }, { "epoch": 0.429466899263771, "grad_norm": 0.07353521138429642, "learning_rate": 2.863247863247863e-05, "loss": 0.0176, "step": 58100 }, { "epoch": 0.42954081783507286, "grad_norm": 0.0705774649977684, "learning_rate": 2.8628768993352327e-05, "loss": 0.0191, "step": 58110 }, { "epoch": 0.42961473640637476, "grad_norm": 0.07235126942396164, "learning_rate": 2.8625059354226023e-05, "loss": 0.0205, "step": 58120 }, { "epoch": 0.4296886549776766, "grad_norm": 0.07953356206417084, "learning_rate": 2.8621349715099716e-05, "loss": 0.0201, "step": 58130 }, { "epoch": 0.42976257354897845, "grad_norm": 0.09690140932798386, "learning_rate": 2.8617640075973412e-05, "loss": 0.0179, "step": 58140 }, { "epoch": 0.4298364921202803, "grad_norm": 0.09740715473890305, "learning_rate": 2.86139304368471e-05, "loss": 0.0201, "step": 58150 }, { "epoch": 0.42991041069158215, "grad_norm": 0.12054110318422318, "learning_rate": 2.86102207977208e-05, "loss": 0.0207, "step": 58160 }, { "epoch": 0.429984329262884, "grad_norm": 0.08145933598279953, "learning_rate": 2.8606511158594496e-05, "loss": 0.02, "step": 58170 }, { "epoch": 0.43005824783418584, "grad_norm": 0.06964617967605591, "learning_rate": 2.8602801519468186e-05, "loss": 0.0201, "step": 58180 }, { "epoch": 0.43013216640548774, "grad_norm": 0.09489192813634872, "learning_rate": 2.859909188034188e-05, "loss": 0.0183, "step": 58190 }, { "epoch": 0.4302060849767896, "grad_norm": 0.06682495027780533, "learning_rate": 2.8595382241215574e-05, "loss": 0.0169, "step": 58200 }, { "epoch": 0.4302800035480914, "grad_norm": 0.08641725778579712, "learning_rate": 2.859167260208927e-05, "loss": 0.0193, "step": 58210 }, { "epoch": 0.4303539221193933, "grad_norm": 0.0909150168299675, "learning_rate": 2.8587962962962966e-05, "loss": 0.0188, "step": 58220 }, { "epoch": 0.4304278406906951, "grad_norm": 0.07001394778490067, "learning_rate": 2.858425332383666e-05, "loss": 0.0182, "step": 58230 }, { "epoch": 0.43050175926199696, "grad_norm": 0.0732155293226242, "learning_rate": 2.8580543684710355e-05, "loss": 0.0217, "step": 58240 }, { "epoch": 0.43057567783329886, "grad_norm": 0.07230205088853836, "learning_rate": 2.8576834045584044e-05, "loss": 0.0177, "step": 58250 }, { "epoch": 0.4306495964046007, "grad_norm": 0.08895523101091385, "learning_rate": 2.857312440645774e-05, "loss": 0.0194, "step": 58260 }, { "epoch": 0.43072351497590255, "grad_norm": 0.07853704690933228, "learning_rate": 2.8569414767331436e-05, "loss": 0.0182, "step": 58270 }, { "epoch": 0.4307974335472044, "grad_norm": 0.10844014585018158, "learning_rate": 2.856570512820513e-05, "loss": 0.02, "step": 58280 }, { "epoch": 0.43087135211850625, "grad_norm": 0.08113034069538116, "learning_rate": 2.8561995489078824e-05, "loss": 0.0177, "step": 58290 }, { "epoch": 0.4309452706898081, "grad_norm": 0.08964955061674118, "learning_rate": 2.8558285849952514e-05, "loss": 0.0177, "step": 58300 }, { "epoch": 0.43101918926110994, "grad_norm": 0.07938321679830551, "learning_rate": 2.8554576210826213e-05, "loss": 0.0179, "step": 58310 }, { "epoch": 0.43109310783241184, "grad_norm": 0.07115291059017181, "learning_rate": 2.855086657169991e-05, "loss": 0.0177, "step": 58320 }, { "epoch": 0.4311670264037137, "grad_norm": 0.06883087009191513, "learning_rate": 2.8547156932573598e-05, "loss": 0.0194, "step": 58330 }, { "epoch": 0.4312409449750155, "grad_norm": 0.09511996060609818, "learning_rate": 2.8543447293447294e-05, "loss": 0.02, "step": 58340 }, { "epoch": 0.4313148635463174, "grad_norm": 0.13075599074363708, "learning_rate": 2.853973765432099e-05, "loss": 0.0222, "step": 58350 }, { "epoch": 0.4313887821176192, "grad_norm": 0.06507648527622223, "learning_rate": 2.8536028015194683e-05, "loss": 0.0159, "step": 58360 }, { "epoch": 0.43146270068892106, "grad_norm": 0.09812938421964645, "learning_rate": 2.853231837606838e-05, "loss": 0.0166, "step": 58370 }, { "epoch": 0.43153661926022296, "grad_norm": 0.08186332136392593, "learning_rate": 2.852860873694207e-05, "loss": 0.0215, "step": 58380 }, { "epoch": 0.4316105378315248, "grad_norm": 0.07731013000011444, "learning_rate": 2.8524899097815767e-05, "loss": 0.0202, "step": 58390 }, { "epoch": 0.43168445640282666, "grad_norm": 0.094798244535923, "learning_rate": 2.8521189458689463e-05, "loss": 0.0171, "step": 58400 }, { "epoch": 0.4317583749741285, "grad_norm": 0.110369011759758, "learning_rate": 2.8517479819563152e-05, "loss": 0.0198, "step": 58410 }, { "epoch": 0.43183229354543035, "grad_norm": 0.10678127408027649, "learning_rate": 2.8513770180436848e-05, "loss": 0.0169, "step": 58420 }, { "epoch": 0.4319062121167322, "grad_norm": 0.09165314584970474, "learning_rate": 2.851006054131054e-05, "loss": 0.02, "step": 58430 }, { "epoch": 0.43198013068803404, "grad_norm": 0.06183528155088425, "learning_rate": 2.8506350902184237e-05, "loss": 0.0204, "step": 58440 }, { "epoch": 0.43205404925933594, "grad_norm": 0.0831647589802742, "learning_rate": 2.8502641263057933e-05, "loss": 0.0208, "step": 58450 }, { "epoch": 0.4321279678306378, "grad_norm": 0.09043709933757782, "learning_rate": 2.8498931623931625e-05, "loss": 0.0183, "step": 58460 }, { "epoch": 0.43220188640193963, "grad_norm": 0.07497021555900574, "learning_rate": 2.849522198480532e-05, "loss": 0.0181, "step": 58470 }, { "epoch": 0.4322758049732415, "grad_norm": 0.09095343947410583, "learning_rate": 2.849151234567901e-05, "loss": 0.0193, "step": 58480 }, { "epoch": 0.4323497235445433, "grad_norm": 0.09853128343820572, "learning_rate": 2.8487802706552706e-05, "loss": 0.0186, "step": 58490 }, { "epoch": 0.43242364211584516, "grad_norm": 0.08832711726427078, "learning_rate": 2.8484093067426402e-05, "loss": 0.0159, "step": 58500 }, { "epoch": 0.43249756068714706, "grad_norm": 0.07951664924621582, "learning_rate": 2.8480383428300095e-05, "loss": 0.02, "step": 58510 }, { "epoch": 0.4325714792584489, "grad_norm": 0.08814312517642975, "learning_rate": 2.847667378917379e-05, "loss": 0.0187, "step": 58520 }, { "epoch": 0.43264539782975076, "grad_norm": 0.09159665554761887, "learning_rate": 2.8472964150047487e-05, "loss": 0.0197, "step": 58530 }, { "epoch": 0.4327193164010526, "grad_norm": 0.0654560849070549, "learning_rate": 2.846925451092118e-05, "loss": 0.0194, "step": 58540 }, { "epoch": 0.43279323497235445, "grad_norm": 0.06398443132638931, "learning_rate": 2.8465544871794875e-05, "loss": 0.0148, "step": 58550 }, { "epoch": 0.4328671535436563, "grad_norm": 0.10145048797130585, "learning_rate": 2.8461835232668565e-05, "loss": 0.0164, "step": 58560 }, { "epoch": 0.43294107211495814, "grad_norm": 0.07037309557199478, "learning_rate": 2.845812559354226e-05, "loss": 0.0171, "step": 58570 }, { "epoch": 0.43301499068626004, "grad_norm": 0.0784565657377243, "learning_rate": 2.845441595441596e-05, "loss": 0.0188, "step": 58580 }, { "epoch": 0.4330889092575619, "grad_norm": 0.10798093676567078, "learning_rate": 2.845070631528965e-05, "loss": 0.0171, "step": 58590 }, { "epoch": 0.43316282782886373, "grad_norm": 0.14126542210578918, "learning_rate": 2.8446996676163345e-05, "loss": 0.0192, "step": 58600 }, { "epoch": 0.4332367464001656, "grad_norm": 0.06410064548254013, "learning_rate": 2.8443287037037038e-05, "loss": 0.0185, "step": 58610 }, { "epoch": 0.4333106649714674, "grad_norm": 0.10798434168100357, "learning_rate": 2.8439577397910734e-05, "loss": 0.0187, "step": 58620 }, { "epoch": 0.43338458354276926, "grad_norm": 0.09027906507253647, "learning_rate": 2.843586775878443e-05, "loss": 0.0188, "step": 58630 }, { "epoch": 0.43345850211407116, "grad_norm": 0.06169156730175018, "learning_rate": 2.843215811965812e-05, "loss": 0.0181, "step": 58640 }, { "epoch": 0.433532420685373, "grad_norm": 0.0730864405632019, "learning_rate": 2.8428448480531815e-05, "loss": 0.017, "step": 58650 }, { "epoch": 0.43360633925667486, "grad_norm": 0.08099040389060974, "learning_rate": 2.8424738841405507e-05, "loss": 0.0187, "step": 58660 }, { "epoch": 0.4336802578279767, "grad_norm": 0.06756632030010223, "learning_rate": 2.8421029202279203e-05, "loss": 0.0157, "step": 58670 }, { "epoch": 0.43375417639927855, "grad_norm": 0.06606796383857727, "learning_rate": 2.84173195631529e-05, "loss": 0.016, "step": 58680 }, { "epoch": 0.4338280949705804, "grad_norm": 0.08598122000694275, "learning_rate": 2.8413609924026592e-05, "loss": 0.017, "step": 58690 }, { "epoch": 0.43390201354188224, "grad_norm": 0.07801267504692078, "learning_rate": 2.8409900284900288e-05, "loss": 0.0192, "step": 58700 }, { "epoch": 0.43397593211318414, "grad_norm": 0.11814634501934052, "learning_rate": 2.8406190645773977e-05, "loss": 0.0186, "step": 58710 }, { "epoch": 0.434049850684486, "grad_norm": 0.08184555917978287, "learning_rate": 2.8402481006647673e-05, "loss": 0.0178, "step": 58720 }, { "epoch": 0.43412376925578783, "grad_norm": 0.10462668538093567, "learning_rate": 2.839877136752137e-05, "loss": 0.016, "step": 58730 }, { "epoch": 0.4341976878270897, "grad_norm": 0.09804099798202515, "learning_rate": 2.839506172839506e-05, "loss": 0.0181, "step": 58740 }, { "epoch": 0.4342716063983915, "grad_norm": 0.06667347997426987, "learning_rate": 2.8391352089268758e-05, "loss": 0.0162, "step": 58750 }, { "epoch": 0.43434552496969336, "grad_norm": 0.07495291531085968, "learning_rate": 2.8387642450142454e-05, "loss": 0.0195, "step": 58760 }, { "epoch": 0.43441944354099526, "grad_norm": 0.07768017053604126, "learning_rate": 2.8383932811016146e-05, "loss": 0.0178, "step": 58770 }, { "epoch": 0.4344933621122971, "grad_norm": 0.09335466474294662, "learning_rate": 2.8380223171889842e-05, "loss": 0.0209, "step": 58780 }, { "epoch": 0.43456728068359896, "grad_norm": 0.0606859028339386, "learning_rate": 2.837651353276353e-05, "loss": 0.0195, "step": 58790 }, { "epoch": 0.4346411992549008, "grad_norm": 0.0708116963505745, "learning_rate": 2.8372803893637227e-05, "loss": 0.017, "step": 58800 }, { "epoch": 0.43471511782620265, "grad_norm": 0.07581387460231781, "learning_rate": 2.8369094254510927e-05, "loss": 0.0169, "step": 58810 }, { "epoch": 0.4347890363975045, "grad_norm": 0.10559257864952087, "learning_rate": 2.8365384615384616e-05, "loss": 0.016, "step": 58820 }, { "epoch": 0.43486295496880634, "grad_norm": 0.06055699661374092, "learning_rate": 2.8361674976258312e-05, "loss": 0.0178, "step": 58830 }, { "epoch": 0.43493687354010824, "grad_norm": 0.07731713354587555, "learning_rate": 2.8357965337132004e-05, "loss": 0.0185, "step": 58840 }, { "epoch": 0.4350107921114101, "grad_norm": 0.07417374104261398, "learning_rate": 2.83542556980057e-05, "loss": 0.0179, "step": 58850 }, { "epoch": 0.43508471068271193, "grad_norm": 0.08462899923324585, "learning_rate": 2.8350546058879396e-05, "loss": 0.0167, "step": 58860 }, { "epoch": 0.4351586292540138, "grad_norm": 0.16679640114307404, "learning_rate": 2.8346836419753085e-05, "loss": 0.0206, "step": 58870 }, { "epoch": 0.4352325478253156, "grad_norm": 0.08410433679819107, "learning_rate": 2.834312678062678e-05, "loss": 0.0173, "step": 58880 }, { "epoch": 0.43530646639661746, "grad_norm": 0.08903812617063522, "learning_rate": 2.8339417141500474e-05, "loss": 0.0206, "step": 58890 }, { "epoch": 0.43538038496791936, "grad_norm": 0.08129949867725372, "learning_rate": 2.833570750237417e-05, "loss": 0.0188, "step": 58900 }, { "epoch": 0.4354543035392212, "grad_norm": 0.0971398800611496, "learning_rate": 2.8331997863247866e-05, "loss": 0.0175, "step": 58910 }, { "epoch": 0.43552822211052306, "grad_norm": 0.05689968168735504, "learning_rate": 2.832828822412156e-05, "loss": 0.0185, "step": 58920 }, { "epoch": 0.4356021406818249, "grad_norm": 0.06802728027105331, "learning_rate": 2.8324578584995254e-05, "loss": 0.0172, "step": 58930 }, { "epoch": 0.43567605925312675, "grad_norm": 0.07055903226137161, "learning_rate": 2.8320868945868944e-05, "loss": 0.0158, "step": 58940 }, { "epoch": 0.4357499778244286, "grad_norm": 0.08107168227434158, "learning_rate": 2.831715930674264e-05, "loss": 0.0201, "step": 58950 }, { "epoch": 0.43582389639573044, "grad_norm": 0.08352484554052353, "learning_rate": 2.831344966761634e-05, "loss": 0.02, "step": 58960 }, { "epoch": 0.43589781496703234, "grad_norm": 0.08295414596796036, "learning_rate": 2.8309740028490028e-05, "loss": 0.0176, "step": 58970 }, { "epoch": 0.4359717335383342, "grad_norm": 0.0799393355846405, "learning_rate": 2.8306030389363724e-05, "loss": 0.0171, "step": 58980 }, { "epoch": 0.43604565210963603, "grad_norm": 0.060312747955322266, "learning_rate": 2.830232075023742e-05, "loss": 0.0172, "step": 58990 }, { "epoch": 0.4361195706809379, "grad_norm": 0.11155330389738083, "learning_rate": 2.8298611111111113e-05, "loss": 0.0206, "step": 59000 }, { "epoch": 0.4361934892522397, "grad_norm": 0.08497704565525055, "learning_rate": 2.829490147198481e-05, "loss": 0.0157, "step": 59010 }, { "epoch": 0.43626740782354156, "grad_norm": 0.08191950619220734, "learning_rate": 2.8291191832858498e-05, "loss": 0.0186, "step": 59020 }, { "epoch": 0.43634132639484347, "grad_norm": 0.09134622663259506, "learning_rate": 2.8287482193732194e-05, "loss": 0.0178, "step": 59030 }, { "epoch": 0.4364152449661453, "grad_norm": 0.061829108744859695, "learning_rate": 2.8283772554605893e-05, "loss": 0.0176, "step": 59040 }, { "epoch": 0.43648916353744716, "grad_norm": 0.07822591811418533, "learning_rate": 2.8280062915479582e-05, "loss": 0.0158, "step": 59050 }, { "epoch": 0.436563082108749, "grad_norm": 0.07874293625354767, "learning_rate": 2.827635327635328e-05, "loss": 0.018, "step": 59060 }, { "epoch": 0.43663700068005085, "grad_norm": 0.09312419593334198, "learning_rate": 2.827264363722697e-05, "loss": 0.0191, "step": 59070 }, { "epoch": 0.4367109192513527, "grad_norm": 0.08162961900234222, "learning_rate": 2.8268933998100667e-05, "loss": 0.0188, "step": 59080 }, { "epoch": 0.43678483782265454, "grad_norm": 0.06353994458913803, "learning_rate": 2.8265224358974363e-05, "loss": 0.0174, "step": 59090 }, { "epoch": 0.43685875639395644, "grad_norm": 0.08950216323137283, "learning_rate": 2.8261514719848052e-05, "loss": 0.0191, "step": 59100 }, { "epoch": 0.4369326749652583, "grad_norm": 0.06819460541009903, "learning_rate": 2.825780508072175e-05, "loss": 0.0155, "step": 59110 }, { "epoch": 0.43700659353656013, "grad_norm": 0.09616361558437347, "learning_rate": 2.825409544159544e-05, "loss": 0.0176, "step": 59120 }, { "epoch": 0.437080512107862, "grad_norm": 0.10181725025177002, "learning_rate": 2.8250385802469137e-05, "loss": 0.0205, "step": 59130 }, { "epoch": 0.4371544306791638, "grad_norm": 0.12475231289863586, "learning_rate": 2.8246676163342832e-05, "loss": 0.0194, "step": 59140 }, { "epoch": 0.43722834925046566, "grad_norm": 0.06839167326688766, "learning_rate": 2.8242966524216525e-05, "loss": 0.018, "step": 59150 }, { "epoch": 0.43730226782176757, "grad_norm": 0.07144494354724884, "learning_rate": 2.823925688509022e-05, "loss": 0.0219, "step": 59160 }, { "epoch": 0.4373761863930694, "grad_norm": 0.08841611444950104, "learning_rate": 2.823554724596391e-05, "loss": 0.0189, "step": 59170 }, { "epoch": 0.43745010496437126, "grad_norm": 0.07900545001029968, "learning_rate": 2.8231837606837606e-05, "loss": 0.019, "step": 59180 }, { "epoch": 0.4375240235356731, "grad_norm": 0.0752866268157959, "learning_rate": 2.8228127967711306e-05, "loss": 0.0168, "step": 59190 }, { "epoch": 0.43759794210697495, "grad_norm": 0.08983873575925827, "learning_rate": 2.8224418328584995e-05, "loss": 0.0163, "step": 59200 }, { "epoch": 0.4376718606782768, "grad_norm": 0.08169794827699661, "learning_rate": 2.822070868945869e-05, "loss": 0.0175, "step": 59210 }, { "epoch": 0.43774577924957864, "grad_norm": 0.08061221241950989, "learning_rate": 2.8216999050332387e-05, "loss": 0.0197, "step": 59220 }, { "epoch": 0.43781969782088054, "grad_norm": 0.0905076116323471, "learning_rate": 2.821328941120608e-05, "loss": 0.0185, "step": 59230 }, { "epoch": 0.4378936163921824, "grad_norm": 0.07927828282117844, "learning_rate": 2.8209579772079775e-05, "loss": 0.0176, "step": 59240 }, { "epoch": 0.43796753496348423, "grad_norm": 0.07110433280467987, "learning_rate": 2.8205870132953464e-05, "loss": 0.0181, "step": 59250 }, { "epoch": 0.4380414535347861, "grad_norm": 0.08170510083436966, "learning_rate": 2.8202160493827164e-05, "loss": 0.0169, "step": 59260 }, { "epoch": 0.4381153721060879, "grad_norm": 0.0884263664484024, "learning_rate": 2.819845085470086e-05, "loss": 0.0201, "step": 59270 }, { "epoch": 0.43818929067738976, "grad_norm": 0.0725260078907013, "learning_rate": 2.819474121557455e-05, "loss": 0.0198, "step": 59280 }, { "epoch": 0.43826320924869167, "grad_norm": 0.12197090685367584, "learning_rate": 2.8191031576448245e-05, "loss": 0.0175, "step": 59290 }, { "epoch": 0.4383371278199935, "grad_norm": 0.09408222138881683, "learning_rate": 2.8187321937321937e-05, "loss": 0.0192, "step": 59300 }, { "epoch": 0.43841104639129536, "grad_norm": 0.07606709003448486, "learning_rate": 2.8183612298195633e-05, "loss": 0.0166, "step": 59310 }, { "epoch": 0.4384849649625972, "grad_norm": 0.12872089445590973, "learning_rate": 2.817990265906933e-05, "loss": 0.0203, "step": 59320 }, { "epoch": 0.43855888353389905, "grad_norm": 0.07069522142410278, "learning_rate": 2.817619301994302e-05, "loss": 0.0201, "step": 59330 }, { "epoch": 0.4386328021052009, "grad_norm": 0.10497741401195526, "learning_rate": 2.8172483380816718e-05, "loss": 0.0177, "step": 59340 }, { "epoch": 0.43870672067650274, "grad_norm": 0.0735153928399086, "learning_rate": 2.8168773741690407e-05, "loss": 0.0193, "step": 59350 }, { "epoch": 0.43878063924780464, "grad_norm": 0.06192095950245857, "learning_rate": 2.8165064102564103e-05, "loss": 0.0209, "step": 59360 }, { "epoch": 0.4388545578191065, "grad_norm": 0.08346061408519745, "learning_rate": 2.81613544634378e-05, "loss": 0.0179, "step": 59370 }, { "epoch": 0.43892847639040833, "grad_norm": 0.08042008429765701, "learning_rate": 2.815764482431149e-05, "loss": 0.0174, "step": 59380 }, { "epoch": 0.4390023949617102, "grad_norm": 0.082971952855587, "learning_rate": 2.8153935185185188e-05, "loss": 0.0181, "step": 59390 }, { "epoch": 0.439076313533012, "grad_norm": 0.08295343071222305, "learning_rate": 2.8150225546058877e-05, "loss": 0.019, "step": 59400 }, { "epoch": 0.43915023210431386, "grad_norm": 0.07845450192689896, "learning_rate": 2.8146515906932576e-05, "loss": 0.0181, "step": 59410 }, { "epoch": 0.43922415067561577, "grad_norm": 0.059175584465265274, "learning_rate": 2.8142806267806272e-05, "loss": 0.0168, "step": 59420 }, { "epoch": 0.4392980692469176, "grad_norm": 0.07663719356060028, "learning_rate": 2.813909662867996e-05, "loss": 0.0161, "step": 59430 }, { "epoch": 0.43937198781821946, "grad_norm": 0.06469923257827759, "learning_rate": 2.8135386989553657e-05, "loss": 0.0193, "step": 59440 }, { "epoch": 0.4394459063895213, "grad_norm": 0.09643541276454926, "learning_rate": 2.8131677350427353e-05, "loss": 0.0197, "step": 59450 }, { "epoch": 0.43951982496082315, "grad_norm": 0.08100558072328568, "learning_rate": 2.8127967711301046e-05, "loss": 0.0173, "step": 59460 }, { "epoch": 0.439593743532125, "grad_norm": 0.08516040444374084, "learning_rate": 2.8124258072174742e-05, "loss": 0.0199, "step": 59470 }, { "epoch": 0.43966766210342684, "grad_norm": 0.07431662082672119, "learning_rate": 2.812054843304843e-05, "loss": 0.0192, "step": 59480 }, { "epoch": 0.43974158067472874, "grad_norm": 0.06716176122426987, "learning_rate": 2.811683879392213e-05, "loss": 0.0189, "step": 59490 }, { "epoch": 0.4398154992460306, "grad_norm": 0.11926719546318054, "learning_rate": 2.8113129154795826e-05, "loss": 0.0174, "step": 59500 }, { "epoch": 0.43988941781733243, "grad_norm": 0.07346758246421814, "learning_rate": 2.8109419515669516e-05, "loss": 0.0171, "step": 59510 }, { "epoch": 0.4399633363886343, "grad_norm": 0.09154437482357025, "learning_rate": 2.810570987654321e-05, "loss": 0.0183, "step": 59520 }, { "epoch": 0.4400372549599361, "grad_norm": 0.08122612535953522, "learning_rate": 2.8102000237416904e-05, "loss": 0.018, "step": 59530 }, { "epoch": 0.44011117353123796, "grad_norm": 0.10390602797269821, "learning_rate": 2.80982905982906e-05, "loss": 0.0192, "step": 59540 }, { "epoch": 0.44018509210253987, "grad_norm": 0.08174613118171692, "learning_rate": 2.8094580959164296e-05, "loss": 0.0185, "step": 59550 }, { "epoch": 0.4402590106738417, "grad_norm": 0.09750137478113174, "learning_rate": 2.809087132003799e-05, "loss": 0.0193, "step": 59560 }, { "epoch": 0.44033292924514356, "grad_norm": 0.08275096863508224, "learning_rate": 2.8087161680911685e-05, "loss": 0.0187, "step": 59570 }, { "epoch": 0.4404068478164454, "grad_norm": 0.07879745960235596, "learning_rate": 2.8083452041785374e-05, "loss": 0.0193, "step": 59580 }, { "epoch": 0.44048076638774725, "grad_norm": 0.08212180435657501, "learning_rate": 2.807974240265907e-05, "loss": 0.0212, "step": 59590 }, { "epoch": 0.4405546849590491, "grad_norm": 0.09315769374370575, "learning_rate": 2.8076032763532766e-05, "loss": 0.0193, "step": 59600 }, { "epoch": 0.440628603530351, "grad_norm": 0.0979996994137764, "learning_rate": 2.8072323124406458e-05, "loss": 0.0167, "step": 59610 }, { "epoch": 0.44070252210165284, "grad_norm": 0.08392339199781418, "learning_rate": 2.8068613485280154e-05, "loss": 0.0184, "step": 59620 }, { "epoch": 0.4407764406729547, "grad_norm": 0.07921017706394196, "learning_rate": 2.8064903846153843e-05, "loss": 0.0151, "step": 59630 }, { "epoch": 0.44085035924425653, "grad_norm": 0.08482234179973602, "learning_rate": 2.8061194207027543e-05, "loss": 0.0195, "step": 59640 }, { "epoch": 0.4409242778155584, "grad_norm": 0.057127732783555984, "learning_rate": 2.805748456790124e-05, "loss": 0.0194, "step": 59650 }, { "epoch": 0.4409981963868602, "grad_norm": 0.08310925215482712, "learning_rate": 2.8053774928774928e-05, "loss": 0.0181, "step": 59660 }, { "epoch": 0.44107211495816206, "grad_norm": 0.16174446046352386, "learning_rate": 2.8050065289648624e-05, "loss": 0.0172, "step": 59670 }, { "epoch": 0.44114603352946397, "grad_norm": 0.09357830137014389, "learning_rate": 2.804635565052232e-05, "loss": 0.0161, "step": 59680 }, { "epoch": 0.4412199521007658, "grad_norm": 0.07025822252035141, "learning_rate": 2.8042646011396012e-05, "loss": 0.018, "step": 59690 }, { "epoch": 0.44129387067206766, "grad_norm": 0.06780959665775299, "learning_rate": 2.803893637226971e-05, "loss": 0.0158, "step": 59700 }, { "epoch": 0.4413677892433695, "grad_norm": 0.0780034065246582, "learning_rate": 2.80352267331434e-05, "loss": 0.0178, "step": 59710 }, { "epoch": 0.44144170781467135, "grad_norm": 0.1173756867647171, "learning_rate": 2.8031517094017097e-05, "loss": 0.0183, "step": 59720 }, { "epoch": 0.4415156263859732, "grad_norm": 0.07703826576471329, "learning_rate": 2.8027807454890793e-05, "loss": 0.0184, "step": 59730 }, { "epoch": 0.4415895449572751, "grad_norm": 0.07214858382940292, "learning_rate": 2.8024097815764482e-05, "loss": 0.0185, "step": 59740 }, { "epoch": 0.44166346352857694, "grad_norm": 0.06699478626251221, "learning_rate": 2.8020388176638178e-05, "loss": 0.018, "step": 59750 }, { "epoch": 0.4417373820998788, "grad_norm": 0.08458127826452255, "learning_rate": 2.801667853751187e-05, "loss": 0.017, "step": 59760 }, { "epoch": 0.44181130067118063, "grad_norm": 0.05389302968978882, "learning_rate": 2.8012968898385567e-05, "loss": 0.0171, "step": 59770 }, { "epoch": 0.4418852192424825, "grad_norm": 0.07580448687076569, "learning_rate": 2.8009259259259263e-05, "loss": 0.0162, "step": 59780 }, { "epoch": 0.4419591378137843, "grad_norm": 0.06884662061929703, "learning_rate": 2.8005549620132955e-05, "loss": 0.0185, "step": 59790 }, { "epoch": 0.44203305638508616, "grad_norm": 0.07314516603946686, "learning_rate": 2.800183998100665e-05, "loss": 0.0198, "step": 59800 }, { "epoch": 0.44210697495638807, "grad_norm": 0.062169380486011505, "learning_rate": 2.799813034188034e-05, "loss": 0.0175, "step": 59810 }, { "epoch": 0.4421808935276899, "grad_norm": 0.09424956887960434, "learning_rate": 2.7994420702754036e-05, "loss": 0.0192, "step": 59820 }, { "epoch": 0.44225481209899176, "grad_norm": 0.08688298612833023, "learning_rate": 2.7990711063627732e-05, "loss": 0.0183, "step": 59830 }, { "epoch": 0.4423287306702936, "grad_norm": 0.07653351128101349, "learning_rate": 2.7987001424501425e-05, "loss": 0.0177, "step": 59840 }, { "epoch": 0.44240264924159545, "grad_norm": 0.08118182420730591, "learning_rate": 2.798329178537512e-05, "loss": 0.0196, "step": 59850 }, { "epoch": 0.4424765678128973, "grad_norm": 0.1382313370704651, "learning_rate": 2.7979582146248813e-05, "loss": 0.0182, "step": 59860 }, { "epoch": 0.4425504863841992, "grad_norm": 0.09287630766630173, "learning_rate": 2.797587250712251e-05, "loss": 0.0188, "step": 59870 }, { "epoch": 0.44262440495550104, "grad_norm": 0.1005270704627037, "learning_rate": 2.7972162867996205e-05, "loss": 0.0186, "step": 59880 }, { "epoch": 0.4426983235268029, "grad_norm": 0.0674884170293808, "learning_rate": 2.7968453228869894e-05, "loss": 0.0191, "step": 59890 }, { "epoch": 0.44277224209810473, "grad_norm": 0.12094124406576157, "learning_rate": 2.796474358974359e-05, "loss": 0.0178, "step": 59900 }, { "epoch": 0.4428461606694066, "grad_norm": 0.06646429002285004, "learning_rate": 2.7961033950617286e-05, "loss": 0.0171, "step": 59910 }, { "epoch": 0.4429200792407084, "grad_norm": 0.06779291480779648, "learning_rate": 2.795732431149098e-05, "loss": 0.0179, "step": 59920 }, { "epoch": 0.44299399781201027, "grad_norm": 0.09634333848953247, "learning_rate": 2.7953614672364675e-05, "loss": 0.0187, "step": 59930 }, { "epoch": 0.44306791638331217, "grad_norm": 0.06265617907047272, "learning_rate": 2.7949905033238368e-05, "loss": 0.0177, "step": 59940 }, { "epoch": 0.443141834954614, "grad_norm": 0.06748286634683609, "learning_rate": 2.7946195394112064e-05, "loss": 0.0179, "step": 59950 }, { "epoch": 0.44321575352591586, "grad_norm": 0.10775701701641083, "learning_rate": 2.794248575498576e-05, "loss": 0.0165, "step": 59960 }, { "epoch": 0.4432896720972177, "grad_norm": 0.06812064349651337, "learning_rate": 2.793877611585945e-05, "loss": 0.0193, "step": 59970 }, { "epoch": 0.44336359066851955, "grad_norm": 0.10246866941452026, "learning_rate": 2.7935066476733145e-05, "loss": 0.0176, "step": 59980 }, { "epoch": 0.4434375092398214, "grad_norm": 0.0841251090168953, "learning_rate": 2.7931356837606837e-05, "loss": 0.019, "step": 59990 }, { "epoch": 0.4435114278111233, "grad_norm": 0.07392372936010361, "learning_rate": 2.7927647198480533e-05, "loss": 0.0177, "step": 60000 }, { "epoch": 0.4435114278111233, "eval_f1": 0.6183157522063305, "eval_loss": 0.01798221655189991, "eval_precision": 0.49327030049358794, "eval_recall": 0.8282895570479518, "eval_runtime": 2668.5091, "eval_samples_per_second": 202.785, "eval_steps_per_second": 3.169, "step": 60000 }, { "epoch": 0.44358534638242514, "grad_norm": 0.08475377410650253, "learning_rate": 2.792393755935423e-05, "loss": 0.0168, "step": 60010 }, { "epoch": 0.443659264953727, "grad_norm": 0.06998006999492645, "learning_rate": 2.7920227920227922e-05, "loss": 0.0165, "step": 60020 }, { "epoch": 0.44373318352502883, "grad_norm": 0.0669107660651207, "learning_rate": 2.7916518281101618e-05, "loss": 0.0149, "step": 60030 }, { "epoch": 0.4438071020963307, "grad_norm": 0.09289882332086563, "learning_rate": 2.7912808641975307e-05, "loss": 0.0174, "step": 60040 }, { "epoch": 0.4438810206676325, "grad_norm": 0.07927216589450836, "learning_rate": 2.7909099002849003e-05, "loss": 0.0181, "step": 60050 }, { "epoch": 0.44395493923893437, "grad_norm": 0.07733915001153946, "learning_rate": 2.79053893637227e-05, "loss": 0.0214, "step": 60060 }, { "epoch": 0.44402885781023627, "grad_norm": 0.06538470089435577, "learning_rate": 2.790167972459639e-05, "loss": 0.0154, "step": 60070 }, { "epoch": 0.4441027763815381, "grad_norm": 0.08921214938163757, "learning_rate": 2.7897970085470087e-05, "loss": 0.0208, "step": 60080 }, { "epoch": 0.44417669495283996, "grad_norm": 0.09106167405843735, "learning_rate": 2.789426044634378e-05, "loss": 0.0161, "step": 60090 }, { "epoch": 0.4442506135241418, "grad_norm": 0.05333057418465614, "learning_rate": 2.7890550807217476e-05, "loss": 0.0163, "step": 60100 }, { "epoch": 0.44432453209544365, "grad_norm": 0.08506513386964798, "learning_rate": 2.7886841168091172e-05, "loss": 0.0187, "step": 60110 }, { "epoch": 0.4443984506667455, "grad_norm": 0.06936834007501602, "learning_rate": 2.788313152896486e-05, "loss": 0.0166, "step": 60120 }, { "epoch": 0.4444723692380474, "grad_norm": 0.07555869221687317, "learning_rate": 2.7879421889838557e-05, "loss": 0.0149, "step": 60130 }, { "epoch": 0.44454628780934924, "grad_norm": 0.08665190637111664, "learning_rate": 2.7875712250712256e-05, "loss": 0.0189, "step": 60140 }, { "epoch": 0.4446202063806511, "grad_norm": 0.08473851531744003, "learning_rate": 2.7872002611585946e-05, "loss": 0.0182, "step": 60150 }, { "epoch": 0.44469412495195293, "grad_norm": 0.09297851473093033, "learning_rate": 2.786829297245964e-05, "loss": 0.0209, "step": 60160 }, { "epoch": 0.4447680435232548, "grad_norm": 0.08520183712244034, "learning_rate": 2.7864583333333334e-05, "loss": 0.0198, "step": 60170 }, { "epoch": 0.4448419620945566, "grad_norm": 0.0894201397895813, "learning_rate": 2.786087369420703e-05, "loss": 0.0202, "step": 60180 }, { "epoch": 0.44491588066585847, "grad_norm": 0.063407301902771, "learning_rate": 2.7857164055080726e-05, "loss": 0.0199, "step": 60190 }, { "epoch": 0.44498979923716037, "grad_norm": 0.08296534419059753, "learning_rate": 2.7853454415954415e-05, "loss": 0.0185, "step": 60200 }, { "epoch": 0.4450637178084622, "grad_norm": 0.09972469508647919, "learning_rate": 2.784974477682811e-05, "loss": 0.0194, "step": 60210 }, { "epoch": 0.44513763637976406, "grad_norm": 0.07479657977819443, "learning_rate": 2.7846035137701804e-05, "loss": 0.0213, "step": 60220 }, { "epoch": 0.4452115549510659, "grad_norm": 0.09490302205085754, "learning_rate": 2.78423254985755e-05, "loss": 0.0187, "step": 60230 }, { "epoch": 0.44528547352236775, "grad_norm": 0.08586356043815613, "learning_rate": 2.7838615859449196e-05, "loss": 0.0185, "step": 60240 }, { "epoch": 0.4453593920936696, "grad_norm": 0.0650666207075119, "learning_rate": 2.783490622032289e-05, "loss": 0.0192, "step": 60250 }, { "epoch": 0.4454333106649715, "grad_norm": 0.05304092913866043, "learning_rate": 2.7831196581196584e-05, "loss": 0.0174, "step": 60260 }, { "epoch": 0.44550722923627334, "grad_norm": 0.07560031116008759, "learning_rate": 2.7827486942070273e-05, "loss": 0.0176, "step": 60270 }, { "epoch": 0.4455811478075752, "grad_norm": 0.05344974994659424, "learning_rate": 2.782377730294397e-05, "loss": 0.0177, "step": 60280 }, { "epoch": 0.44565506637887703, "grad_norm": 0.07482201606035233, "learning_rate": 2.782006766381767e-05, "loss": 0.0181, "step": 60290 }, { "epoch": 0.4457289849501789, "grad_norm": 0.06870289891958237, "learning_rate": 2.7816358024691358e-05, "loss": 0.0186, "step": 60300 }, { "epoch": 0.4458029035214807, "grad_norm": 0.08564918488264084, "learning_rate": 2.7812648385565054e-05, "loss": 0.0181, "step": 60310 }, { "epoch": 0.44587682209278257, "grad_norm": 0.058514028787612915, "learning_rate": 2.7808938746438747e-05, "loss": 0.0166, "step": 60320 }, { "epoch": 0.44595074066408447, "grad_norm": 0.07964633405208588, "learning_rate": 2.7805229107312443e-05, "loss": 0.0192, "step": 60330 }, { "epoch": 0.4460246592353863, "grad_norm": 0.07813328504562378, "learning_rate": 2.780151946818614e-05, "loss": 0.0186, "step": 60340 }, { "epoch": 0.44609857780668816, "grad_norm": 0.10391835123300552, "learning_rate": 2.7797809829059828e-05, "loss": 0.0191, "step": 60350 }, { "epoch": 0.44617249637799, "grad_norm": 0.0750124379992485, "learning_rate": 2.7794100189933524e-05, "loss": 0.0192, "step": 60360 }, { "epoch": 0.44624641494929185, "grad_norm": 0.07106636464595795, "learning_rate": 2.7790390550807223e-05, "loss": 0.0184, "step": 60370 }, { "epoch": 0.4463203335205937, "grad_norm": 0.10053660720586777, "learning_rate": 2.7786680911680912e-05, "loss": 0.0193, "step": 60380 }, { "epoch": 0.4463942520918956, "grad_norm": 0.07444123178720474, "learning_rate": 2.7782971272554608e-05, "loss": 0.0186, "step": 60390 }, { "epoch": 0.44646817066319744, "grad_norm": 0.07024513185024261, "learning_rate": 2.77792616334283e-05, "loss": 0.0198, "step": 60400 }, { "epoch": 0.4465420892344993, "grad_norm": 0.07613866776227951, "learning_rate": 2.7775551994301997e-05, "loss": 0.0172, "step": 60410 }, { "epoch": 0.44661600780580113, "grad_norm": 0.08213798701763153, "learning_rate": 2.7771842355175693e-05, "loss": 0.0188, "step": 60420 }, { "epoch": 0.446689926377103, "grad_norm": 0.10978935658931732, "learning_rate": 2.7768132716049382e-05, "loss": 0.0203, "step": 60430 }, { "epoch": 0.4467638449484048, "grad_norm": 0.07833638787269592, "learning_rate": 2.776442307692308e-05, "loss": 0.0182, "step": 60440 }, { "epoch": 0.44683776351970667, "grad_norm": 0.07589908689260483, "learning_rate": 2.776071343779677e-05, "loss": 0.0194, "step": 60450 }, { "epoch": 0.44691168209100857, "grad_norm": 0.09376027435064316, "learning_rate": 2.7757003798670466e-05, "loss": 0.0205, "step": 60460 }, { "epoch": 0.4469856006623104, "grad_norm": 0.09234382212162018, "learning_rate": 2.7753294159544162e-05, "loss": 0.0177, "step": 60470 }, { "epoch": 0.44705951923361226, "grad_norm": 0.09196515381336212, "learning_rate": 2.7749584520417855e-05, "loss": 0.0183, "step": 60480 }, { "epoch": 0.4471334378049141, "grad_norm": 0.07494409382343292, "learning_rate": 2.774587488129155e-05, "loss": 0.0164, "step": 60490 }, { "epoch": 0.44720735637621595, "grad_norm": 0.07437138259410858, "learning_rate": 2.774216524216524e-05, "loss": 0.019, "step": 60500 }, { "epoch": 0.4472812749475178, "grad_norm": 0.06940478831529617, "learning_rate": 2.7738455603038936e-05, "loss": 0.0193, "step": 60510 }, { "epoch": 0.4473551935188197, "grad_norm": 0.08766179531812668, "learning_rate": 2.7734745963912635e-05, "loss": 0.017, "step": 60520 }, { "epoch": 0.44742911209012154, "grad_norm": 0.08161088824272156, "learning_rate": 2.7731036324786325e-05, "loss": 0.0186, "step": 60530 }, { "epoch": 0.4475030306614234, "grad_norm": 0.08334532380104065, "learning_rate": 2.772732668566002e-05, "loss": 0.0181, "step": 60540 }, { "epoch": 0.44757694923272523, "grad_norm": 0.08141965419054031, "learning_rate": 2.7723617046533713e-05, "loss": 0.0157, "step": 60550 }, { "epoch": 0.4476508678040271, "grad_norm": 0.07597937434911728, "learning_rate": 2.771990740740741e-05, "loss": 0.0175, "step": 60560 }, { "epoch": 0.4477247863753289, "grad_norm": 0.09826496243476868, "learning_rate": 2.7716197768281105e-05, "loss": 0.0191, "step": 60570 }, { "epoch": 0.44779870494663077, "grad_norm": 0.07023176550865173, "learning_rate": 2.7712488129154794e-05, "loss": 0.0184, "step": 60580 }, { "epoch": 0.44787262351793267, "grad_norm": 0.07832328975200653, "learning_rate": 2.7708778490028494e-05, "loss": 0.0178, "step": 60590 }, { "epoch": 0.4479465420892345, "grad_norm": 0.11813930422067642, "learning_rate": 2.770506885090219e-05, "loss": 0.0174, "step": 60600 }, { "epoch": 0.44802046066053636, "grad_norm": 0.08903170377016068, "learning_rate": 2.770135921177588e-05, "loss": 0.0183, "step": 60610 }, { "epoch": 0.4480943792318382, "grad_norm": 0.10360957682132721, "learning_rate": 2.7697649572649575e-05, "loss": 0.0189, "step": 60620 }, { "epoch": 0.44816829780314005, "grad_norm": 0.060528405010700226, "learning_rate": 2.7693939933523267e-05, "loss": 0.019, "step": 60630 }, { "epoch": 0.4482422163744419, "grad_norm": 0.07136612385511398, "learning_rate": 2.7690230294396963e-05, "loss": 0.0183, "step": 60640 }, { "epoch": 0.4483161349457438, "grad_norm": 0.08754134178161621, "learning_rate": 2.768652065527066e-05, "loss": 0.0182, "step": 60650 }, { "epoch": 0.44839005351704564, "grad_norm": 0.08291976898908615, "learning_rate": 2.768281101614435e-05, "loss": 0.0181, "step": 60660 }, { "epoch": 0.4484639720883475, "grad_norm": 0.0794183537364006, "learning_rate": 2.7679101377018048e-05, "loss": 0.0184, "step": 60670 }, { "epoch": 0.44853789065964933, "grad_norm": 0.05834812670946121, "learning_rate": 2.7675391737891737e-05, "loss": 0.0184, "step": 60680 }, { "epoch": 0.4486118092309512, "grad_norm": 0.06786110252141953, "learning_rate": 2.7671682098765433e-05, "loss": 0.0157, "step": 60690 }, { "epoch": 0.448685727802253, "grad_norm": 0.06590061634778976, "learning_rate": 2.766797245963913e-05, "loss": 0.0156, "step": 60700 }, { "epoch": 0.44875964637355487, "grad_norm": 0.07546421140432358, "learning_rate": 2.766426282051282e-05, "loss": 0.0176, "step": 60710 }, { "epoch": 0.44883356494485677, "grad_norm": 0.06898737698793411, "learning_rate": 2.7660553181386517e-05, "loss": 0.0175, "step": 60720 }, { "epoch": 0.4489074835161586, "grad_norm": 0.08143886923789978, "learning_rate": 2.7656843542260207e-05, "loss": 0.0187, "step": 60730 }, { "epoch": 0.44898140208746046, "grad_norm": 0.08089787513017654, "learning_rate": 2.7653133903133903e-05, "loss": 0.0178, "step": 60740 }, { "epoch": 0.4490553206587623, "grad_norm": 0.08173342049121857, "learning_rate": 2.7649424264007602e-05, "loss": 0.0174, "step": 60750 }, { "epoch": 0.44912923923006415, "grad_norm": 0.07083459198474884, "learning_rate": 2.764571462488129e-05, "loss": 0.0176, "step": 60760 }, { "epoch": 0.449203157801366, "grad_norm": 0.08658960461616516, "learning_rate": 2.7642004985754987e-05, "loss": 0.0172, "step": 60770 }, { "epoch": 0.4492770763726679, "grad_norm": 0.1044207438826561, "learning_rate": 2.763829534662868e-05, "loss": 0.0182, "step": 60780 }, { "epoch": 0.44935099494396974, "grad_norm": 0.08168955147266388, "learning_rate": 2.7634585707502376e-05, "loss": 0.0188, "step": 60790 }, { "epoch": 0.4494249135152716, "grad_norm": 0.07407943159341812, "learning_rate": 2.763087606837607e-05, "loss": 0.0175, "step": 60800 }, { "epoch": 0.44949883208657343, "grad_norm": 0.09754371643066406, "learning_rate": 2.762716642924976e-05, "loss": 0.0186, "step": 60810 }, { "epoch": 0.4495727506578753, "grad_norm": 0.13254565000534058, "learning_rate": 2.762345679012346e-05, "loss": 0.0203, "step": 60820 }, { "epoch": 0.4496466692291771, "grad_norm": 0.0885326936841011, "learning_rate": 2.7619747150997156e-05, "loss": 0.018, "step": 60830 }, { "epoch": 0.44972058780047897, "grad_norm": 0.08535313606262207, "learning_rate": 2.7616037511870845e-05, "loss": 0.0192, "step": 60840 }, { "epoch": 0.44979450637178087, "grad_norm": 0.08815433084964752, "learning_rate": 2.761232787274454e-05, "loss": 0.0198, "step": 60850 }, { "epoch": 0.4498684249430827, "grad_norm": 0.16081440448760986, "learning_rate": 2.7608618233618234e-05, "loss": 0.0175, "step": 60860 }, { "epoch": 0.44994234351438456, "grad_norm": 0.06050868704915047, "learning_rate": 2.760490859449193e-05, "loss": 0.018, "step": 60870 }, { "epoch": 0.4500162620856864, "grad_norm": 0.08731406927108765, "learning_rate": 2.7601198955365626e-05, "loss": 0.0209, "step": 60880 }, { "epoch": 0.45009018065698825, "grad_norm": 0.08373596519231796, "learning_rate": 2.7597489316239315e-05, "loss": 0.0179, "step": 60890 }, { "epoch": 0.4501640992282901, "grad_norm": 0.10616223514080048, "learning_rate": 2.7593779677113014e-05, "loss": 0.0177, "step": 60900 }, { "epoch": 0.450238017799592, "grad_norm": 0.06489362567663193, "learning_rate": 2.7590070037986704e-05, "loss": 0.0174, "step": 60910 }, { "epoch": 0.45031193637089384, "grad_norm": 0.07929858565330505, "learning_rate": 2.75863603988604e-05, "loss": 0.02, "step": 60920 }, { "epoch": 0.4503858549421957, "grad_norm": 0.08207009732723236, "learning_rate": 2.7582650759734095e-05, "loss": 0.0182, "step": 60930 }, { "epoch": 0.45045977351349753, "grad_norm": 0.06570993363857269, "learning_rate": 2.7578941120607788e-05, "loss": 0.0176, "step": 60940 }, { "epoch": 0.4505336920847994, "grad_norm": 0.08798123151063919, "learning_rate": 2.7575231481481484e-05, "loss": 0.0155, "step": 60950 }, { "epoch": 0.4506076106561012, "grad_norm": 0.09389277547597885, "learning_rate": 2.7571521842355173e-05, "loss": 0.0168, "step": 60960 }, { "epoch": 0.45068152922740307, "grad_norm": 0.05513633042573929, "learning_rate": 2.7567812203228873e-05, "loss": 0.0163, "step": 60970 }, { "epoch": 0.45075544779870497, "grad_norm": 0.0851559042930603, "learning_rate": 2.756410256410257e-05, "loss": 0.0177, "step": 60980 }, { "epoch": 0.4508293663700068, "grad_norm": 0.08380331844091415, "learning_rate": 2.7560392924976258e-05, "loss": 0.0181, "step": 60990 }, { "epoch": 0.45090328494130866, "grad_norm": 0.08465258032083511, "learning_rate": 2.7556683285849954e-05, "loss": 0.0178, "step": 61000 }, { "epoch": 0.4509772035126105, "grad_norm": 0.07717887312173843, "learning_rate": 2.7552973646723646e-05, "loss": 0.0176, "step": 61010 }, { "epoch": 0.45105112208391235, "grad_norm": 0.10460257530212402, "learning_rate": 2.7549264007597342e-05, "loss": 0.0191, "step": 61020 }, { "epoch": 0.4511250406552142, "grad_norm": 0.08913242071866989, "learning_rate": 2.7545554368471038e-05, "loss": 0.0194, "step": 61030 }, { "epoch": 0.4511989592265161, "grad_norm": 0.059481166303157806, "learning_rate": 2.7541844729344727e-05, "loss": 0.0172, "step": 61040 }, { "epoch": 0.45127287779781794, "grad_norm": 0.0636981949210167, "learning_rate": 2.7538135090218427e-05, "loss": 0.0181, "step": 61050 }, { "epoch": 0.4513467963691198, "grad_norm": 0.08157453685998917, "learning_rate": 2.7534425451092123e-05, "loss": 0.0193, "step": 61060 }, { "epoch": 0.45142071494042163, "grad_norm": 0.08309981226921082, "learning_rate": 2.7530715811965812e-05, "loss": 0.0169, "step": 61070 }, { "epoch": 0.4514946335117235, "grad_norm": 0.08034002035856247, "learning_rate": 2.7527006172839508e-05, "loss": 0.0189, "step": 61080 }, { "epoch": 0.4515685520830253, "grad_norm": 0.08259232342243195, "learning_rate": 2.75232965337132e-05, "loss": 0.0171, "step": 61090 }, { "epoch": 0.45164247065432717, "grad_norm": 0.10203670710325241, "learning_rate": 2.7519586894586896e-05, "loss": 0.0193, "step": 61100 }, { "epoch": 0.45171638922562907, "grad_norm": 0.08340781182050705, "learning_rate": 2.7515877255460592e-05, "loss": 0.0179, "step": 61110 }, { "epoch": 0.4517903077969309, "grad_norm": 0.11332692950963974, "learning_rate": 2.7512167616334285e-05, "loss": 0.0221, "step": 61120 }, { "epoch": 0.45186422636823276, "grad_norm": 0.07806441187858582, "learning_rate": 2.750845797720798e-05, "loss": 0.0176, "step": 61130 }, { "epoch": 0.4519381449395346, "grad_norm": 0.0680718943476677, "learning_rate": 2.750474833808167e-05, "loss": 0.0173, "step": 61140 }, { "epoch": 0.45201206351083645, "grad_norm": 0.07029244303703308, "learning_rate": 2.7501038698955366e-05, "loss": 0.0149, "step": 61150 }, { "epoch": 0.4520859820821383, "grad_norm": 0.08795084804296494, "learning_rate": 2.7497329059829062e-05, "loss": 0.0221, "step": 61160 }, { "epoch": 0.4521599006534402, "grad_norm": 0.08229811489582062, "learning_rate": 2.7493619420702755e-05, "loss": 0.0172, "step": 61170 }, { "epoch": 0.45223381922474204, "grad_norm": 0.07843748480081558, "learning_rate": 2.748990978157645e-05, "loss": 0.0183, "step": 61180 }, { "epoch": 0.4523077377960439, "grad_norm": 0.11385442316532135, "learning_rate": 2.748620014245014e-05, "loss": 0.0184, "step": 61190 }, { "epoch": 0.45238165636734573, "grad_norm": 0.08859772235155106, "learning_rate": 2.748249050332384e-05, "loss": 0.0185, "step": 61200 }, { "epoch": 0.4524555749386476, "grad_norm": 0.07936745882034302, "learning_rate": 2.7478780864197535e-05, "loss": 0.0178, "step": 61210 }, { "epoch": 0.4525294935099494, "grad_norm": 0.07612825930118561, "learning_rate": 2.7475071225071224e-05, "loss": 0.0189, "step": 61220 }, { "epoch": 0.45260341208125127, "grad_norm": 0.0908486470580101, "learning_rate": 2.747136158594492e-05, "loss": 0.0182, "step": 61230 }, { "epoch": 0.45267733065255317, "grad_norm": 0.059742387384176254, "learning_rate": 2.7467651946818613e-05, "loss": 0.0187, "step": 61240 }, { "epoch": 0.452751249223855, "grad_norm": 0.08679183572530746, "learning_rate": 2.746394230769231e-05, "loss": 0.0179, "step": 61250 }, { "epoch": 0.45282516779515686, "grad_norm": 0.08411939442157745, "learning_rate": 2.7460232668566005e-05, "loss": 0.0185, "step": 61260 }, { "epoch": 0.4528990863664587, "grad_norm": 0.08937957137823105, "learning_rate": 2.7456523029439697e-05, "loss": 0.0188, "step": 61270 }, { "epoch": 0.45297300493776055, "grad_norm": 0.0620824359357357, "learning_rate": 2.7452813390313393e-05, "loss": 0.0191, "step": 61280 }, { "epoch": 0.4530469235090624, "grad_norm": 0.06696116924285889, "learning_rate": 2.744910375118709e-05, "loss": 0.0188, "step": 61290 }, { "epoch": 0.4531208420803643, "grad_norm": 0.08053241670131683, "learning_rate": 2.744539411206078e-05, "loss": 0.0159, "step": 61300 }, { "epoch": 0.45319476065166614, "grad_norm": 0.07864797115325928, "learning_rate": 2.7441684472934474e-05, "loss": 0.0171, "step": 61310 }, { "epoch": 0.453268679222968, "grad_norm": 0.08154179155826569, "learning_rate": 2.7437974833808167e-05, "loss": 0.0181, "step": 61320 }, { "epoch": 0.45334259779426983, "grad_norm": 0.07514984160661697, "learning_rate": 2.7434265194681863e-05, "loss": 0.0186, "step": 61330 }, { "epoch": 0.4534165163655717, "grad_norm": 0.07200212776660919, "learning_rate": 2.743055555555556e-05, "loss": 0.0171, "step": 61340 }, { "epoch": 0.4534904349368735, "grad_norm": 0.07293741405010223, "learning_rate": 2.742684591642925e-05, "loss": 0.02, "step": 61350 }, { "epoch": 0.45356435350817537, "grad_norm": 0.0856717899441719, "learning_rate": 2.7423136277302948e-05, "loss": 0.0193, "step": 61360 }, { "epoch": 0.45363827207947727, "grad_norm": 0.0812024474143982, "learning_rate": 2.7419426638176637e-05, "loss": 0.021, "step": 61370 }, { "epoch": 0.4537121906507791, "grad_norm": 0.0680481418967247, "learning_rate": 2.7415716999050333e-05, "loss": 0.0187, "step": 61380 }, { "epoch": 0.45378610922208096, "grad_norm": 0.09765014797449112, "learning_rate": 2.741200735992403e-05, "loss": 0.015, "step": 61390 }, { "epoch": 0.4538600277933828, "grad_norm": 0.07712357491254807, "learning_rate": 2.740829772079772e-05, "loss": 0.0179, "step": 61400 }, { "epoch": 0.45393394636468465, "grad_norm": 0.10133178532123566, "learning_rate": 2.7404588081671417e-05, "loss": 0.02, "step": 61410 }, { "epoch": 0.4540078649359865, "grad_norm": 0.09235849231481552, "learning_rate": 2.740087844254511e-05, "loss": 0.0188, "step": 61420 }, { "epoch": 0.4540817835072884, "grad_norm": 0.11862324178218842, "learning_rate": 2.7397168803418806e-05, "loss": 0.0204, "step": 61430 }, { "epoch": 0.45415570207859024, "grad_norm": 0.06792068481445312, "learning_rate": 2.7393459164292502e-05, "loss": 0.017, "step": 61440 }, { "epoch": 0.4542296206498921, "grad_norm": 0.08101335167884827, "learning_rate": 2.738974952516619e-05, "loss": 0.015, "step": 61450 }, { "epoch": 0.45430353922119393, "grad_norm": 0.09211039543151855, "learning_rate": 2.7386039886039887e-05, "loss": 0.0207, "step": 61460 }, { "epoch": 0.4543774577924958, "grad_norm": 0.11689606308937073, "learning_rate": 2.738233024691358e-05, "loss": 0.02, "step": 61470 }, { "epoch": 0.4544513763637976, "grad_norm": 0.09053738415241241, "learning_rate": 2.7378620607787275e-05, "loss": 0.0166, "step": 61480 }, { "epoch": 0.4545252949350995, "grad_norm": 0.09697338938713074, "learning_rate": 2.737491096866097e-05, "loss": 0.0202, "step": 61490 }, { "epoch": 0.45459921350640137, "grad_norm": 0.06055251881480217, "learning_rate": 2.7371201329534664e-05, "loss": 0.0178, "step": 61500 }, { "epoch": 0.4546731320777032, "grad_norm": 0.06681554764509201, "learning_rate": 2.736749169040836e-05, "loss": 0.0193, "step": 61510 }, { "epoch": 0.45474705064900506, "grad_norm": 0.08950188755989075, "learning_rate": 2.7363782051282056e-05, "loss": 0.0187, "step": 61520 }, { "epoch": 0.4548209692203069, "grad_norm": 0.0626884400844574, "learning_rate": 2.7360072412155745e-05, "loss": 0.021, "step": 61530 }, { "epoch": 0.45489488779160875, "grad_norm": 0.09036307781934738, "learning_rate": 2.735636277302944e-05, "loss": 0.0216, "step": 61540 }, { "epoch": 0.4549688063629106, "grad_norm": 0.08309593796730042, "learning_rate": 2.7352653133903134e-05, "loss": 0.0161, "step": 61550 }, { "epoch": 0.4550427249342125, "grad_norm": 0.07348351180553436, "learning_rate": 2.734894349477683e-05, "loss": 0.0209, "step": 61560 }, { "epoch": 0.45511664350551434, "grad_norm": 0.0639307051897049, "learning_rate": 2.7345233855650526e-05, "loss": 0.0164, "step": 61570 }, { "epoch": 0.4551905620768162, "grad_norm": 0.07979018986225128, "learning_rate": 2.7341524216524218e-05, "loss": 0.021, "step": 61580 }, { "epoch": 0.45526448064811803, "grad_norm": 0.08083287626504898, "learning_rate": 2.7337814577397914e-05, "loss": 0.016, "step": 61590 }, { "epoch": 0.4553383992194199, "grad_norm": 0.07112161815166473, "learning_rate": 2.7334104938271603e-05, "loss": 0.018, "step": 61600 }, { "epoch": 0.4554123177907217, "grad_norm": 0.08322005718946457, "learning_rate": 2.73303952991453e-05, "loss": 0.0198, "step": 61610 }, { "epoch": 0.4554862363620236, "grad_norm": 0.08761461824178696, "learning_rate": 2.7326685660018995e-05, "loss": 0.0187, "step": 61620 }, { "epoch": 0.45556015493332547, "grad_norm": 0.11122740060091019, "learning_rate": 2.7322976020892688e-05, "loss": 0.0196, "step": 61630 }, { "epoch": 0.4556340735046273, "grad_norm": 0.10976991802453995, "learning_rate": 2.7319266381766384e-05, "loss": 0.0182, "step": 61640 }, { "epoch": 0.45570799207592916, "grad_norm": 0.0777834877371788, "learning_rate": 2.7315556742640076e-05, "loss": 0.0195, "step": 61650 }, { "epoch": 0.455781910647231, "grad_norm": 0.07317525148391724, "learning_rate": 2.7311847103513772e-05, "loss": 0.0203, "step": 61660 }, { "epoch": 0.45585582921853285, "grad_norm": 0.06810017675161362, "learning_rate": 2.7308137464387468e-05, "loss": 0.0192, "step": 61670 }, { "epoch": 0.4559297477898347, "grad_norm": 0.08570457994937897, "learning_rate": 2.7304427825261157e-05, "loss": 0.0175, "step": 61680 }, { "epoch": 0.4560036663611366, "grad_norm": 0.08259614557027817, "learning_rate": 2.7300718186134853e-05, "loss": 0.0166, "step": 61690 }, { "epoch": 0.45607758493243844, "grad_norm": 0.22294946014881134, "learning_rate": 2.7297008547008546e-05, "loss": 0.0199, "step": 61700 }, { "epoch": 0.4561515035037403, "grad_norm": 0.0658465325832367, "learning_rate": 2.7293298907882242e-05, "loss": 0.0199, "step": 61710 }, { "epoch": 0.45622542207504213, "grad_norm": 0.06378777325153351, "learning_rate": 2.7289589268755938e-05, "loss": 0.0183, "step": 61720 }, { "epoch": 0.456299340646344, "grad_norm": 0.08054514229297638, "learning_rate": 2.728587962962963e-05, "loss": 0.0171, "step": 61730 }, { "epoch": 0.4563732592176458, "grad_norm": 0.08308053761720657, "learning_rate": 2.7282169990503327e-05, "loss": 0.017, "step": 61740 }, { "epoch": 0.4564471777889477, "grad_norm": 0.08155302703380585, "learning_rate": 2.7278460351377022e-05, "loss": 0.0184, "step": 61750 }, { "epoch": 0.45652109636024957, "grad_norm": 0.10983440279960632, "learning_rate": 2.727475071225071e-05, "loss": 0.0193, "step": 61760 }, { "epoch": 0.4565950149315514, "grad_norm": 0.08785505592823029, "learning_rate": 2.7271041073124408e-05, "loss": 0.0184, "step": 61770 }, { "epoch": 0.45666893350285326, "grad_norm": 0.07460859417915344, "learning_rate": 2.72673314339981e-05, "loss": 0.0166, "step": 61780 }, { "epoch": 0.4567428520741551, "grad_norm": 0.07602991908788681, "learning_rate": 2.7263621794871796e-05, "loss": 0.0188, "step": 61790 }, { "epoch": 0.45681677064545695, "grad_norm": 0.09512347728013992, "learning_rate": 2.7259912155745492e-05, "loss": 0.0202, "step": 61800 }, { "epoch": 0.4568906892167588, "grad_norm": 0.06678026914596558, "learning_rate": 2.7256202516619185e-05, "loss": 0.0179, "step": 61810 }, { "epoch": 0.4569646077880607, "grad_norm": 0.06316298246383667, "learning_rate": 2.725249287749288e-05, "loss": 0.0154, "step": 61820 }, { "epoch": 0.45703852635936254, "grad_norm": 0.0684802457690239, "learning_rate": 2.724878323836657e-05, "loss": 0.0175, "step": 61830 }, { "epoch": 0.4571124449306644, "grad_norm": 0.07379800081253052, "learning_rate": 2.7245073599240266e-05, "loss": 0.0202, "step": 61840 }, { "epoch": 0.45718636350196623, "grad_norm": 0.09847801923751831, "learning_rate": 2.7241363960113965e-05, "loss": 0.0216, "step": 61850 }, { "epoch": 0.4572602820732681, "grad_norm": 0.06762059777975082, "learning_rate": 2.7237654320987654e-05, "loss": 0.0169, "step": 61860 }, { "epoch": 0.4573342006445699, "grad_norm": 0.08554498851299286, "learning_rate": 2.723394468186135e-05, "loss": 0.0154, "step": 61870 }, { "epoch": 0.4574081192158718, "grad_norm": 0.06983321160078049, "learning_rate": 2.7230235042735043e-05, "loss": 0.0185, "step": 61880 }, { "epoch": 0.45748203778717367, "grad_norm": 0.06776176393032074, "learning_rate": 2.722652540360874e-05, "loss": 0.0164, "step": 61890 }, { "epoch": 0.4575559563584755, "grad_norm": 0.08174600452184677, "learning_rate": 2.7222815764482435e-05, "loss": 0.0175, "step": 61900 }, { "epoch": 0.45762987492977736, "grad_norm": 0.08814748376607895, "learning_rate": 2.7219106125356124e-05, "loss": 0.0182, "step": 61910 }, { "epoch": 0.4577037935010792, "grad_norm": 0.08945896476507187, "learning_rate": 2.721539648622982e-05, "loss": 0.0186, "step": 61920 }, { "epoch": 0.45777771207238105, "grad_norm": 0.06929168850183487, "learning_rate": 2.7211686847103513e-05, "loss": 0.017, "step": 61930 }, { "epoch": 0.4578516306436829, "grad_norm": 0.08452693372964859, "learning_rate": 2.720797720797721e-05, "loss": 0.0187, "step": 61940 }, { "epoch": 0.4579255492149848, "grad_norm": 0.07486118376255035, "learning_rate": 2.7204267568850905e-05, "loss": 0.019, "step": 61950 }, { "epoch": 0.45799946778628664, "grad_norm": 0.10226127505302429, "learning_rate": 2.7200557929724597e-05, "loss": 0.0182, "step": 61960 }, { "epoch": 0.4580733863575885, "grad_norm": 0.07045385986566544, "learning_rate": 2.7196848290598293e-05, "loss": 0.0201, "step": 61970 }, { "epoch": 0.45814730492889033, "grad_norm": 0.08606812357902527, "learning_rate": 2.719313865147199e-05, "loss": 0.018, "step": 61980 }, { "epoch": 0.4582212235001922, "grad_norm": 0.05335699021816254, "learning_rate": 2.7189429012345678e-05, "loss": 0.0196, "step": 61990 }, { "epoch": 0.458295142071494, "grad_norm": 0.09203033149242401, "learning_rate": 2.7185719373219378e-05, "loss": 0.0163, "step": 62000 }, { "epoch": 0.4583690606427959, "grad_norm": 0.08226708322763443, "learning_rate": 2.7182009734093067e-05, "loss": 0.019, "step": 62010 }, { "epoch": 0.45844297921409777, "grad_norm": 0.08296380192041397, "learning_rate": 2.7178300094966763e-05, "loss": 0.0196, "step": 62020 }, { "epoch": 0.4585168977853996, "grad_norm": 0.07300078123807907, "learning_rate": 2.717459045584046e-05, "loss": 0.0208, "step": 62030 }, { "epoch": 0.45859081635670146, "grad_norm": 0.0772596001625061, "learning_rate": 2.717088081671415e-05, "loss": 0.0205, "step": 62040 }, { "epoch": 0.4586647349280033, "grad_norm": 0.0756126269698143, "learning_rate": 2.7167171177587847e-05, "loss": 0.0166, "step": 62050 }, { "epoch": 0.45873865349930515, "grad_norm": 0.0758863240480423, "learning_rate": 2.7163461538461536e-05, "loss": 0.017, "step": 62060 }, { "epoch": 0.458812572070607, "grad_norm": 0.05326606705784798, "learning_rate": 2.7159751899335232e-05, "loss": 0.0176, "step": 62070 }, { "epoch": 0.4588864906419089, "grad_norm": 0.09534876048564911, "learning_rate": 2.7156042260208932e-05, "loss": 0.0181, "step": 62080 }, { "epoch": 0.45896040921321074, "grad_norm": 0.09859345853328705, "learning_rate": 2.715233262108262e-05, "loss": 0.0182, "step": 62090 }, { "epoch": 0.4590343277845126, "grad_norm": 0.07618529349565506, "learning_rate": 2.7148622981956317e-05, "loss": 0.0172, "step": 62100 }, { "epoch": 0.45910824635581443, "grad_norm": 0.08859498053789139, "learning_rate": 2.714491334283001e-05, "loss": 0.0163, "step": 62110 }, { "epoch": 0.4591821649271163, "grad_norm": 0.07485493272542953, "learning_rate": 2.7141203703703705e-05, "loss": 0.0185, "step": 62120 }, { "epoch": 0.4592560834984181, "grad_norm": 0.08691943436861038, "learning_rate": 2.71374940645774e-05, "loss": 0.0177, "step": 62130 }, { "epoch": 0.45933000206972, "grad_norm": 0.10383077710866928, "learning_rate": 2.713378442545109e-05, "loss": 0.0177, "step": 62140 }, { "epoch": 0.45940392064102187, "grad_norm": 0.05785920470952988, "learning_rate": 2.713007478632479e-05, "loss": 0.0184, "step": 62150 }, { "epoch": 0.4594778392123237, "grad_norm": 0.0689610168337822, "learning_rate": 2.712636514719848e-05, "loss": 0.019, "step": 62160 }, { "epoch": 0.45955175778362556, "grad_norm": 0.10667440295219421, "learning_rate": 2.7122655508072175e-05, "loss": 0.0171, "step": 62170 }, { "epoch": 0.4596256763549274, "grad_norm": 0.08950226753950119, "learning_rate": 2.711894586894587e-05, "loss": 0.0173, "step": 62180 }, { "epoch": 0.45969959492622925, "grad_norm": 0.0803423747420311, "learning_rate": 2.7115236229819564e-05, "loss": 0.0182, "step": 62190 }, { "epoch": 0.4597735134975311, "grad_norm": 0.07441220432519913, "learning_rate": 2.711152659069326e-05, "loss": 0.0176, "step": 62200 }, { "epoch": 0.459847432068833, "grad_norm": 0.06424054503440857, "learning_rate": 2.7107816951566956e-05, "loss": 0.0168, "step": 62210 }, { "epoch": 0.45992135064013484, "grad_norm": 0.08061058074235916, "learning_rate": 2.7104107312440645e-05, "loss": 0.0153, "step": 62220 }, { "epoch": 0.4599952692114367, "grad_norm": 0.06604152172803879, "learning_rate": 2.7100397673314344e-05, "loss": 0.0164, "step": 62230 }, { "epoch": 0.46006918778273853, "grad_norm": 0.08139542490243912, "learning_rate": 2.7096688034188033e-05, "loss": 0.0186, "step": 62240 }, { "epoch": 0.4601431063540404, "grad_norm": 0.04587428644299507, "learning_rate": 2.709297839506173e-05, "loss": 0.0157, "step": 62250 }, { "epoch": 0.4602170249253422, "grad_norm": 0.10360642522573471, "learning_rate": 2.7089268755935425e-05, "loss": 0.0163, "step": 62260 }, { "epoch": 0.4602909434966441, "grad_norm": 0.09443079680204391, "learning_rate": 2.7085559116809118e-05, "loss": 0.0169, "step": 62270 }, { "epoch": 0.46036486206794597, "grad_norm": 0.06919102370738983, "learning_rate": 2.7081849477682814e-05, "loss": 0.0189, "step": 62280 }, { "epoch": 0.4604387806392478, "grad_norm": 0.09010902047157288, "learning_rate": 2.7078139838556503e-05, "loss": 0.0176, "step": 62290 }, { "epoch": 0.46051269921054966, "grad_norm": 0.06500197947025299, "learning_rate": 2.7074430199430202e-05, "loss": 0.017, "step": 62300 }, { "epoch": 0.4605866177818515, "grad_norm": 0.08220690488815308, "learning_rate": 2.70707205603039e-05, "loss": 0.0191, "step": 62310 }, { "epoch": 0.46066053635315335, "grad_norm": 0.08635219186544418, "learning_rate": 2.7067010921177588e-05, "loss": 0.018, "step": 62320 }, { "epoch": 0.4607344549244552, "grad_norm": 0.0828336849808693, "learning_rate": 2.7063301282051284e-05, "loss": 0.0203, "step": 62330 }, { "epoch": 0.4608083734957571, "grad_norm": 0.07880325615406036, "learning_rate": 2.7059591642924976e-05, "loss": 0.0177, "step": 62340 }, { "epoch": 0.46088229206705894, "grad_norm": 0.0864029973745346, "learning_rate": 2.7055882003798672e-05, "loss": 0.0177, "step": 62350 }, { "epoch": 0.4609562106383608, "grad_norm": 0.07870983332395554, "learning_rate": 2.7052172364672368e-05, "loss": 0.0158, "step": 62360 }, { "epoch": 0.46103012920966263, "grad_norm": 0.07564816623926163, "learning_rate": 2.7048462725546057e-05, "loss": 0.0186, "step": 62370 }, { "epoch": 0.4611040477809645, "grad_norm": 0.0991857647895813, "learning_rate": 2.7044753086419757e-05, "loss": 0.0188, "step": 62380 }, { "epoch": 0.4611779663522663, "grad_norm": 0.06728631258010864, "learning_rate": 2.7041043447293446e-05, "loss": 0.0188, "step": 62390 }, { "epoch": 0.4612518849235682, "grad_norm": 0.07630317658185959, "learning_rate": 2.7037333808167142e-05, "loss": 0.0188, "step": 62400 }, { "epoch": 0.46132580349487007, "grad_norm": 0.08242637664079666, "learning_rate": 2.7033624169040838e-05, "loss": 0.0157, "step": 62410 }, { "epoch": 0.4613997220661719, "grad_norm": 0.06703434139490128, "learning_rate": 2.702991452991453e-05, "loss": 0.0191, "step": 62420 }, { "epoch": 0.46147364063747376, "grad_norm": 0.08433108776807785, "learning_rate": 2.7026204890788226e-05, "loss": 0.0179, "step": 62430 }, { "epoch": 0.4615475592087756, "grad_norm": 0.09839016944169998, "learning_rate": 2.7022495251661922e-05, "loss": 0.018, "step": 62440 }, { "epoch": 0.46162147778007745, "grad_norm": 0.06323008984327316, "learning_rate": 2.7018785612535615e-05, "loss": 0.0177, "step": 62450 }, { "epoch": 0.4616953963513793, "grad_norm": 0.09598717838525772, "learning_rate": 2.701507597340931e-05, "loss": 0.0196, "step": 62460 }, { "epoch": 0.4617693149226812, "grad_norm": 0.0499408058822155, "learning_rate": 2.7011366334283e-05, "loss": 0.016, "step": 62470 }, { "epoch": 0.46184323349398304, "grad_norm": 0.06782973557710648, "learning_rate": 2.7007656695156696e-05, "loss": 0.0176, "step": 62480 }, { "epoch": 0.4619171520652849, "grad_norm": 0.06388473510742188, "learning_rate": 2.7003947056030392e-05, "loss": 0.0165, "step": 62490 }, { "epoch": 0.46199107063658673, "grad_norm": 0.08375228941440582, "learning_rate": 2.7000237416904084e-05, "loss": 0.0161, "step": 62500 }, { "epoch": 0.4620649892078886, "grad_norm": 0.09047287702560425, "learning_rate": 2.699652777777778e-05, "loss": 0.0216, "step": 62510 }, { "epoch": 0.4621389077791904, "grad_norm": 0.08288053423166275, "learning_rate": 2.699281813865147e-05, "loss": 0.0181, "step": 62520 }, { "epoch": 0.4622128263504923, "grad_norm": 0.08707530796527863, "learning_rate": 2.698910849952517e-05, "loss": 0.018, "step": 62530 }, { "epoch": 0.46228674492179417, "grad_norm": 0.06646700948476791, "learning_rate": 2.6985398860398865e-05, "loss": 0.019, "step": 62540 }, { "epoch": 0.462360663493096, "grad_norm": 0.08140867203474045, "learning_rate": 2.6981689221272554e-05, "loss": 0.0171, "step": 62550 }, { "epoch": 0.46243458206439786, "grad_norm": 0.0692920982837677, "learning_rate": 2.697797958214625e-05, "loss": 0.0179, "step": 62560 }, { "epoch": 0.4625085006356997, "grad_norm": 0.07965762168169022, "learning_rate": 2.6974269943019943e-05, "loss": 0.018, "step": 62570 }, { "epoch": 0.46258241920700155, "grad_norm": 0.07014095783233643, "learning_rate": 2.697056030389364e-05, "loss": 0.0174, "step": 62580 }, { "epoch": 0.4626563377783034, "grad_norm": 0.07212305814027786, "learning_rate": 2.6966850664767335e-05, "loss": 0.0203, "step": 62590 }, { "epoch": 0.4627302563496053, "grad_norm": 0.10186107456684113, "learning_rate": 2.6963141025641024e-05, "loss": 0.0193, "step": 62600 }, { "epoch": 0.46280417492090714, "grad_norm": 0.07537295669317245, "learning_rate": 2.6959431386514723e-05, "loss": 0.0173, "step": 62610 }, { "epoch": 0.462878093492209, "grad_norm": 0.08256471157073975, "learning_rate": 2.6955721747388412e-05, "loss": 0.0193, "step": 62620 }, { "epoch": 0.46295201206351083, "grad_norm": 0.08015892654657364, "learning_rate": 2.695201210826211e-05, "loss": 0.0185, "step": 62630 }, { "epoch": 0.4630259306348127, "grad_norm": 0.0764988586306572, "learning_rate": 2.6948302469135804e-05, "loss": 0.0184, "step": 62640 }, { "epoch": 0.4630998492061145, "grad_norm": 0.07432615756988525, "learning_rate": 2.6944592830009497e-05, "loss": 0.0194, "step": 62650 }, { "epoch": 0.4631737677774164, "grad_norm": 0.0674583911895752, "learning_rate": 2.6940883190883193e-05, "loss": 0.0194, "step": 62660 }, { "epoch": 0.46324768634871827, "grad_norm": 0.07448495924472809, "learning_rate": 2.693717355175689e-05, "loss": 0.0195, "step": 62670 }, { "epoch": 0.4633216049200201, "grad_norm": 0.07558556646108627, "learning_rate": 2.693346391263058e-05, "loss": 0.0208, "step": 62680 }, { "epoch": 0.46339552349132196, "grad_norm": 0.07623834908008575, "learning_rate": 2.6929754273504277e-05, "loss": 0.0209, "step": 62690 }, { "epoch": 0.4634694420626238, "grad_norm": 0.07406779378652573, "learning_rate": 2.6926044634377967e-05, "loss": 0.0182, "step": 62700 }, { "epoch": 0.46354336063392565, "grad_norm": 0.07742155343294144, "learning_rate": 2.6922334995251663e-05, "loss": 0.019, "step": 62710 }, { "epoch": 0.4636172792052275, "grad_norm": 0.07569239288568497, "learning_rate": 2.691862535612536e-05, "loss": 0.0173, "step": 62720 }, { "epoch": 0.4636911977765294, "grad_norm": 0.07936278730630875, "learning_rate": 2.691491571699905e-05, "loss": 0.0164, "step": 62730 }, { "epoch": 0.46376511634783124, "grad_norm": 0.08103445172309875, "learning_rate": 2.6911206077872747e-05, "loss": 0.0168, "step": 62740 }, { "epoch": 0.4638390349191331, "grad_norm": 0.0931391492486, "learning_rate": 2.6907496438746436e-05, "loss": 0.019, "step": 62750 }, { "epoch": 0.46391295349043493, "grad_norm": 0.07881567627191544, "learning_rate": 2.6903786799620136e-05, "loss": 0.0194, "step": 62760 }, { "epoch": 0.4639868720617368, "grad_norm": 0.08158383518457413, "learning_rate": 2.690007716049383e-05, "loss": 0.0186, "step": 62770 }, { "epoch": 0.4640607906330386, "grad_norm": 0.07594239711761475, "learning_rate": 2.689636752136752e-05, "loss": 0.0181, "step": 62780 }, { "epoch": 0.4641347092043405, "grad_norm": 0.07770699262619019, "learning_rate": 2.6892657882241217e-05, "loss": 0.0187, "step": 62790 }, { "epoch": 0.46420862777564237, "grad_norm": 0.07621417939662933, "learning_rate": 2.688894824311491e-05, "loss": 0.0191, "step": 62800 }, { "epoch": 0.4642825463469442, "grad_norm": 0.06834164261817932, "learning_rate": 2.6885238603988605e-05, "loss": 0.0182, "step": 62810 }, { "epoch": 0.46435646491824606, "grad_norm": 0.16739051043987274, "learning_rate": 2.68815289648623e-05, "loss": 0.0187, "step": 62820 }, { "epoch": 0.4644303834895479, "grad_norm": 0.09170664846897125, "learning_rate": 2.6877819325735994e-05, "loss": 0.0219, "step": 62830 }, { "epoch": 0.46450430206084975, "grad_norm": 0.09461186826229095, "learning_rate": 2.687410968660969e-05, "loss": 0.0176, "step": 62840 }, { "epoch": 0.4645782206321516, "grad_norm": 0.06674101203680038, "learning_rate": 2.687040004748338e-05, "loss": 0.0186, "step": 62850 }, { "epoch": 0.4646521392034535, "grad_norm": 0.058075059205293655, "learning_rate": 2.6866690408357075e-05, "loss": 0.0176, "step": 62860 }, { "epoch": 0.46472605777475534, "grad_norm": 0.07084377855062485, "learning_rate": 2.686298076923077e-05, "loss": 0.019, "step": 62870 }, { "epoch": 0.4647999763460572, "grad_norm": 0.11315616220235825, "learning_rate": 2.6859271130104463e-05, "loss": 0.0186, "step": 62880 }, { "epoch": 0.46487389491735903, "grad_norm": 0.09645523875951767, "learning_rate": 2.685556149097816e-05, "loss": 0.021, "step": 62890 }, { "epoch": 0.4649478134886609, "grad_norm": 0.07578028738498688, "learning_rate": 2.6851851851851855e-05, "loss": 0.0175, "step": 62900 }, { "epoch": 0.4650217320599627, "grad_norm": 0.09306954592466354, "learning_rate": 2.6848142212725548e-05, "loss": 0.018, "step": 62910 }, { "epoch": 0.4650956506312646, "grad_norm": 0.06260194629430771, "learning_rate": 2.6844432573599244e-05, "loss": 0.0188, "step": 62920 }, { "epoch": 0.46516956920256647, "grad_norm": 0.08187243342399597, "learning_rate": 2.6840722934472933e-05, "loss": 0.0174, "step": 62930 }, { "epoch": 0.4652434877738683, "grad_norm": 0.10176604241132736, "learning_rate": 2.683701329534663e-05, "loss": 0.0168, "step": 62940 }, { "epoch": 0.46531740634517016, "grad_norm": 0.10157504677772522, "learning_rate": 2.6833303656220325e-05, "loss": 0.0174, "step": 62950 }, { "epoch": 0.465391324916472, "grad_norm": 0.09087589383125305, "learning_rate": 2.6829594017094018e-05, "loss": 0.0179, "step": 62960 }, { "epoch": 0.46546524348777385, "grad_norm": 0.07912372052669525, "learning_rate": 2.6825884377967714e-05, "loss": 0.0164, "step": 62970 }, { "epoch": 0.4655391620590757, "grad_norm": 0.09425844252109528, "learning_rate": 2.6822174738841406e-05, "loss": 0.0203, "step": 62980 }, { "epoch": 0.4656130806303776, "grad_norm": 0.08376140147447586, "learning_rate": 2.6818465099715102e-05, "loss": 0.0182, "step": 62990 }, { "epoch": 0.46568699920167944, "grad_norm": 0.0748540386557579, "learning_rate": 2.6814755460588798e-05, "loss": 0.0187, "step": 63000 }, { "epoch": 0.4657609177729813, "grad_norm": 0.09936424344778061, "learning_rate": 2.6811045821462487e-05, "loss": 0.0183, "step": 63010 }, { "epoch": 0.46583483634428313, "grad_norm": 0.0781758725643158, "learning_rate": 2.6807336182336183e-05, "loss": 0.0171, "step": 63020 }, { "epoch": 0.465908754915585, "grad_norm": 0.10308913141489029, "learning_rate": 2.6803626543209876e-05, "loss": 0.0205, "step": 63030 }, { "epoch": 0.4659826734868868, "grad_norm": 0.0855834037065506, "learning_rate": 2.6799916904083572e-05, "loss": 0.0196, "step": 63040 }, { "epoch": 0.4660565920581887, "grad_norm": 0.06840619444847107, "learning_rate": 2.6796207264957268e-05, "loss": 0.0175, "step": 63050 }, { "epoch": 0.46613051062949057, "grad_norm": 0.0917033851146698, "learning_rate": 2.679249762583096e-05, "loss": 0.0176, "step": 63060 }, { "epoch": 0.4662044292007924, "grad_norm": 0.07346449047327042, "learning_rate": 2.6788787986704656e-05, "loss": 0.0178, "step": 63070 }, { "epoch": 0.46627834777209426, "grad_norm": 0.06600905954837799, "learning_rate": 2.6785078347578346e-05, "loss": 0.0178, "step": 63080 }, { "epoch": 0.4663522663433961, "grad_norm": 0.08728162944316864, "learning_rate": 2.678136870845204e-05, "loss": 0.0185, "step": 63090 }, { "epoch": 0.46642618491469795, "grad_norm": 0.08154621720314026, "learning_rate": 2.6777659069325737e-05, "loss": 0.0183, "step": 63100 }, { "epoch": 0.4665001034859998, "grad_norm": 0.06465992331504822, "learning_rate": 2.677394943019943e-05, "loss": 0.0185, "step": 63110 }, { "epoch": 0.4665740220573017, "grad_norm": 0.07442975044250488, "learning_rate": 2.6770239791073126e-05, "loss": 0.0179, "step": 63120 }, { "epoch": 0.46664794062860354, "grad_norm": 0.07067244499921799, "learning_rate": 2.6766530151946822e-05, "loss": 0.0165, "step": 63130 }, { "epoch": 0.4667218591999054, "grad_norm": 0.09633144736289978, "learning_rate": 2.6762820512820515e-05, "loss": 0.0173, "step": 63140 }, { "epoch": 0.46679577777120723, "grad_norm": 0.08338624984025955, "learning_rate": 2.675911087369421e-05, "loss": 0.0185, "step": 63150 }, { "epoch": 0.4668696963425091, "grad_norm": 0.07794088125228882, "learning_rate": 2.67554012345679e-05, "loss": 0.017, "step": 63160 }, { "epoch": 0.4669436149138109, "grad_norm": 0.08031114935874939, "learning_rate": 2.6751691595441596e-05, "loss": 0.0172, "step": 63170 }, { "epoch": 0.4670175334851128, "grad_norm": 0.07404765486717224, "learning_rate": 2.6747981956315295e-05, "loss": 0.0185, "step": 63180 }, { "epoch": 0.46709145205641467, "grad_norm": 0.06223677843809128, "learning_rate": 2.6744272317188984e-05, "loss": 0.0161, "step": 63190 }, { "epoch": 0.4671653706277165, "grad_norm": 0.07571319490671158, "learning_rate": 2.674056267806268e-05, "loss": 0.0185, "step": 63200 }, { "epoch": 0.46723928919901836, "grad_norm": 0.10422144085168839, "learning_rate": 2.6736853038936373e-05, "loss": 0.0171, "step": 63210 }, { "epoch": 0.4673132077703202, "grad_norm": 0.08425986766815186, "learning_rate": 2.673314339981007e-05, "loss": 0.0172, "step": 63220 }, { "epoch": 0.46738712634162205, "grad_norm": 0.08318064361810684, "learning_rate": 2.6729433760683765e-05, "loss": 0.0193, "step": 63230 }, { "epoch": 0.4674610449129239, "grad_norm": 0.07843171060085297, "learning_rate": 2.6725724121557454e-05, "loss": 0.0177, "step": 63240 }, { "epoch": 0.4675349634842258, "grad_norm": 0.08089744299650192, "learning_rate": 2.672201448243115e-05, "loss": 0.0176, "step": 63250 }, { "epoch": 0.46760888205552764, "grad_norm": 0.05883701890707016, "learning_rate": 2.6718304843304842e-05, "loss": 0.0149, "step": 63260 }, { "epoch": 0.4676828006268295, "grad_norm": 0.09115418046712875, "learning_rate": 2.671459520417854e-05, "loss": 0.0153, "step": 63270 }, { "epoch": 0.46775671919813133, "grad_norm": 0.08480945974588394, "learning_rate": 2.6710885565052234e-05, "loss": 0.0188, "step": 63280 }, { "epoch": 0.4678306377694332, "grad_norm": 0.08573106676340103, "learning_rate": 2.6707175925925927e-05, "loss": 0.0178, "step": 63290 }, { "epoch": 0.467904556340735, "grad_norm": 0.07894410192966461, "learning_rate": 2.6703466286799623e-05, "loss": 0.0188, "step": 63300 }, { "epoch": 0.4679784749120369, "grad_norm": 0.07888448983430862, "learning_rate": 2.6699756647673312e-05, "loss": 0.0183, "step": 63310 }, { "epoch": 0.46805239348333877, "grad_norm": 0.08809691667556763, "learning_rate": 2.6696047008547008e-05, "loss": 0.0157, "step": 63320 }, { "epoch": 0.4681263120546406, "grad_norm": 0.06534068286418915, "learning_rate": 2.6692337369420704e-05, "loss": 0.0186, "step": 63330 }, { "epoch": 0.46820023062594246, "grad_norm": 0.11392059177160263, "learning_rate": 2.6688627730294397e-05, "loss": 0.0192, "step": 63340 }, { "epoch": 0.4682741491972443, "grad_norm": 0.09349965304136276, "learning_rate": 2.6684918091168093e-05, "loss": 0.0158, "step": 63350 }, { "epoch": 0.46834806776854615, "grad_norm": 0.0638246163725853, "learning_rate": 2.668120845204179e-05, "loss": 0.0188, "step": 63360 }, { "epoch": 0.46842198633984805, "grad_norm": 0.06115562841296196, "learning_rate": 2.667749881291548e-05, "loss": 0.0187, "step": 63370 }, { "epoch": 0.4684959049111499, "grad_norm": 0.0755188837647438, "learning_rate": 2.6673789173789177e-05, "loss": 0.0184, "step": 63380 }, { "epoch": 0.46856982348245174, "grad_norm": 0.051312826573848724, "learning_rate": 2.6670079534662866e-05, "loss": 0.0169, "step": 63390 }, { "epoch": 0.4686437420537536, "grad_norm": 0.08142546564340591, "learning_rate": 2.6666369895536562e-05, "loss": 0.0193, "step": 63400 }, { "epoch": 0.46871766062505543, "grad_norm": 0.07243930548429489, "learning_rate": 2.666266025641026e-05, "loss": 0.0169, "step": 63410 }, { "epoch": 0.4687915791963573, "grad_norm": 0.09887338429689407, "learning_rate": 2.665895061728395e-05, "loss": 0.0185, "step": 63420 }, { "epoch": 0.4688654977676591, "grad_norm": 0.08401845395565033, "learning_rate": 2.6655240978157647e-05, "loss": 0.0194, "step": 63430 }, { "epoch": 0.468939416338961, "grad_norm": 0.049609072506427765, "learning_rate": 2.665153133903134e-05, "loss": 0.0178, "step": 63440 }, { "epoch": 0.46901333491026287, "grad_norm": 0.06521576642990112, "learning_rate": 2.6647821699905035e-05, "loss": 0.0167, "step": 63450 }, { "epoch": 0.4690872534815647, "grad_norm": 0.08452492952346802, "learning_rate": 2.664411206077873e-05, "loss": 0.0183, "step": 63460 }, { "epoch": 0.46916117205286656, "grad_norm": 0.10923508554697037, "learning_rate": 2.664040242165242e-05, "loss": 0.0197, "step": 63470 }, { "epoch": 0.4692350906241684, "grad_norm": 0.07494436204433441, "learning_rate": 2.6636692782526116e-05, "loss": 0.0167, "step": 63480 }, { "epoch": 0.46930900919547025, "grad_norm": 0.09253532439470291, "learning_rate": 2.663298314339981e-05, "loss": 0.0204, "step": 63490 }, { "epoch": 0.46938292776677215, "grad_norm": 0.07485422492027283, "learning_rate": 2.6629273504273505e-05, "loss": 0.0193, "step": 63500 }, { "epoch": 0.469456846338074, "grad_norm": 0.07859625667333603, "learning_rate": 2.66255638651472e-05, "loss": 0.0181, "step": 63510 }, { "epoch": 0.46953076490937584, "grad_norm": 0.06669975072145462, "learning_rate": 2.6621854226020894e-05, "loss": 0.0181, "step": 63520 }, { "epoch": 0.4696046834806777, "grad_norm": 0.08856157213449478, "learning_rate": 2.661814458689459e-05, "loss": 0.017, "step": 63530 }, { "epoch": 0.46967860205197953, "grad_norm": 0.07480046898126602, "learning_rate": 2.661443494776828e-05, "loss": 0.0184, "step": 63540 }, { "epoch": 0.4697525206232814, "grad_norm": 0.08537238091230392, "learning_rate": 2.6610725308641975e-05, "loss": 0.0164, "step": 63550 }, { "epoch": 0.4698264391945832, "grad_norm": 0.07791563868522644, "learning_rate": 2.6607015669515674e-05, "loss": 0.0169, "step": 63560 }, { "epoch": 0.4699003577658851, "grad_norm": 0.07880368083715439, "learning_rate": 2.6603306030389363e-05, "loss": 0.0165, "step": 63570 }, { "epoch": 0.46997427633718697, "grad_norm": 0.09982677549123764, "learning_rate": 2.659959639126306e-05, "loss": 0.0179, "step": 63580 }, { "epoch": 0.4700481949084888, "grad_norm": 0.058600496500730515, "learning_rate": 2.6595886752136755e-05, "loss": 0.016, "step": 63590 }, { "epoch": 0.47012211347979066, "grad_norm": 0.07470900565385818, "learning_rate": 2.6592177113010448e-05, "loss": 0.0177, "step": 63600 }, { "epoch": 0.4701960320510925, "grad_norm": 0.08938571810722351, "learning_rate": 2.6588467473884144e-05, "loss": 0.0179, "step": 63610 }, { "epoch": 0.47026995062239435, "grad_norm": 0.08260288089513779, "learning_rate": 2.6584757834757833e-05, "loss": 0.0191, "step": 63620 }, { "epoch": 0.47034386919369625, "grad_norm": 0.09578394889831543, "learning_rate": 2.658104819563153e-05, "loss": 0.0189, "step": 63630 }, { "epoch": 0.4704177877649981, "grad_norm": 0.06751023232936859, "learning_rate": 2.6577338556505228e-05, "loss": 0.0165, "step": 63640 }, { "epoch": 0.47049170633629994, "grad_norm": 0.07542560994625092, "learning_rate": 2.6573628917378917e-05, "loss": 0.0214, "step": 63650 }, { "epoch": 0.4705656249076018, "grad_norm": 0.09560023248195648, "learning_rate": 2.6569919278252613e-05, "loss": 0.02, "step": 63660 }, { "epoch": 0.47063954347890363, "grad_norm": 0.07671035826206207, "learning_rate": 2.6566209639126306e-05, "loss": 0.0159, "step": 63670 }, { "epoch": 0.4707134620502055, "grad_norm": 0.07942305505275726, "learning_rate": 2.6562500000000002e-05, "loss": 0.016, "step": 63680 }, { "epoch": 0.4707873806215073, "grad_norm": 0.09094572812318802, "learning_rate": 2.6558790360873698e-05, "loss": 0.0182, "step": 63690 }, { "epoch": 0.4708612991928092, "grad_norm": 0.05231654644012451, "learning_rate": 2.6555080721747387e-05, "loss": 0.0175, "step": 63700 }, { "epoch": 0.47093521776411107, "grad_norm": 0.056431982666254044, "learning_rate": 2.6551371082621086e-05, "loss": 0.0173, "step": 63710 }, { "epoch": 0.4710091363354129, "grad_norm": 0.13144607841968536, "learning_rate": 2.6547661443494776e-05, "loss": 0.0214, "step": 63720 }, { "epoch": 0.47108305490671476, "grad_norm": 0.08269277960062027, "learning_rate": 2.654395180436847e-05, "loss": 0.0193, "step": 63730 }, { "epoch": 0.4711569734780166, "grad_norm": 0.12055590748786926, "learning_rate": 2.6540242165242168e-05, "loss": 0.0189, "step": 63740 }, { "epoch": 0.47123089204931845, "grad_norm": 0.08253350853919983, "learning_rate": 2.653653252611586e-05, "loss": 0.0195, "step": 63750 }, { "epoch": 0.47130481062062035, "grad_norm": 0.09564819186925888, "learning_rate": 2.6532822886989556e-05, "loss": 0.0175, "step": 63760 }, { "epoch": 0.4713787291919222, "grad_norm": 0.08060647547245026, "learning_rate": 2.6529113247863245e-05, "loss": 0.0173, "step": 63770 }, { "epoch": 0.47145264776322404, "grad_norm": 0.09288964420557022, "learning_rate": 2.652540360873694e-05, "loss": 0.0183, "step": 63780 }, { "epoch": 0.4715265663345259, "grad_norm": 0.061360739171504974, "learning_rate": 2.652169396961064e-05, "loss": 0.0182, "step": 63790 }, { "epoch": 0.47160048490582773, "grad_norm": 0.08212579041719437, "learning_rate": 2.651798433048433e-05, "loss": 0.0188, "step": 63800 }, { "epoch": 0.4716744034771296, "grad_norm": 0.07967258244752884, "learning_rate": 2.6514274691358026e-05, "loss": 0.0179, "step": 63810 }, { "epoch": 0.4717483220484314, "grad_norm": 0.07890528440475464, "learning_rate": 2.6510565052231722e-05, "loss": 0.0168, "step": 63820 }, { "epoch": 0.4718222406197333, "grad_norm": 0.06696062535047531, "learning_rate": 2.6506855413105414e-05, "loss": 0.0165, "step": 63830 }, { "epoch": 0.47189615919103517, "grad_norm": 0.0909370705485344, "learning_rate": 2.650314577397911e-05, "loss": 0.019, "step": 63840 }, { "epoch": 0.471970077762337, "grad_norm": 0.06163240224123001, "learning_rate": 2.64994361348528e-05, "loss": 0.0213, "step": 63850 }, { "epoch": 0.47204399633363886, "grad_norm": 0.05091671273112297, "learning_rate": 2.64957264957265e-05, "loss": 0.0191, "step": 63860 }, { "epoch": 0.4721179149049407, "grad_norm": 0.07003694027662277, "learning_rate": 2.6492016856600195e-05, "loss": 0.0182, "step": 63870 }, { "epoch": 0.47219183347624255, "grad_norm": 0.07868874818086624, "learning_rate": 2.6488307217473884e-05, "loss": 0.0208, "step": 63880 }, { "epoch": 0.47226575204754445, "grad_norm": 0.08938395231962204, "learning_rate": 2.648459757834758e-05, "loss": 0.0187, "step": 63890 }, { "epoch": 0.4723396706188463, "grad_norm": 0.0641806572675705, "learning_rate": 2.6480887939221273e-05, "loss": 0.0189, "step": 63900 }, { "epoch": 0.47241358919014814, "grad_norm": 0.06578119844198227, "learning_rate": 2.647717830009497e-05, "loss": 0.0172, "step": 63910 }, { "epoch": 0.47248750776145, "grad_norm": 0.061670076102018356, "learning_rate": 2.6473468660968664e-05, "loss": 0.0183, "step": 63920 }, { "epoch": 0.47256142633275183, "grad_norm": 0.06204846128821373, "learning_rate": 2.6469759021842354e-05, "loss": 0.0191, "step": 63930 }, { "epoch": 0.4726353449040537, "grad_norm": 0.0723707377910614, "learning_rate": 2.6466049382716053e-05, "loss": 0.0158, "step": 63940 }, { "epoch": 0.4727092634753555, "grad_norm": 0.08021928369998932, "learning_rate": 2.6462339743589742e-05, "loss": 0.019, "step": 63950 }, { "epoch": 0.4727831820466574, "grad_norm": 0.09613403677940369, "learning_rate": 2.6458630104463438e-05, "loss": 0.0193, "step": 63960 }, { "epoch": 0.47285710061795927, "grad_norm": 0.09475836902856827, "learning_rate": 2.6454920465337134e-05, "loss": 0.0179, "step": 63970 }, { "epoch": 0.4729310191892611, "grad_norm": 0.08046221733093262, "learning_rate": 2.6451210826210827e-05, "loss": 0.0176, "step": 63980 }, { "epoch": 0.47300493776056296, "grad_norm": 0.09605638682842255, "learning_rate": 2.6447501187084523e-05, "loss": 0.0179, "step": 63990 }, { "epoch": 0.4730788563318648, "grad_norm": 0.08003576844930649, "learning_rate": 2.6443791547958212e-05, "loss": 0.0208, "step": 64000 }, { "epoch": 0.47315277490316665, "grad_norm": 0.11510287970304489, "learning_rate": 2.644008190883191e-05, "loss": 0.0172, "step": 64010 }, { "epoch": 0.47322669347446855, "grad_norm": 0.07279416173696518, "learning_rate": 2.6436372269705607e-05, "loss": 0.0179, "step": 64020 }, { "epoch": 0.4733006120457704, "grad_norm": 0.10133165121078491, "learning_rate": 2.6432662630579296e-05, "loss": 0.0184, "step": 64030 }, { "epoch": 0.47337453061707224, "grad_norm": 0.08167675882577896, "learning_rate": 2.6428952991452992e-05, "loss": 0.0179, "step": 64040 }, { "epoch": 0.4734484491883741, "grad_norm": 0.07276114076375961, "learning_rate": 2.642524335232669e-05, "loss": 0.018, "step": 64050 }, { "epoch": 0.47352236775967593, "grad_norm": 0.07859620451927185, "learning_rate": 2.642153371320038e-05, "loss": 0.0191, "step": 64060 }, { "epoch": 0.4735962863309778, "grad_norm": 0.06081186980009079, "learning_rate": 2.6417824074074077e-05, "loss": 0.0161, "step": 64070 }, { "epoch": 0.4736702049022796, "grad_norm": 0.07537996768951416, "learning_rate": 2.6414114434947766e-05, "loss": 0.0182, "step": 64080 }, { "epoch": 0.4737441234735815, "grad_norm": 0.07903088629245758, "learning_rate": 2.6410404795821465e-05, "loss": 0.0175, "step": 64090 }, { "epoch": 0.47381804204488337, "grad_norm": 0.05258747562766075, "learning_rate": 2.640669515669516e-05, "loss": 0.0182, "step": 64100 }, { "epoch": 0.4738919606161852, "grad_norm": 0.1332976073026657, "learning_rate": 2.640298551756885e-05, "loss": 0.0171, "step": 64110 }, { "epoch": 0.47396587918748706, "grad_norm": 0.12791724503040314, "learning_rate": 2.6399275878442547e-05, "loss": 0.0184, "step": 64120 }, { "epoch": 0.4740397977587889, "grad_norm": 0.0703354924917221, "learning_rate": 2.639556623931624e-05, "loss": 0.0185, "step": 64130 }, { "epoch": 0.47411371633009075, "grad_norm": 0.12382179498672485, "learning_rate": 2.6391856600189935e-05, "loss": 0.0175, "step": 64140 }, { "epoch": 0.47418763490139265, "grad_norm": 0.068964883685112, "learning_rate": 2.638814696106363e-05, "loss": 0.0171, "step": 64150 }, { "epoch": 0.4742615534726945, "grad_norm": 0.1036754921078682, "learning_rate": 2.6384437321937324e-05, "loss": 0.0182, "step": 64160 }, { "epoch": 0.47433547204399634, "grad_norm": 0.0769418254494667, "learning_rate": 2.638072768281102e-05, "loss": 0.0167, "step": 64170 }, { "epoch": 0.4744093906152982, "grad_norm": 0.09884065389633179, "learning_rate": 2.637701804368471e-05, "loss": 0.0187, "step": 64180 }, { "epoch": 0.47448330918660003, "grad_norm": 0.07469066977500916, "learning_rate": 2.6373308404558405e-05, "loss": 0.017, "step": 64190 }, { "epoch": 0.4745572277579019, "grad_norm": 0.08337382227182388, "learning_rate": 2.63695987654321e-05, "loss": 0.0189, "step": 64200 }, { "epoch": 0.4746311463292037, "grad_norm": 0.0834554135799408, "learning_rate": 2.6365889126305793e-05, "loss": 0.0149, "step": 64210 }, { "epoch": 0.4747050649005056, "grad_norm": 0.07497718185186386, "learning_rate": 2.636217948717949e-05, "loss": 0.0178, "step": 64220 }, { "epoch": 0.47477898347180747, "grad_norm": 0.10598395764827728, "learning_rate": 2.635846984805318e-05, "loss": 0.0186, "step": 64230 }, { "epoch": 0.4748529020431093, "grad_norm": 0.09169488400220871, "learning_rate": 2.6354760208926878e-05, "loss": 0.0178, "step": 64240 }, { "epoch": 0.47492682061441116, "grad_norm": 0.07592468708753586, "learning_rate": 2.6351050569800574e-05, "loss": 0.0178, "step": 64250 }, { "epoch": 0.475000739185713, "grad_norm": 0.09536651521921158, "learning_rate": 2.6347340930674263e-05, "loss": 0.0168, "step": 64260 }, { "epoch": 0.47507465775701485, "grad_norm": 0.09624287486076355, "learning_rate": 2.634363129154796e-05, "loss": 0.02, "step": 64270 }, { "epoch": 0.47514857632831675, "grad_norm": 0.10279426723718643, "learning_rate": 2.6339921652421655e-05, "loss": 0.0183, "step": 64280 }, { "epoch": 0.4752224948996186, "grad_norm": 0.0842171236872673, "learning_rate": 2.6336212013295347e-05, "loss": 0.0172, "step": 64290 }, { "epoch": 0.47529641347092044, "grad_norm": 0.07284293323755264, "learning_rate": 2.6332502374169043e-05, "loss": 0.0167, "step": 64300 }, { "epoch": 0.4753703320422223, "grad_norm": 0.07489843666553497, "learning_rate": 2.6328792735042736e-05, "loss": 0.0153, "step": 64310 }, { "epoch": 0.47544425061352413, "grad_norm": 0.07663732767105103, "learning_rate": 2.6325083095916432e-05, "loss": 0.0182, "step": 64320 }, { "epoch": 0.475518169184826, "grad_norm": 0.08088131248950958, "learning_rate": 2.6321373456790128e-05, "loss": 0.0194, "step": 64330 }, { "epoch": 0.4755920877561278, "grad_norm": 0.07426482439041138, "learning_rate": 2.6317663817663817e-05, "loss": 0.0158, "step": 64340 }, { "epoch": 0.4756660063274297, "grad_norm": 0.08960974961519241, "learning_rate": 2.6313954178537513e-05, "loss": 0.02, "step": 64350 }, { "epoch": 0.47573992489873157, "grad_norm": 0.062308117747306824, "learning_rate": 2.6310244539411206e-05, "loss": 0.0223, "step": 64360 }, { "epoch": 0.4758138434700334, "grad_norm": 0.08103105425834656, "learning_rate": 2.63065349002849e-05, "loss": 0.0193, "step": 64370 }, { "epoch": 0.47588776204133526, "grad_norm": 0.0753159448504448, "learning_rate": 2.6302825261158598e-05, "loss": 0.0173, "step": 64380 }, { "epoch": 0.4759616806126371, "grad_norm": 0.08921549469232559, "learning_rate": 2.629911562203229e-05, "loss": 0.0203, "step": 64390 }, { "epoch": 0.47603559918393895, "grad_norm": 0.055965226143598557, "learning_rate": 2.6295405982905986e-05, "loss": 0.0157, "step": 64400 }, { "epoch": 0.47610951775524085, "grad_norm": 0.05769721791148186, "learning_rate": 2.6291696343779675e-05, "loss": 0.0153, "step": 64410 }, { "epoch": 0.4761834363265427, "grad_norm": 0.1230938732624054, "learning_rate": 2.628798670465337e-05, "loss": 0.0191, "step": 64420 }, { "epoch": 0.47625735489784454, "grad_norm": 0.10393833369016647, "learning_rate": 2.6284277065527067e-05, "loss": 0.0194, "step": 64430 }, { "epoch": 0.4763312734691464, "grad_norm": 0.0997467115521431, "learning_rate": 2.628056742640076e-05, "loss": 0.0198, "step": 64440 }, { "epoch": 0.47640519204044823, "grad_norm": 0.06631512194871902, "learning_rate": 2.6276857787274456e-05, "loss": 0.0194, "step": 64450 }, { "epoch": 0.4764791106117501, "grad_norm": 0.06374955177307129, "learning_rate": 2.627314814814815e-05, "loss": 0.0161, "step": 64460 }, { "epoch": 0.4765530291830519, "grad_norm": 0.06800312548875809, "learning_rate": 2.6269438509021844e-05, "loss": 0.0174, "step": 64470 }, { "epoch": 0.4766269477543538, "grad_norm": 0.0736674815416336, "learning_rate": 2.626572886989554e-05, "loss": 0.0204, "step": 64480 }, { "epoch": 0.47670086632565567, "grad_norm": 0.07592196017503738, "learning_rate": 2.626201923076923e-05, "loss": 0.0178, "step": 64490 }, { "epoch": 0.4767747848969575, "grad_norm": 0.08925735205411911, "learning_rate": 2.6258309591642926e-05, "loss": 0.0186, "step": 64500 }, { "epoch": 0.47684870346825936, "grad_norm": 0.10544586926698685, "learning_rate": 2.625459995251662e-05, "loss": 0.0187, "step": 64510 }, { "epoch": 0.4769226220395612, "grad_norm": 0.06558398902416229, "learning_rate": 2.6250890313390314e-05, "loss": 0.0191, "step": 64520 }, { "epoch": 0.47699654061086305, "grad_norm": 0.11240635812282562, "learning_rate": 2.624718067426401e-05, "loss": 0.0176, "step": 64530 }, { "epoch": 0.47707045918216495, "grad_norm": 0.07124663889408112, "learning_rate": 2.6243471035137703e-05, "loss": 0.0172, "step": 64540 }, { "epoch": 0.4771443777534668, "grad_norm": 0.10384904593229294, "learning_rate": 2.62397613960114e-05, "loss": 0.0163, "step": 64550 }, { "epoch": 0.47721829632476864, "grad_norm": 0.11825846135616302, "learning_rate": 2.6236051756885095e-05, "loss": 0.0204, "step": 64560 }, { "epoch": 0.4772922148960705, "grad_norm": 0.09247847646474838, "learning_rate": 2.6232342117758784e-05, "loss": 0.019, "step": 64570 }, { "epoch": 0.47736613346737233, "grad_norm": 0.0669577345252037, "learning_rate": 2.622863247863248e-05, "loss": 0.0198, "step": 64580 }, { "epoch": 0.4774400520386742, "grad_norm": 0.08727841079235077, "learning_rate": 2.6224922839506172e-05, "loss": 0.0174, "step": 64590 }, { "epoch": 0.477513970609976, "grad_norm": 0.09160082042217255, "learning_rate": 2.6221213200379868e-05, "loss": 0.0194, "step": 64600 }, { "epoch": 0.4775878891812779, "grad_norm": 0.09534512460231781, "learning_rate": 2.6217503561253564e-05, "loss": 0.019, "step": 64610 }, { "epoch": 0.47766180775257977, "grad_norm": 0.05581028386950493, "learning_rate": 2.6213793922127257e-05, "loss": 0.0173, "step": 64620 }, { "epoch": 0.4777357263238816, "grad_norm": 0.07642550021409988, "learning_rate": 2.6210084283000953e-05, "loss": 0.0168, "step": 64630 }, { "epoch": 0.47780964489518346, "grad_norm": 0.05983065813779831, "learning_rate": 2.6206374643874642e-05, "loss": 0.0186, "step": 64640 }, { "epoch": 0.4778835634664853, "grad_norm": 0.08361926674842834, "learning_rate": 2.6202665004748338e-05, "loss": 0.0193, "step": 64650 }, { "epoch": 0.47795748203778715, "grad_norm": 0.08084291964769363, "learning_rate": 2.6198955365622034e-05, "loss": 0.0196, "step": 64660 }, { "epoch": 0.47803140060908905, "grad_norm": 0.08104509115219116, "learning_rate": 2.6195245726495726e-05, "loss": 0.019, "step": 64670 }, { "epoch": 0.4781053191803909, "grad_norm": 0.08967190235853195, "learning_rate": 2.6191536087369422e-05, "loss": 0.0218, "step": 64680 }, { "epoch": 0.47817923775169274, "grad_norm": 0.07969162613153458, "learning_rate": 2.6187826448243115e-05, "loss": 0.0171, "step": 64690 }, { "epoch": 0.4782531563229946, "grad_norm": 0.09354029595851898, "learning_rate": 2.618411680911681e-05, "loss": 0.0186, "step": 64700 }, { "epoch": 0.47832707489429643, "grad_norm": 0.06691830605268478, "learning_rate": 2.6180407169990507e-05, "loss": 0.0159, "step": 64710 }, { "epoch": 0.4784009934655983, "grad_norm": 0.08824547380208969, "learning_rate": 2.6176697530864196e-05, "loss": 0.0187, "step": 64720 }, { "epoch": 0.4784749120369001, "grad_norm": 0.10307306051254272, "learning_rate": 2.6172987891737892e-05, "loss": 0.0165, "step": 64730 }, { "epoch": 0.478548830608202, "grad_norm": 0.06971874833106995, "learning_rate": 2.616927825261159e-05, "loss": 0.0158, "step": 64740 }, { "epoch": 0.47862274917950387, "grad_norm": 0.10057180374860764, "learning_rate": 2.616556861348528e-05, "loss": 0.0166, "step": 64750 }, { "epoch": 0.4786966677508057, "grad_norm": 0.0871298536658287, "learning_rate": 2.6161858974358977e-05, "loss": 0.0183, "step": 64760 }, { "epoch": 0.47877058632210756, "grad_norm": 0.06503120064735413, "learning_rate": 2.615814933523267e-05, "loss": 0.0177, "step": 64770 }, { "epoch": 0.4788445048934094, "grad_norm": 0.08198139071464539, "learning_rate": 2.6154439696106365e-05, "loss": 0.018, "step": 64780 }, { "epoch": 0.47891842346471125, "grad_norm": 0.07585715502500534, "learning_rate": 2.615073005698006e-05, "loss": 0.0182, "step": 64790 }, { "epoch": 0.47899234203601315, "grad_norm": 0.07827455550432205, "learning_rate": 2.614702041785375e-05, "loss": 0.0204, "step": 64800 }, { "epoch": 0.479066260607315, "grad_norm": 0.06340257823467255, "learning_rate": 2.6143310778727446e-05, "loss": 0.018, "step": 64810 }, { "epoch": 0.47914017917861684, "grad_norm": 0.12032327800989151, "learning_rate": 2.613960113960114e-05, "loss": 0.0199, "step": 64820 }, { "epoch": 0.4792140977499187, "grad_norm": 0.09568923711776733, "learning_rate": 2.6135891500474835e-05, "loss": 0.0188, "step": 64830 }, { "epoch": 0.47928801632122053, "grad_norm": 0.07581949234008789, "learning_rate": 2.613218186134853e-05, "loss": 0.0187, "step": 64840 }, { "epoch": 0.4793619348925224, "grad_norm": 0.07674810290336609, "learning_rate": 2.6128472222222223e-05, "loss": 0.0156, "step": 64850 }, { "epoch": 0.4794358534638242, "grad_norm": 0.09926360845565796, "learning_rate": 2.612476258309592e-05, "loss": 0.0179, "step": 64860 }, { "epoch": 0.4795097720351261, "grad_norm": 0.07553702592849731, "learning_rate": 2.612105294396961e-05, "loss": 0.0183, "step": 64870 }, { "epoch": 0.47958369060642797, "grad_norm": 0.10049743950366974, "learning_rate": 2.6117343304843304e-05, "loss": 0.0174, "step": 64880 }, { "epoch": 0.4796576091777298, "grad_norm": 0.05749649181962013, "learning_rate": 2.6113633665717004e-05, "loss": 0.0176, "step": 64890 }, { "epoch": 0.47973152774903166, "grad_norm": 0.07312509417533875, "learning_rate": 2.6109924026590693e-05, "loss": 0.0179, "step": 64900 }, { "epoch": 0.4798054463203335, "grad_norm": 0.07548145204782486, "learning_rate": 2.610621438746439e-05, "loss": 0.0172, "step": 64910 }, { "epoch": 0.47987936489163535, "grad_norm": 0.09609496593475342, "learning_rate": 2.610250474833808e-05, "loss": 0.0163, "step": 64920 }, { "epoch": 0.47995328346293725, "grad_norm": 0.08333881199359894, "learning_rate": 2.6098795109211778e-05, "loss": 0.0176, "step": 64930 }, { "epoch": 0.4800272020342391, "grad_norm": 0.08614440262317657, "learning_rate": 2.6095085470085474e-05, "loss": 0.0165, "step": 64940 }, { "epoch": 0.48010112060554094, "grad_norm": 0.11465088278055191, "learning_rate": 2.6091375830959163e-05, "loss": 0.017, "step": 64950 }, { "epoch": 0.4801750391768428, "grad_norm": 0.10436538606882095, "learning_rate": 2.608766619183286e-05, "loss": 0.0188, "step": 64960 }, { "epoch": 0.48024895774814463, "grad_norm": 0.09603813290596008, "learning_rate": 2.6083956552706558e-05, "loss": 0.0175, "step": 64970 }, { "epoch": 0.4803228763194465, "grad_norm": 0.08472411334514618, "learning_rate": 2.6080246913580247e-05, "loss": 0.0207, "step": 64980 }, { "epoch": 0.4803967948907483, "grad_norm": 0.07732685655355453, "learning_rate": 2.6076537274453943e-05, "loss": 0.017, "step": 64990 }, { "epoch": 0.4804707134620502, "grad_norm": 0.1053856760263443, "learning_rate": 2.6072827635327636e-05, "loss": 0.0203, "step": 65000 }, { "epoch": 0.48054463203335207, "grad_norm": 0.07103192061185837, "learning_rate": 2.6069117996201332e-05, "loss": 0.018, "step": 65010 }, { "epoch": 0.4806185506046539, "grad_norm": 0.08402489125728607, "learning_rate": 2.6065408357075028e-05, "loss": 0.0198, "step": 65020 }, { "epoch": 0.48069246917595576, "grad_norm": 0.06711668521165848, "learning_rate": 2.6061698717948717e-05, "loss": 0.0174, "step": 65030 }, { "epoch": 0.4807663877472576, "grad_norm": 0.10626384615898132, "learning_rate": 2.6057989078822416e-05, "loss": 0.0169, "step": 65040 }, { "epoch": 0.48084030631855945, "grad_norm": 0.08938641101121902, "learning_rate": 2.6054279439696105e-05, "loss": 0.0173, "step": 65050 }, { "epoch": 0.48091422488986135, "grad_norm": 0.07563548535108566, "learning_rate": 2.60505698005698e-05, "loss": 0.0182, "step": 65060 }, { "epoch": 0.4809881434611632, "grad_norm": 0.0857568308711052, "learning_rate": 2.6046860161443497e-05, "loss": 0.0189, "step": 65070 }, { "epoch": 0.48106206203246504, "grad_norm": 0.11830922961235046, "learning_rate": 2.604315052231719e-05, "loss": 0.0183, "step": 65080 }, { "epoch": 0.4811359806037669, "grad_norm": 0.09156577289104462, "learning_rate": 2.6039440883190886e-05, "loss": 0.019, "step": 65090 }, { "epoch": 0.48120989917506873, "grad_norm": 0.07325790077447891, "learning_rate": 2.6035731244064575e-05, "loss": 0.0179, "step": 65100 }, { "epoch": 0.4812838177463706, "grad_norm": 0.06196491792798042, "learning_rate": 2.603202160493827e-05, "loss": 0.0173, "step": 65110 }, { "epoch": 0.4813577363176724, "grad_norm": 0.09580127149820328, "learning_rate": 2.602831196581197e-05, "loss": 0.0175, "step": 65120 }, { "epoch": 0.4814316548889743, "grad_norm": 0.07817379385232925, "learning_rate": 2.602460232668566e-05, "loss": 0.0183, "step": 65130 }, { "epoch": 0.48150557346027617, "grad_norm": 0.0812036395072937, "learning_rate": 2.6020892687559356e-05, "loss": 0.0159, "step": 65140 }, { "epoch": 0.481579492031578, "grad_norm": 0.08802223205566406, "learning_rate": 2.6017183048433048e-05, "loss": 0.0186, "step": 65150 }, { "epoch": 0.48165341060287986, "grad_norm": 0.08315332978963852, "learning_rate": 2.6013473409306744e-05, "loss": 0.0184, "step": 65160 }, { "epoch": 0.4817273291741817, "grad_norm": 0.08085574954748154, "learning_rate": 2.600976377018044e-05, "loss": 0.018, "step": 65170 }, { "epoch": 0.48180124774548355, "grad_norm": 0.05223085731267929, "learning_rate": 2.600605413105413e-05, "loss": 0.0178, "step": 65180 }, { "epoch": 0.48187516631678545, "grad_norm": 0.055687062442302704, "learning_rate": 2.600234449192783e-05, "loss": 0.0167, "step": 65190 }, { "epoch": 0.4819490848880873, "grad_norm": 0.08231914043426514, "learning_rate": 2.5998634852801525e-05, "loss": 0.0167, "step": 65200 }, { "epoch": 0.48202300345938914, "grad_norm": 0.08846697211265564, "learning_rate": 2.5994925213675214e-05, "loss": 0.0171, "step": 65210 }, { "epoch": 0.482096922030691, "grad_norm": 0.09297716617584229, "learning_rate": 2.599121557454891e-05, "loss": 0.0183, "step": 65220 }, { "epoch": 0.48217084060199283, "grad_norm": 0.07962552458047867, "learning_rate": 2.5987505935422602e-05, "loss": 0.0168, "step": 65230 }, { "epoch": 0.4822447591732947, "grad_norm": 0.07073140889406204, "learning_rate": 2.59837962962963e-05, "loss": 0.017, "step": 65240 }, { "epoch": 0.4823186777445966, "grad_norm": 0.09182487428188324, "learning_rate": 2.5980086657169994e-05, "loss": 0.0176, "step": 65250 }, { "epoch": 0.4823925963158984, "grad_norm": 0.06719227135181427, "learning_rate": 2.5976377018043683e-05, "loss": 0.0188, "step": 65260 }, { "epoch": 0.48246651488720027, "grad_norm": 0.07798336446285248, "learning_rate": 2.5972667378917383e-05, "loss": 0.0192, "step": 65270 }, { "epoch": 0.4825404334585021, "grad_norm": 0.11315786093473434, "learning_rate": 2.5968957739791072e-05, "loss": 0.0191, "step": 65280 }, { "epoch": 0.48261435202980396, "grad_norm": 0.0917963758111, "learning_rate": 2.5965248100664768e-05, "loss": 0.0174, "step": 65290 }, { "epoch": 0.4826882706011058, "grad_norm": 0.07520270347595215, "learning_rate": 2.5961538461538464e-05, "loss": 0.017, "step": 65300 }, { "epoch": 0.48276218917240765, "grad_norm": 0.08580703288316727, "learning_rate": 2.5957828822412157e-05, "loss": 0.0193, "step": 65310 }, { "epoch": 0.48283610774370955, "grad_norm": 0.15712198615074158, "learning_rate": 2.5954119183285853e-05, "loss": 0.019, "step": 65320 }, { "epoch": 0.4829100263150114, "grad_norm": 0.0746866911649704, "learning_rate": 2.595040954415954e-05, "loss": 0.0155, "step": 65330 }, { "epoch": 0.48298394488631324, "grad_norm": 0.08219867199659348, "learning_rate": 2.5946699905033238e-05, "loss": 0.016, "step": 65340 }, { "epoch": 0.4830578634576151, "grad_norm": 0.10122178494930267, "learning_rate": 2.5942990265906937e-05, "loss": 0.0167, "step": 65350 }, { "epoch": 0.48313178202891693, "grad_norm": 0.11777858436107635, "learning_rate": 2.5939280626780626e-05, "loss": 0.0165, "step": 65360 }, { "epoch": 0.4832057006002188, "grad_norm": 0.0590934231877327, "learning_rate": 2.5935570987654322e-05, "loss": 0.0181, "step": 65370 }, { "epoch": 0.4832796191715207, "grad_norm": 0.07778100669384003, "learning_rate": 2.5931861348528015e-05, "loss": 0.0169, "step": 65380 }, { "epoch": 0.4833535377428225, "grad_norm": 0.09313920885324478, "learning_rate": 2.592815170940171e-05, "loss": 0.0186, "step": 65390 }, { "epoch": 0.48342745631412437, "grad_norm": 0.11338415741920471, "learning_rate": 2.5924442070275407e-05, "loss": 0.0173, "step": 65400 }, { "epoch": 0.4835013748854262, "grad_norm": 0.0711422711610794, "learning_rate": 2.5920732431149096e-05, "loss": 0.0174, "step": 65410 }, { "epoch": 0.48357529345672806, "grad_norm": 0.0966838076710701, "learning_rate": 2.5917022792022795e-05, "loss": 0.0188, "step": 65420 }, { "epoch": 0.4836492120280299, "grad_norm": 0.0847175195813179, "learning_rate": 2.591331315289649e-05, "loss": 0.0198, "step": 65430 }, { "epoch": 0.48372313059933175, "grad_norm": 0.08860213309526443, "learning_rate": 2.590960351377018e-05, "loss": 0.0184, "step": 65440 }, { "epoch": 0.48379704917063365, "grad_norm": 0.0929391011595726, "learning_rate": 2.5905893874643876e-05, "loss": 0.0184, "step": 65450 }, { "epoch": 0.4838709677419355, "grad_norm": 0.08175451308488846, "learning_rate": 2.590218423551757e-05, "loss": 0.0175, "step": 65460 }, { "epoch": 0.48394488631323734, "grad_norm": 0.0830913782119751, "learning_rate": 2.5898474596391265e-05, "loss": 0.0177, "step": 65470 }, { "epoch": 0.4840188048845392, "grad_norm": 0.09538637101650238, "learning_rate": 2.589476495726496e-05, "loss": 0.0178, "step": 65480 }, { "epoch": 0.48409272345584103, "grad_norm": 0.09078861773014069, "learning_rate": 2.589105531813865e-05, "loss": 0.0193, "step": 65490 }, { "epoch": 0.4841666420271429, "grad_norm": 0.08665713667869568, "learning_rate": 2.588734567901235e-05, "loss": 0.0172, "step": 65500 }, { "epoch": 0.4842405605984448, "grad_norm": 0.0846015214920044, "learning_rate": 2.588363603988604e-05, "loss": 0.0192, "step": 65510 }, { "epoch": 0.4843144791697466, "grad_norm": 0.09109742194414139, "learning_rate": 2.5879926400759735e-05, "loss": 0.0157, "step": 65520 }, { "epoch": 0.48438839774104847, "grad_norm": 0.10539749264717102, "learning_rate": 2.587621676163343e-05, "loss": 0.0199, "step": 65530 }, { "epoch": 0.4844623163123503, "grad_norm": 0.11487936973571777, "learning_rate": 2.5872507122507123e-05, "loss": 0.0195, "step": 65540 }, { "epoch": 0.48453623488365216, "grad_norm": 0.05182930827140808, "learning_rate": 2.586879748338082e-05, "loss": 0.016, "step": 65550 }, { "epoch": 0.484610153454954, "grad_norm": 0.0592120923101902, "learning_rate": 2.5865087844254508e-05, "loss": 0.0174, "step": 65560 }, { "epoch": 0.48468407202625585, "grad_norm": 0.07573354989290237, "learning_rate": 2.5861378205128208e-05, "loss": 0.0167, "step": 65570 }, { "epoch": 0.48475799059755775, "grad_norm": 0.07868777960538864, "learning_rate": 2.5857668566001904e-05, "loss": 0.0179, "step": 65580 }, { "epoch": 0.4848319091688596, "grad_norm": 0.1018095463514328, "learning_rate": 2.5853958926875593e-05, "loss": 0.0155, "step": 65590 }, { "epoch": 0.48490582774016144, "grad_norm": 0.0721440240740776, "learning_rate": 2.585024928774929e-05, "loss": 0.0175, "step": 65600 }, { "epoch": 0.4849797463114633, "grad_norm": 0.08099162578582764, "learning_rate": 2.584653964862298e-05, "loss": 0.0164, "step": 65610 }, { "epoch": 0.48505366488276513, "grad_norm": 0.11158620566129684, "learning_rate": 2.5842830009496677e-05, "loss": 0.0181, "step": 65620 }, { "epoch": 0.485127583454067, "grad_norm": 0.09026879817247391, "learning_rate": 2.5839120370370373e-05, "loss": 0.0187, "step": 65630 }, { "epoch": 0.4852015020253689, "grad_norm": 0.07620834559202194, "learning_rate": 2.5835410731244062e-05, "loss": 0.0217, "step": 65640 }, { "epoch": 0.4852754205966707, "grad_norm": 0.08644992113113403, "learning_rate": 2.5831701092117762e-05, "loss": 0.0176, "step": 65650 }, { "epoch": 0.48534933916797257, "grad_norm": 0.08692828565835953, "learning_rate": 2.5827991452991458e-05, "loss": 0.0202, "step": 65660 }, { "epoch": 0.4854232577392744, "grad_norm": 0.08011046051979065, "learning_rate": 2.5824281813865147e-05, "loss": 0.02, "step": 65670 }, { "epoch": 0.48549717631057626, "grad_norm": 0.057609450072050095, "learning_rate": 2.5820572174738843e-05, "loss": 0.0172, "step": 65680 }, { "epoch": 0.4855710948818781, "grad_norm": 0.09666851162910461, "learning_rate": 2.5816862535612536e-05, "loss": 0.0178, "step": 65690 }, { "epoch": 0.48564501345317995, "grad_norm": 0.0653696209192276, "learning_rate": 2.581315289648623e-05, "loss": 0.0163, "step": 65700 }, { "epoch": 0.48571893202448185, "grad_norm": 0.07197672873735428, "learning_rate": 2.5809443257359927e-05, "loss": 0.0205, "step": 65710 }, { "epoch": 0.4857928505957837, "grad_norm": 0.08986418694257736, "learning_rate": 2.580573361823362e-05, "loss": 0.0215, "step": 65720 }, { "epoch": 0.48586676916708554, "grad_norm": 0.09233395010232925, "learning_rate": 2.5802023979107316e-05, "loss": 0.0175, "step": 65730 }, { "epoch": 0.4859406877383874, "grad_norm": 0.08180242031812668, "learning_rate": 2.5798314339981005e-05, "loss": 0.0217, "step": 65740 }, { "epoch": 0.48601460630968923, "grad_norm": 0.08367449790239334, "learning_rate": 2.57946047008547e-05, "loss": 0.018, "step": 65750 }, { "epoch": 0.4860885248809911, "grad_norm": 0.07709220051765442, "learning_rate": 2.5790895061728397e-05, "loss": 0.0182, "step": 65760 }, { "epoch": 0.486162443452293, "grad_norm": 0.09754809737205505, "learning_rate": 2.578718542260209e-05, "loss": 0.0159, "step": 65770 }, { "epoch": 0.4862363620235948, "grad_norm": 0.0685078427195549, "learning_rate": 2.5783475783475786e-05, "loss": 0.0165, "step": 65780 }, { "epoch": 0.48631028059489667, "grad_norm": 0.1023360937833786, "learning_rate": 2.5779766144349475e-05, "loss": 0.0204, "step": 65790 }, { "epoch": 0.4863841991661985, "grad_norm": 0.09142835438251495, "learning_rate": 2.5776056505223174e-05, "loss": 0.0156, "step": 65800 }, { "epoch": 0.48645811773750036, "grad_norm": 0.06241421401500702, "learning_rate": 2.577234686609687e-05, "loss": 0.0186, "step": 65810 }, { "epoch": 0.4865320363088022, "grad_norm": 0.09255488216876984, "learning_rate": 2.576863722697056e-05, "loss": 0.0191, "step": 65820 }, { "epoch": 0.48660595488010405, "grad_norm": 0.06651777029037476, "learning_rate": 2.5764927587844255e-05, "loss": 0.0155, "step": 65830 }, { "epoch": 0.48667987345140595, "grad_norm": 0.07002021372318268, "learning_rate": 2.5761217948717948e-05, "loss": 0.0167, "step": 65840 }, { "epoch": 0.4867537920227078, "grad_norm": 0.07770857959985733, "learning_rate": 2.5757508309591644e-05, "loss": 0.0208, "step": 65850 }, { "epoch": 0.48682771059400964, "grad_norm": 0.10046552866697311, "learning_rate": 2.575379867046534e-05, "loss": 0.019, "step": 65860 }, { "epoch": 0.4869016291653115, "grad_norm": 0.06843043118715286, "learning_rate": 2.5750089031339032e-05, "loss": 0.0192, "step": 65870 }, { "epoch": 0.48697554773661333, "grad_norm": 0.05567110329866409, "learning_rate": 2.574637939221273e-05, "loss": 0.0201, "step": 65880 }, { "epoch": 0.4870494663079152, "grad_norm": 0.10436822474002838, "learning_rate": 2.5742669753086424e-05, "loss": 0.0164, "step": 65890 }, { "epoch": 0.4871233848792171, "grad_norm": 0.07619208842515945, "learning_rate": 2.5738960113960114e-05, "loss": 0.0176, "step": 65900 }, { "epoch": 0.4871973034505189, "grad_norm": 0.08201929926872253, "learning_rate": 2.573525047483381e-05, "loss": 0.0211, "step": 65910 }, { "epoch": 0.48727122202182077, "grad_norm": 0.08895740658044815, "learning_rate": 2.5731540835707502e-05, "loss": 0.0165, "step": 65920 }, { "epoch": 0.4873451405931226, "grad_norm": 0.05612145736813545, "learning_rate": 2.5727831196581198e-05, "loss": 0.016, "step": 65930 }, { "epoch": 0.48741905916442446, "grad_norm": 0.0678725466132164, "learning_rate": 2.5724121557454894e-05, "loss": 0.0181, "step": 65940 }, { "epoch": 0.4874929777357263, "grad_norm": 0.0889897495508194, "learning_rate": 2.5720411918328587e-05, "loss": 0.016, "step": 65950 }, { "epoch": 0.48756689630702815, "grad_norm": 0.09029419720172882, "learning_rate": 2.5716702279202283e-05, "loss": 0.0201, "step": 65960 }, { "epoch": 0.48764081487833005, "grad_norm": 0.057307250797748566, "learning_rate": 2.5712992640075972e-05, "loss": 0.0169, "step": 65970 }, { "epoch": 0.4877147334496319, "grad_norm": 0.08193568885326385, "learning_rate": 2.5709283000949668e-05, "loss": 0.0196, "step": 65980 }, { "epoch": 0.48778865202093374, "grad_norm": 0.055389512330293655, "learning_rate": 2.5705573361823364e-05, "loss": 0.0203, "step": 65990 }, { "epoch": 0.4878625705922356, "grad_norm": 0.07593169063329697, "learning_rate": 2.5701863722697056e-05, "loss": 0.0202, "step": 66000 }, { "epoch": 0.48793648916353743, "grad_norm": 0.06627099961042404, "learning_rate": 2.5698154083570752e-05, "loss": 0.0186, "step": 66010 }, { "epoch": 0.4880104077348393, "grad_norm": 0.07474975287914276, "learning_rate": 2.5694444444444445e-05, "loss": 0.0158, "step": 66020 }, { "epoch": 0.4880843263061412, "grad_norm": 0.08697369694709778, "learning_rate": 2.569073480531814e-05, "loss": 0.0179, "step": 66030 }, { "epoch": 0.488158244877443, "grad_norm": 0.06758727878332138, "learning_rate": 2.5687025166191837e-05, "loss": 0.0178, "step": 66040 }, { "epoch": 0.48823216344874487, "grad_norm": 0.1088317334651947, "learning_rate": 2.5683315527065526e-05, "loss": 0.0219, "step": 66050 }, { "epoch": 0.4883060820200467, "grad_norm": 0.11058960855007172, "learning_rate": 2.5679605887939222e-05, "loss": 0.0184, "step": 66060 }, { "epoch": 0.48838000059134856, "grad_norm": 0.07780216634273529, "learning_rate": 2.5675896248812914e-05, "loss": 0.018, "step": 66070 }, { "epoch": 0.4884539191626504, "grad_norm": 0.07846556603908539, "learning_rate": 2.567218660968661e-05, "loss": 0.0183, "step": 66080 }, { "epoch": 0.48852783773395225, "grad_norm": 0.0712955892086029, "learning_rate": 2.5668476970560306e-05, "loss": 0.0164, "step": 66090 }, { "epoch": 0.48860175630525415, "grad_norm": 0.09872201085090637, "learning_rate": 2.5664767331434e-05, "loss": 0.0198, "step": 66100 }, { "epoch": 0.488675674876556, "grad_norm": 0.09058482199907303, "learning_rate": 2.5661057692307695e-05, "loss": 0.0178, "step": 66110 }, { "epoch": 0.48874959344785784, "grad_norm": 0.07849286496639252, "learning_rate": 2.565734805318139e-05, "loss": 0.0198, "step": 66120 }, { "epoch": 0.4888235120191597, "grad_norm": 0.11207176744937897, "learning_rate": 2.565363841405508e-05, "loss": 0.0195, "step": 66130 }, { "epoch": 0.48889743059046153, "grad_norm": 0.06829576194286346, "learning_rate": 2.5649928774928776e-05, "loss": 0.0183, "step": 66140 }, { "epoch": 0.4889713491617634, "grad_norm": 0.07670718431472778, "learning_rate": 2.564621913580247e-05, "loss": 0.0173, "step": 66150 }, { "epoch": 0.4890452677330653, "grad_norm": 0.10271691530942917, "learning_rate": 2.5642509496676165e-05, "loss": 0.0162, "step": 66160 }, { "epoch": 0.4891191863043671, "grad_norm": 0.10622168332338333, "learning_rate": 2.563879985754986e-05, "loss": 0.0191, "step": 66170 }, { "epoch": 0.48919310487566897, "grad_norm": 0.07407471537590027, "learning_rate": 2.5635090218423553e-05, "loss": 0.0172, "step": 66180 }, { "epoch": 0.4892670234469708, "grad_norm": 0.08040529489517212, "learning_rate": 2.563138057929725e-05, "loss": 0.019, "step": 66190 }, { "epoch": 0.48934094201827266, "grad_norm": 0.06908878684043884, "learning_rate": 2.562767094017094e-05, "loss": 0.0196, "step": 66200 }, { "epoch": 0.4894148605895745, "grad_norm": 0.07580314576625824, "learning_rate": 2.5623961301044634e-05, "loss": 0.0162, "step": 66210 }, { "epoch": 0.48948877916087635, "grad_norm": 0.09771912544965744, "learning_rate": 2.562025166191833e-05, "loss": 0.0197, "step": 66220 }, { "epoch": 0.48956269773217825, "grad_norm": 0.0960688367486, "learning_rate": 2.5616542022792023e-05, "loss": 0.0199, "step": 66230 }, { "epoch": 0.4896366163034801, "grad_norm": 0.06303451955318451, "learning_rate": 2.561283238366572e-05, "loss": 0.0188, "step": 66240 }, { "epoch": 0.48971053487478194, "grad_norm": 0.06757359206676483, "learning_rate": 2.560912274453941e-05, "loss": 0.0188, "step": 66250 }, { "epoch": 0.4897844534460838, "grad_norm": 0.10776185989379883, "learning_rate": 2.5605413105413107e-05, "loss": 0.0163, "step": 66260 }, { "epoch": 0.48985837201738563, "grad_norm": 0.08810808509588242, "learning_rate": 2.5601703466286803e-05, "loss": 0.0182, "step": 66270 }, { "epoch": 0.4899322905886875, "grad_norm": 0.09690174460411072, "learning_rate": 2.5597993827160493e-05, "loss": 0.0191, "step": 66280 }, { "epoch": 0.4900062091599894, "grad_norm": 0.07141640037298203, "learning_rate": 2.559428418803419e-05, "loss": 0.0175, "step": 66290 }, { "epoch": 0.4900801277312912, "grad_norm": 0.065648153424263, "learning_rate": 2.559057454890788e-05, "loss": 0.0197, "step": 66300 }, { "epoch": 0.49015404630259307, "grad_norm": 0.07435780018568039, "learning_rate": 2.5586864909781577e-05, "loss": 0.0179, "step": 66310 }, { "epoch": 0.4902279648738949, "grad_norm": 0.0778043195605278, "learning_rate": 2.5583155270655273e-05, "loss": 0.0173, "step": 66320 }, { "epoch": 0.49030188344519676, "grad_norm": 0.05645797401666641, "learning_rate": 2.5579445631528966e-05, "loss": 0.0169, "step": 66330 }, { "epoch": 0.4903758020164986, "grad_norm": 0.09132159501314163, "learning_rate": 2.557573599240266e-05, "loss": 0.0182, "step": 66340 }, { "epoch": 0.49044972058780045, "grad_norm": 0.08254007250070572, "learning_rate": 2.5572026353276358e-05, "loss": 0.0241, "step": 66350 }, { "epoch": 0.49052363915910235, "grad_norm": 0.0711667537689209, "learning_rate": 2.5568316714150047e-05, "loss": 0.02, "step": 66360 }, { "epoch": 0.4905975577304042, "grad_norm": 0.07004100829362869, "learning_rate": 2.5564607075023743e-05, "loss": 0.0152, "step": 66370 }, { "epoch": 0.49067147630170604, "grad_norm": 0.07659637928009033, "learning_rate": 2.5560897435897435e-05, "loss": 0.0194, "step": 66380 }, { "epoch": 0.4907453948730079, "grad_norm": 0.06178012117743492, "learning_rate": 2.555718779677113e-05, "loss": 0.0195, "step": 66390 }, { "epoch": 0.49081931344430973, "grad_norm": 0.09294130653142929, "learning_rate": 2.5553478157644827e-05, "loss": 0.0196, "step": 66400 }, { "epoch": 0.4908932320156116, "grad_norm": 0.08098200708627701, "learning_rate": 2.554976851851852e-05, "loss": 0.0171, "step": 66410 }, { "epoch": 0.4909671505869135, "grad_norm": 0.09157669544219971, "learning_rate": 2.5546058879392216e-05, "loss": 0.0164, "step": 66420 }, { "epoch": 0.4910410691582153, "grad_norm": 0.10379356145858765, "learning_rate": 2.5542349240265905e-05, "loss": 0.0181, "step": 66430 }, { "epoch": 0.49111498772951717, "grad_norm": 0.08127181231975555, "learning_rate": 2.55386396011396e-05, "loss": 0.0178, "step": 66440 }, { "epoch": 0.491188906300819, "grad_norm": 0.07030443102121353, "learning_rate": 2.55349299620133e-05, "loss": 0.0178, "step": 66450 }, { "epoch": 0.49126282487212086, "grad_norm": 0.061049479991197586, "learning_rate": 2.553122032288699e-05, "loss": 0.0193, "step": 66460 }, { "epoch": 0.4913367434434227, "grad_norm": 0.04734016954898834, "learning_rate": 2.5527510683760685e-05, "loss": 0.0191, "step": 66470 }, { "epoch": 0.49141066201472455, "grad_norm": 0.13547852635383606, "learning_rate": 2.5523801044634378e-05, "loss": 0.0201, "step": 66480 }, { "epoch": 0.49148458058602645, "grad_norm": 0.05827900767326355, "learning_rate": 2.5520091405508074e-05, "loss": 0.0155, "step": 66490 }, { "epoch": 0.4915584991573283, "grad_norm": 0.10678491741418839, "learning_rate": 2.551638176638177e-05, "loss": 0.0174, "step": 66500 }, { "epoch": 0.49163241772863014, "grad_norm": 0.07167919725179672, "learning_rate": 2.551267212725546e-05, "loss": 0.0186, "step": 66510 }, { "epoch": 0.491706336299932, "grad_norm": 0.08683153241872787, "learning_rate": 2.5508962488129155e-05, "loss": 0.0168, "step": 66520 }, { "epoch": 0.49178025487123384, "grad_norm": 0.1263267695903778, "learning_rate": 2.5505252849002848e-05, "loss": 0.0196, "step": 66530 }, { "epoch": 0.4918541734425357, "grad_norm": 0.07182847708463669, "learning_rate": 2.5501543209876544e-05, "loss": 0.0159, "step": 66540 }, { "epoch": 0.4919280920138376, "grad_norm": 0.11993493884801865, "learning_rate": 2.549783357075024e-05, "loss": 0.0175, "step": 66550 }, { "epoch": 0.4920020105851394, "grad_norm": 0.088954858481884, "learning_rate": 2.5494123931623932e-05, "loss": 0.0182, "step": 66560 }, { "epoch": 0.49207592915644127, "grad_norm": 0.08541052788496017, "learning_rate": 2.5490414292497628e-05, "loss": 0.0194, "step": 66570 }, { "epoch": 0.4921498477277431, "grad_norm": 0.09256555885076523, "learning_rate": 2.5486704653371324e-05, "loss": 0.0189, "step": 66580 }, { "epoch": 0.49222376629904496, "grad_norm": 0.06345752626657486, "learning_rate": 2.5482995014245013e-05, "loss": 0.018, "step": 66590 }, { "epoch": 0.4922976848703468, "grad_norm": 0.0659131333231926, "learning_rate": 2.5479285375118713e-05, "loss": 0.0175, "step": 66600 }, { "epoch": 0.49237160344164865, "grad_norm": 0.08424822241067886, "learning_rate": 2.5475575735992402e-05, "loss": 0.0172, "step": 66610 }, { "epoch": 0.49244552201295055, "grad_norm": 0.10027677565813065, "learning_rate": 2.5471866096866098e-05, "loss": 0.0189, "step": 66620 }, { "epoch": 0.4925194405842524, "grad_norm": 0.05486908182501793, "learning_rate": 2.5468156457739794e-05, "loss": 0.016, "step": 66630 }, { "epoch": 0.49259335915555424, "grad_norm": 0.09274695813655853, "learning_rate": 2.5464446818613486e-05, "loss": 0.0183, "step": 66640 }, { "epoch": 0.4926672777268561, "grad_norm": 0.06558877974748611, "learning_rate": 2.5460737179487182e-05, "loss": 0.0201, "step": 66650 }, { "epoch": 0.49274119629815794, "grad_norm": 0.10087820142507553, "learning_rate": 2.545702754036087e-05, "loss": 0.0193, "step": 66660 }, { "epoch": 0.4928151148694598, "grad_norm": 0.0879107117652893, "learning_rate": 2.5453317901234567e-05, "loss": 0.0167, "step": 66670 }, { "epoch": 0.4928890334407617, "grad_norm": 0.06862018257379532, "learning_rate": 2.5449608262108267e-05, "loss": 0.0197, "step": 66680 }, { "epoch": 0.4929629520120635, "grad_norm": 0.10974738746881485, "learning_rate": 2.5445898622981956e-05, "loss": 0.0182, "step": 66690 }, { "epoch": 0.49303687058336537, "grad_norm": 0.08736329525709152, "learning_rate": 2.5442188983855652e-05, "loss": 0.0187, "step": 66700 }, { "epoch": 0.4931107891546672, "grad_norm": 0.10087980329990387, "learning_rate": 2.5438479344729345e-05, "loss": 0.0171, "step": 66710 }, { "epoch": 0.49318470772596906, "grad_norm": 0.08211347460746765, "learning_rate": 2.543476970560304e-05, "loss": 0.0157, "step": 66720 }, { "epoch": 0.4932586262972709, "grad_norm": 0.06540828943252563, "learning_rate": 2.5431060066476737e-05, "loss": 0.0204, "step": 66730 }, { "epoch": 0.49333254486857275, "grad_norm": 0.07201921194791794, "learning_rate": 2.5427350427350426e-05, "loss": 0.018, "step": 66740 }, { "epoch": 0.49340646343987465, "grad_norm": 0.09930144995450974, "learning_rate": 2.5423640788224125e-05, "loss": 0.0171, "step": 66750 }, { "epoch": 0.4934803820111765, "grad_norm": 0.08865474909543991, "learning_rate": 2.5419931149097814e-05, "loss": 0.0207, "step": 66760 }, { "epoch": 0.49355430058247834, "grad_norm": 0.0578952357172966, "learning_rate": 2.541622150997151e-05, "loss": 0.0183, "step": 66770 }, { "epoch": 0.4936282191537802, "grad_norm": 0.1250295639038086, "learning_rate": 2.5412511870845206e-05, "loss": 0.016, "step": 66780 }, { "epoch": 0.49370213772508204, "grad_norm": 0.08131686598062515, "learning_rate": 2.54088022317189e-05, "loss": 0.0164, "step": 66790 }, { "epoch": 0.4937760562963839, "grad_norm": 0.08497151732444763, "learning_rate": 2.5405092592592595e-05, "loss": 0.0169, "step": 66800 }, { "epoch": 0.4938499748676858, "grad_norm": 0.09198103845119476, "learning_rate": 2.540138295346629e-05, "loss": 0.0175, "step": 66810 }, { "epoch": 0.4939238934389876, "grad_norm": 0.054499588906764984, "learning_rate": 2.539767331433998e-05, "loss": 0.0178, "step": 66820 }, { "epoch": 0.49399781201028947, "grad_norm": 0.06767275184392929, "learning_rate": 2.539396367521368e-05, "loss": 0.016, "step": 66830 }, { "epoch": 0.4940717305815913, "grad_norm": 0.10051191598176956, "learning_rate": 2.539025403608737e-05, "loss": 0.0194, "step": 66840 }, { "epoch": 0.49414564915289316, "grad_norm": 0.09332738071680069, "learning_rate": 2.5386544396961064e-05, "loss": 0.0168, "step": 66850 }, { "epoch": 0.494219567724195, "grad_norm": 0.06618684530258179, "learning_rate": 2.538283475783476e-05, "loss": 0.0156, "step": 66860 }, { "epoch": 0.49429348629549685, "grad_norm": 0.07552550733089447, "learning_rate": 2.5379125118708453e-05, "loss": 0.0214, "step": 66870 }, { "epoch": 0.49436740486679875, "grad_norm": 0.07698984444141388, "learning_rate": 2.537541547958215e-05, "loss": 0.0165, "step": 66880 }, { "epoch": 0.4944413234381006, "grad_norm": 0.06162414327263832, "learning_rate": 2.5371705840455838e-05, "loss": 0.0163, "step": 66890 }, { "epoch": 0.49451524200940244, "grad_norm": 0.09170342981815338, "learning_rate": 2.5367996201329537e-05, "loss": 0.0201, "step": 66900 }, { "epoch": 0.4945891605807043, "grad_norm": 0.08989348262548447, "learning_rate": 2.5364286562203233e-05, "loss": 0.0194, "step": 66910 }, { "epoch": 0.49466307915200614, "grad_norm": 0.0776243656873703, "learning_rate": 2.5360576923076923e-05, "loss": 0.0178, "step": 66920 }, { "epoch": 0.494736997723308, "grad_norm": 0.11429768055677414, "learning_rate": 2.535686728395062e-05, "loss": 0.0183, "step": 66930 }, { "epoch": 0.4948109162946099, "grad_norm": 0.07270469516515732, "learning_rate": 2.535315764482431e-05, "loss": 0.0176, "step": 66940 }, { "epoch": 0.4948848348659117, "grad_norm": 0.0862298533320427, "learning_rate": 2.5349448005698007e-05, "loss": 0.0193, "step": 66950 }, { "epoch": 0.49495875343721357, "grad_norm": 0.09826846420764923, "learning_rate": 2.5345738366571703e-05, "loss": 0.0185, "step": 66960 }, { "epoch": 0.4950326720085154, "grad_norm": 0.10540689527988434, "learning_rate": 2.5342028727445392e-05, "loss": 0.0172, "step": 66970 }, { "epoch": 0.49510659057981726, "grad_norm": 0.0765228345990181, "learning_rate": 2.533831908831909e-05, "loss": 0.0186, "step": 66980 }, { "epoch": 0.4951805091511191, "grad_norm": 0.08622103184461594, "learning_rate": 2.533460944919278e-05, "loss": 0.0177, "step": 66990 }, { "epoch": 0.49525442772242095, "grad_norm": 0.07351600378751755, "learning_rate": 2.5330899810066477e-05, "loss": 0.0183, "step": 67000 }, { "epoch": 0.49532834629372285, "grad_norm": 0.0732818990945816, "learning_rate": 2.5327190170940173e-05, "loss": 0.018, "step": 67010 }, { "epoch": 0.4954022648650247, "grad_norm": 0.10038938373327255, "learning_rate": 2.5323480531813865e-05, "loss": 0.0201, "step": 67020 }, { "epoch": 0.49547618343632654, "grad_norm": 0.11697548627853394, "learning_rate": 2.531977089268756e-05, "loss": 0.0178, "step": 67030 }, { "epoch": 0.4955501020076284, "grad_norm": 0.0824909508228302, "learning_rate": 2.5316061253561257e-05, "loss": 0.0187, "step": 67040 }, { "epoch": 0.49562402057893024, "grad_norm": 0.07028558850288391, "learning_rate": 2.531235161443495e-05, "loss": 0.0155, "step": 67050 }, { "epoch": 0.4956979391502321, "grad_norm": 0.06478890776634216, "learning_rate": 2.5308641975308646e-05, "loss": 0.0179, "step": 67060 }, { "epoch": 0.495771857721534, "grad_norm": 0.08976764231920242, "learning_rate": 2.5304932336182335e-05, "loss": 0.0169, "step": 67070 }, { "epoch": 0.4958457762928358, "grad_norm": 0.07186666876077652, "learning_rate": 2.530122269705603e-05, "loss": 0.0186, "step": 67080 }, { "epoch": 0.4959196948641377, "grad_norm": 0.06850361078977585, "learning_rate": 2.5297513057929727e-05, "loss": 0.0194, "step": 67090 }, { "epoch": 0.4959936134354395, "grad_norm": 0.09590274095535278, "learning_rate": 2.529380341880342e-05, "loss": 0.0198, "step": 67100 }, { "epoch": 0.49606753200674136, "grad_norm": 0.094276562333107, "learning_rate": 2.5290093779677115e-05, "loss": 0.019, "step": 67110 }, { "epoch": 0.4961414505780432, "grad_norm": 0.11390369385480881, "learning_rate": 2.5286384140550805e-05, "loss": 0.0173, "step": 67120 }, { "epoch": 0.4962153691493451, "grad_norm": 0.0800207182765007, "learning_rate": 2.5282674501424504e-05, "loss": 0.0161, "step": 67130 }, { "epoch": 0.49628928772064695, "grad_norm": 0.08522894233465195, "learning_rate": 2.52789648622982e-05, "loss": 0.0189, "step": 67140 }, { "epoch": 0.4963632062919488, "grad_norm": 0.0826270580291748, "learning_rate": 2.527525522317189e-05, "loss": 0.0213, "step": 67150 }, { "epoch": 0.49643712486325065, "grad_norm": 0.09851718693971634, "learning_rate": 2.5271545584045585e-05, "loss": 0.018, "step": 67160 }, { "epoch": 0.4965110434345525, "grad_norm": 0.08281811326742172, "learning_rate": 2.5267835944919278e-05, "loss": 0.0179, "step": 67170 }, { "epoch": 0.49658496200585434, "grad_norm": 0.05920829251408577, "learning_rate": 2.5264126305792974e-05, "loss": 0.0161, "step": 67180 }, { "epoch": 0.4966588805771562, "grad_norm": 0.09232311695814133, "learning_rate": 2.526041666666667e-05, "loss": 0.018, "step": 67190 }, { "epoch": 0.4967327991484581, "grad_norm": 0.05791931599378586, "learning_rate": 2.525670702754036e-05, "loss": 0.0182, "step": 67200 }, { "epoch": 0.4968067177197599, "grad_norm": 0.08446840941905975, "learning_rate": 2.5252997388414058e-05, "loss": 0.018, "step": 67210 }, { "epoch": 0.4968806362910618, "grad_norm": 0.09007827937602997, "learning_rate": 2.5249287749287747e-05, "loss": 0.0191, "step": 67220 }, { "epoch": 0.4969545548623636, "grad_norm": 0.09295041114091873, "learning_rate": 2.5245578110161443e-05, "loss": 0.0189, "step": 67230 }, { "epoch": 0.49702847343366546, "grad_norm": 0.12847813963890076, "learning_rate": 2.524186847103514e-05, "loss": 0.0159, "step": 67240 }, { "epoch": 0.4971023920049673, "grad_norm": 0.07954330742359161, "learning_rate": 2.5238158831908832e-05, "loss": 0.0182, "step": 67250 }, { "epoch": 0.4971763105762692, "grad_norm": 0.0992896780371666, "learning_rate": 2.5234449192782528e-05, "loss": 0.0198, "step": 67260 }, { "epoch": 0.49725022914757105, "grad_norm": 0.07747029513120651, "learning_rate": 2.5230739553656224e-05, "loss": 0.0188, "step": 67270 }, { "epoch": 0.4973241477188729, "grad_norm": 0.07811928540468216, "learning_rate": 2.5227029914529916e-05, "loss": 0.0141, "step": 67280 }, { "epoch": 0.49739806629017475, "grad_norm": 0.06636947393417358, "learning_rate": 2.5223320275403612e-05, "loss": 0.0151, "step": 67290 }, { "epoch": 0.4974719848614766, "grad_norm": 0.06473857909440994, "learning_rate": 2.52196106362773e-05, "loss": 0.0157, "step": 67300 }, { "epoch": 0.49754590343277844, "grad_norm": 0.13399842381477356, "learning_rate": 2.5215900997150998e-05, "loss": 0.0179, "step": 67310 }, { "epoch": 0.4976198220040803, "grad_norm": 0.07125803083181381, "learning_rate": 2.5212191358024694e-05, "loss": 0.0177, "step": 67320 }, { "epoch": 0.4976937405753822, "grad_norm": 0.06952224671840668, "learning_rate": 2.5208481718898386e-05, "loss": 0.0176, "step": 67330 }, { "epoch": 0.497767659146684, "grad_norm": 0.09094695746898651, "learning_rate": 2.5204772079772082e-05, "loss": 0.0214, "step": 67340 }, { "epoch": 0.4978415777179859, "grad_norm": 0.08997868001461029, "learning_rate": 2.520106244064577e-05, "loss": 0.0205, "step": 67350 }, { "epoch": 0.4979154962892877, "grad_norm": 0.05725601315498352, "learning_rate": 2.519735280151947e-05, "loss": 0.0172, "step": 67360 }, { "epoch": 0.49798941486058956, "grad_norm": 0.10619509220123291, "learning_rate": 2.5193643162393167e-05, "loss": 0.022, "step": 67370 }, { "epoch": 0.4980633334318914, "grad_norm": 0.07303507626056671, "learning_rate": 2.5189933523266856e-05, "loss": 0.018, "step": 67380 }, { "epoch": 0.4981372520031933, "grad_norm": 0.0775846466422081, "learning_rate": 2.5186223884140552e-05, "loss": 0.0177, "step": 67390 }, { "epoch": 0.49821117057449515, "grad_norm": 0.07945775240659714, "learning_rate": 2.5182514245014244e-05, "loss": 0.0182, "step": 67400 }, { "epoch": 0.498285089145797, "grad_norm": 0.07879578322172165, "learning_rate": 2.517880460588794e-05, "loss": 0.0179, "step": 67410 }, { "epoch": 0.49835900771709885, "grad_norm": 0.09689656645059586, "learning_rate": 2.5175094966761636e-05, "loss": 0.0189, "step": 67420 }, { "epoch": 0.4984329262884007, "grad_norm": 0.0677686408162117, "learning_rate": 2.517138532763533e-05, "loss": 0.0169, "step": 67430 }, { "epoch": 0.49850684485970254, "grad_norm": 0.09172812849283218, "learning_rate": 2.5167675688509025e-05, "loss": 0.0182, "step": 67440 }, { "epoch": 0.4985807634310044, "grad_norm": 0.08993425965309143, "learning_rate": 2.5163966049382714e-05, "loss": 0.019, "step": 67450 }, { "epoch": 0.4986546820023063, "grad_norm": 0.10504340380430222, "learning_rate": 2.516025641025641e-05, "loss": 0.019, "step": 67460 }, { "epoch": 0.4987286005736081, "grad_norm": 0.09816096723079681, "learning_rate": 2.5156546771130106e-05, "loss": 0.0169, "step": 67470 }, { "epoch": 0.49880251914491, "grad_norm": 0.08428593724966049, "learning_rate": 2.51528371320038e-05, "loss": 0.0182, "step": 67480 }, { "epoch": 0.4988764377162118, "grad_norm": 0.0799790471792221, "learning_rate": 2.5149127492877494e-05, "loss": 0.0194, "step": 67490 }, { "epoch": 0.49895035628751366, "grad_norm": 0.07538869976997375, "learning_rate": 2.514541785375119e-05, "loss": 0.0235, "step": 67500 }, { "epoch": 0.4990242748588155, "grad_norm": 0.07655777782201767, "learning_rate": 2.5141708214624883e-05, "loss": 0.018, "step": 67510 }, { "epoch": 0.4990981934301174, "grad_norm": 0.08150215446949005, "learning_rate": 2.513799857549858e-05, "loss": 0.0178, "step": 67520 }, { "epoch": 0.49917211200141925, "grad_norm": 0.08197018504142761, "learning_rate": 2.5134288936372268e-05, "loss": 0.0193, "step": 67530 }, { "epoch": 0.4992460305727211, "grad_norm": 0.09561475366353989, "learning_rate": 2.5130579297245964e-05, "loss": 0.0163, "step": 67540 }, { "epoch": 0.49931994914402295, "grad_norm": 0.06492964178323746, "learning_rate": 2.512686965811966e-05, "loss": 0.0156, "step": 67550 }, { "epoch": 0.4993938677153248, "grad_norm": 0.06733527779579163, "learning_rate": 2.5123160018993353e-05, "loss": 0.0188, "step": 67560 }, { "epoch": 0.49946778628662664, "grad_norm": 0.055312179028987885, "learning_rate": 2.511945037986705e-05, "loss": 0.0173, "step": 67570 }, { "epoch": 0.4995417048579285, "grad_norm": 0.096860371530056, "learning_rate": 2.511574074074074e-05, "loss": 0.0179, "step": 67580 }, { "epoch": 0.4996156234292304, "grad_norm": 0.06684661656618118, "learning_rate": 2.5112031101614437e-05, "loss": 0.0195, "step": 67590 }, { "epoch": 0.4996895420005322, "grad_norm": 0.10767112672328949, "learning_rate": 2.5108321462488133e-05, "loss": 0.0173, "step": 67600 }, { "epoch": 0.4997634605718341, "grad_norm": 0.07578767836093903, "learning_rate": 2.5104611823361822e-05, "loss": 0.0161, "step": 67610 }, { "epoch": 0.4998373791431359, "grad_norm": 0.07601311802864075, "learning_rate": 2.510090218423552e-05, "loss": 0.0188, "step": 67620 }, { "epoch": 0.49991129771443776, "grad_norm": 0.07916391640901566, "learning_rate": 2.509719254510921e-05, "loss": 0.0206, "step": 67630 }, { "epoch": 0.4999852162857396, "grad_norm": 0.09040073305368423, "learning_rate": 2.5093482905982907e-05, "loss": 0.018, "step": 67640 }, { "epoch": 0.5000591348570415, "grad_norm": 0.05715951323509216, "learning_rate": 2.5089773266856603e-05, "loss": 0.0172, "step": 67650 }, { "epoch": 0.5001330534283434, "grad_norm": 0.05888355150818825, "learning_rate": 2.5086063627730295e-05, "loss": 0.0167, "step": 67660 }, { "epoch": 0.5002069719996451, "grad_norm": 0.07443065196275711, "learning_rate": 2.508235398860399e-05, "loss": 0.0175, "step": 67670 }, { "epoch": 0.500280890570947, "grad_norm": 0.05773517116904259, "learning_rate": 2.507864434947768e-05, "loss": 0.0157, "step": 67680 }, { "epoch": 0.500354809142249, "grad_norm": 0.08666082471609116, "learning_rate": 2.5074934710351377e-05, "loss": 0.0182, "step": 67690 }, { "epoch": 0.5004287277135507, "grad_norm": 0.0956631749868393, "learning_rate": 2.5071225071225073e-05, "loss": 0.0195, "step": 67700 }, { "epoch": 0.5005026462848526, "grad_norm": 0.07875441014766693, "learning_rate": 2.5067515432098765e-05, "loss": 0.0169, "step": 67710 }, { "epoch": 0.5005765648561544, "grad_norm": 0.07937600463628769, "learning_rate": 2.506380579297246e-05, "loss": 0.0195, "step": 67720 }, { "epoch": 0.5006504834274563, "grad_norm": 0.0602254644036293, "learning_rate": 2.5060096153846157e-05, "loss": 0.0159, "step": 67730 }, { "epoch": 0.5007244019987581, "grad_norm": 0.10217604786157608, "learning_rate": 2.505638651471985e-05, "loss": 0.0178, "step": 67740 }, { "epoch": 0.50079832057006, "grad_norm": 0.08687470853328705, "learning_rate": 2.5052676875593546e-05, "loss": 0.0191, "step": 67750 }, { "epoch": 0.5008722391413619, "grad_norm": 0.0878107100725174, "learning_rate": 2.5048967236467235e-05, "loss": 0.0184, "step": 67760 }, { "epoch": 0.5009461577126637, "grad_norm": 0.0794096440076828, "learning_rate": 2.504525759734093e-05, "loss": 0.0172, "step": 67770 }, { "epoch": 0.5010200762839656, "grad_norm": 0.0839376449584961, "learning_rate": 2.504154795821463e-05, "loss": 0.0158, "step": 67780 }, { "epoch": 0.5010939948552674, "grad_norm": 0.09538474678993225, "learning_rate": 2.503783831908832e-05, "loss": 0.0222, "step": 67790 }, { "epoch": 0.5011679134265693, "grad_norm": 0.08887049555778503, "learning_rate": 2.5034128679962015e-05, "loss": 0.0189, "step": 67800 }, { "epoch": 0.5012418319978711, "grad_norm": 0.12286271154880524, "learning_rate": 2.5030419040835708e-05, "loss": 0.0162, "step": 67810 }, { "epoch": 0.501315750569173, "grad_norm": 0.0786716490983963, "learning_rate": 2.5026709401709404e-05, "loss": 0.0179, "step": 67820 }, { "epoch": 0.5013896691404749, "grad_norm": 0.08333033323287964, "learning_rate": 2.50229997625831e-05, "loss": 0.0183, "step": 67830 }, { "epoch": 0.5014635877117767, "grad_norm": 0.06486833095550537, "learning_rate": 2.501929012345679e-05, "loss": 0.0157, "step": 67840 }, { "epoch": 0.5015375062830786, "grad_norm": 0.07939372211694717, "learning_rate": 2.5015580484330485e-05, "loss": 0.0196, "step": 67850 }, { "epoch": 0.5016114248543804, "grad_norm": 0.08343151211738586, "learning_rate": 2.5011870845204177e-05, "loss": 0.0168, "step": 67860 }, { "epoch": 0.5016853434256823, "grad_norm": 0.0886319950222969, "learning_rate": 2.5008161206077873e-05, "loss": 0.0181, "step": 67870 }, { "epoch": 0.5017592619969842, "grad_norm": 0.07745710760354996, "learning_rate": 2.500445156695157e-05, "loss": 0.0174, "step": 67880 }, { "epoch": 0.501833180568286, "grad_norm": 0.10162079334259033, "learning_rate": 2.5000741927825262e-05, "loss": 0.0184, "step": 67890 }, { "epoch": 0.5019070991395879, "grad_norm": 0.06939810514450073, "learning_rate": 2.4997032288698958e-05, "loss": 0.0191, "step": 67900 }, { "epoch": 0.5019810177108897, "grad_norm": 0.09388607740402222, "learning_rate": 2.499332264957265e-05, "loss": 0.0212, "step": 67910 }, { "epoch": 0.5020549362821916, "grad_norm": 0.06481907516717911, "learning_rate": 2.4989613010446343e-05, "loss": 0.0169, "step": 67920 }, { "epoch": 0.5021288548534933, "grad_norm": 0.05753227323293686, "learning_rate": 2.498590337132004e-05, "loss": 0.0194, "step": 67930 }, { "epoch": 0.5022027734247952, "grad_norm": 0.07289828360080719, "learning_rate": 2.4982193732193735e-05, "loss": 0.0179, "step": 67940 }, { "epoch": 0.5022766919960971, "grad_norm": 0.08217739313840866, "learning_rate": 2.4978484093067428e-05, "loss": 0.0171, "step": 67950 }, { "epoch": 0.5023506105673989, "grad_norm": 0.07248316705226898, "learning_rate": 2.497477445394112e-05, "loss": 0.0168, "step": 67960 }, { "epoch": 0.5024245291387008, "grad_norm": 0.07131918519735336, "learning_rate": 2.4971064814814816e-05, "loss": 0.0181, "step": 67970 }, { "epoch": 0.5024984477100026, "grad_norm": 0.08347027003765106, "learning_rate": 2.4967355175688512e-05, "loss": 0.0149, "step": 67980 }, { "epoch": 0.5025723662813045, "grad_norm": 0.08220400661230087, "learning_rate": 2.4963645536562205e-05, "loss": 0.0185, "step": 67990 }, { "epoch": 0.5026462848526063, "grad_norm": 0.092999666929245, "learning_rate": 2.4959935897435897e-05, "loss": 0.0197, "step": 68000 }, { "epoch": 0.5027202034239082, "grad_norm": 0.0755041241645813, "learning_rate": 2.4956226258309593e-05, "loss": 0.0196, "step": 68010 }, { "epoch": 0.5027941219952101, "grad_norm": 0.07427439838647842, "learning_rate": 2.4952516619183286e-05, "loss": 0.0169, "step": 68020 }, { "epoch": 0.5028680405665119, "grad_norm": 0.08319801092147827, "learning_rate": 2.4948806980056982e-05, "loss": 0.0162, "step": 68030 }, { "epoch": 0.5029419591378138, "grad_norm": 0.1142725721001625, "learning_rate": 2.4945097340930674e-05, "loss": 0.0151, "step": 68040 }, { "epoch": 0.5030158777091156, "grad_norm": 0.0719955712556839, "learning_rate": 2.494138770180437e-05, "loss": 0.0169, "step": 68050 }, { "epoch": 0.5030897962804175, "grad_norm": 0.07777555286884308, "learning_rate": 2.4937678062678063e-05, "loss": 0.0193, "step": 68060 }, { "epoch": 0.5031637148517194, "grad_norm": 0.09181669354438782, "learning_rate": 2.4933968423551756e-05, "loss": 0.0168, "step": 68070 }, { "epoch": 0.5032376334230212, "grad_norm": 0.06580276042222977, "learning_rate": 2.493025878442545e-05, "loss": 0.0184, "step": 68080 }, { "epoch": 0.5033115519943231, "grad_norm": 0.07432732731103897, "learning_rate": 2.4926549145299147e-05, "loss": 0.0161, "step": 68090 }, { "epoch": 0.5033854705656249, "grad_norm": 0.07752696424722672, "learning_rate": 2.492283950617284e-05, "loss": 0.017, "step": 68100 }, { "epoch": 0.5034593891369268, "grad_norm": 0.07764051109552383, "learning_rate": 2.4919129867046533e-05, "loss": 0.0162, "step": 68110 }, { "epoch": 0.5035333077082286, "grad_norm": 0.09618353098630905, "learning_rate": 2.4915420227920232e-05, "loss": 0.0182, "step": 68120 }, { "epoch": 0.5036072262795305, "grad_norm": 0.07791148126125336, "learning_rate": 2.4911710588793925e-05, "loss": 0.0187, "step": 68130 }, { "epoch": 0.5036811448508324, "grad_norm": 0.09849604964256287, "learning_rate": 2.4908000949667617e-05, "loss": 0.0176, "step": 68140 }, { "epoch": 0.5037550634221342, "grad_norm": 0.1010863184928894, "learning_rate": 2.490429131054131e-05, "loss": 0.0206, "step": 68150 }, { "epoch": 0.5038289819934361, "grad_norm": 0.07524994015693665, "learning_rate": 2.4900581671415006e-05, "loss": 0.017, "step": 68160 }, { "epoch": 0.5039029005647379, "grad_norm": 0.0941774845123291, "learning_rate": 2.48968720322887e-05, "loss": 0.0173, "step": 68170 }, { "epoch": 0.5039768191360398, "grad_norm": 0.10298559069633484, "learning_rate": 2.4893162393162394e-05, "loss": 0.0204, "step": 68180 }, { "epoch": 0.5040507377073415, "grad_norm": 0.08465954661369324, "learning_rate": 2.4889452754036087e-05, "loss": 0.0192, "step": 68190 }, { "epoch": 0.5041246562786434, "grad_norm": 0.10369189828634262, "learning_rate": 2.4885743114909783e-05, "loss": 0.0203, "step": 68200 }, { "epoch": 0.5041985748499453, "grad_norm": 0.05515586957335472, "learning_rate": 2.488203347578348e-05, "loss": 0.0186, "step": 68210 }, { "epoch": 0.5042724934212471, "grad_norm": 0.0919659435749054, "learning_rate": 2.487832383665717e-05, "loss": 0.0164, "step": 68220 }, { "epoch": 0.504346411992549, "grad_norm": 0.10131487995386124, "learning_rate": 2.4874614197530864e-05, "loss": 0.0175, "step": 68230 }, { "epoch": 0.5044203305638508, "grad_norm": 0.10229624807834625, "learning_rate": 2.487090455840456e-05, "loss": 0.0195, "step": 68240 }, { "epoch": 0.5044942491351527, "grad_norm": 0.08007878810167313, "learning_rate": 2.4867194919278252e-05, "loss": 0.0202, "step": 68250 }, { "epoch": 0.5045681677064545, "grad_norm": 0.07810080796480179, "learning_rate": 2.486348528015195e-05, "loss": 0.018, "step": 68260 }, { "epoch": 0.5046420862777564, "grad_norm": 0.09035582840442657, "learning_rate": 2.4859775641025644e-05, "loss": 0.0174, "step": 68270 }, { "epoch": 0.5047160048490583, "grad_norm": 0.09939395636320114, "learning_rate": 2.4856066001899337e-05, "loss": 0.0157, "step": 68280 }, { "epoch": 0.5047899234203601, "grad_norm": 0.06271559000015259, "learning_rate": 2.485235636277303e-05, "loss": 0.0194, "step": 68290 }, { "epoch": 0.504863841991662, "grad_norm": 0.05262134224176407, "learning_rate": 2.4848646723646722e-05, "loss": 0.0168, "step": 68300 }, { "epoch": 0.5049377605629638, "grad_norm": 0.07836416363716125, "learning_rate": 2.484493708452042e-05, "loss": 0.0181, "step": 68310 }, { "epoch": 0.5050116791342657, "grad_norm": 0.08511856943368912, "learning_rate": 2.4841227445394114e-05, "loss": 0.0167, "step": 68320 }, { "epoch": 0.5050855977055676, "grad_norm": 0.1118873730301857, "learning_rate": 2.4837517806267807e-05, "loss": 0.02, "step": 68330 }, { "epoch": 0.5051595162768694, "grad_norm": 0.06002812087535858, "learning_rate": 2.48338081671415e-05, "loss": 0.0156, "step": 68340 }, { "epoch": 0.5052334348481713, "grad_norm": 0.07912244647741318, "learning_rate": 2.48300985280152e-05, "loss": 0.0178, "step": 68350 }, { "epoch": 0.5053073534194731, "grad_norm": 0.07713264971971512, "learning_rate": 2.482638888888889e-05, "loss": 0.0176, "step": 68360 }, { "epoch": 0.505381271990775, "grad_norm": 0.08661041408777237, "learning_rate": 2.4822679249762584e-05, "loss": 0.0157, "step": 68370 }, { "epoch": 0.5054551905620768, "grad_norm": 0.07685781270265579, "learning_rate": 2.4818969610636276e-05, "loss": 0.0185, "step": 68380 }, { "epoch": 0.5055291091333787, "grad_norm": 0.06656645238399506, "learning_rate": 2.4815259971509972e-05, "loss": 0.0198, "step": 68390 }, { "epoch": 0.5056030277046806, "grad_norm": 0.06481455266475677, "learning_rate": 2.4811550332383668e-05, "loss": 0.0166, "step": 68400 }, { "epoch": 0.5056769462759824, "grad_norm": 0.10537232458591461, "learning_rate": 2.480784069325736e-05, "loss": 0.0204, "step": 68410 }, { "epoch": 0.5057508648472843, "grad_norm": 0.08557964116334915, "learning_rate": 2.4804131054131057e-05, "loss": 0.0187, "step": 68420 }, { "epoch": 0.505824783418586, "grad_norm": 0.08855307102203369, "learning_rate": 2.480042141500475e-05, "loss": 0.0172, "step": 68430 }, { "epoch": 0.505898701989888, "grad_norm": 0.07538998126983643, "learning_rate": 2.4796711775878445e-05, "loss": 0.0177, "step": 68440 }, { "epoch": 0.5059726205611897, "grad_norm": 0.07634703814983368, "learning_rate": 2.4793002136752138e-05, "loss": 0.0205, "step": 68450 }, { "epoch": 0.5060465391324916, "grad_norm": 0.08731398731470108, "learning_rate": 2.4789292497625834e-05, "loss": 0.0167, "step": 68460 }, { "epoch": 0.5061204577037935, "grad_norm": 0.09150371700525284, "learning_rate": 2.4785582858499526e-05, "loss": 0.0184, "step": 68470 }, { "epoch": 0.5061943762750953, "grad_norm": 0.07076684385538101, "learning_rate": 2.478187321937322e-05, "loss": 0.0154, "step": 68480 }, { "epoch": 0.5062682948463972, "grad_norm": 0.07761862128973007, "learning_rate": 2.4778163580246915e-05, "loss": 0.0177, "step": 68490 }, { "epoch": 0.506342213417699, "grad_norm": 0.10504137724637985, "learning_rate": 2.477445394112061e-05, "loss": 0.0206, "step": 68500 }, { "epoch": 0.5064161319890009, "grad_norm": 0.08980455249547958, "learning_rate": 2.4770744301994304e-05, "loss": 0.0176, "step": 68510 }, { "epoch": 0.5064900505603027, "grad_norm": 0.09038835763931274, "learning_rate": 2.4767034662867996e-05, "loss": 0.0198, "step": 68520 }, { "epoch": 0.5065639691316046, "grad_norm": 0.08619700372219086, "learning_rate": 2.476332502374169e-05, "loss": 0.0187, "step": 68530 }, { "epoch": 0.5066378877029065, "grad_norm": 0.0925818458199501, "learning_rate": 2.4759615384615388e-05, "loss": 0.0177, "step": 68540 }, { "epoch": 0.5067118062742083, "grad_norm": 0.07353539019823074, "learning_rate": 2.475590574548908e-05, "loss": 0.0186, "step": 68550 }, { "epoch": 0.5067857248455102, "grad_norm": 0.09178286045789719, "learning_rate": 2.4752196106362773e-05, "loss": 0.0226, "step": 68560 }, { "epoch": 0.506859643416812, "grad_norm": 0.08274193108081818, "learning_rate": 2.4748486467236466e-05, "loss": 0.0154, "step": 68570 }, { "epoch": 0.5069335619881139, "grad_norm": 0.09752582758665085, "learning_rate": 2.4744776828110165e-05, "loss": 0.0205, "step": 68580 }, { "epoch": 0.5070074805594158, "grad_norm": 0.06460082530975342, "learning_rate": 2.4741067188983858e-05, "loss": 0.0171, "step": 68590 }, { "epoch": 0.5070813991307176, "grad_norm": 0.09168283641338348, "learning_rate": 2.473735754985755e-05, "loss": 0.0209, "step": 68600 }, { "epoch": 0.5071553177020195, "grad_norm": 0.09616568684577942, "learning_rate": 2.4733647910731246e-05, "loss": 0.0194, "step": 68610 }, { "epoch": 0.5072292362733213, "grad_norm": 0.090309739112854, "learning_rate": 2.472993827160494e-05, "loss": 0.0178, "step": 68620 }, { "epoch": 0.5073031548446232, "grad_norm": 0.07238759845495224, "learning_rate": 2.4726228632478635e-05, "loss": 0.0195, "step": 68630 }, { "epoch": 0.507377073415925, "grad_norm": 0.07042551040649414, "learning_rate": 2.4722518993352327e-05, "loss": 0.0191, "step": 68640 }, { "epoch": 0.5074509919872269, "grad_norm": 0.09275923669338226, "learning_rate": 2.4718809354226023e-05, "loss": 0.0169, "step": 68650 }, { "epoch": 0.5075249105585288, "grad_norm": 0.09293848276138306, "learning_rate": 2.4715099715099716e-05, "loss": 0.0169, "step": 68660 }, { "epoch": 0.5075988291298306, "grad_norm": 0.09748068451881409, "learning_rate": 2.4711390075973412e-05, "loss": 0.0167, "step": 68670 }, { "epoch": 0.5076727477011325, "grad_norm": 0.07259243726730347, "learning_rate": 2.4707680436847104e-05, "loss": 0.0202, "step": 68680 }, { "epoch": 0.5077466662724343, "grad_norm": 0.09704194217920303, "learning_rate": 2.47039707977208e-05, "loss": 0.0181, "step": 68690 }, { "epoch": 0.5078205848437362, "grad_norm": 0.09361874312162399, "learning_rate": 2.4700261158594493e-05, "loss": 0.0184, "step": 68700 }, { "epoch": 0.507894503415038, "grad_norm": 0.07901400327682495, "learning_rate": 2.4696551519468186e-05, "loss": 0.0179, "step": 68710 }, { "epoch": 0.5079684219863398, "grad_norm": 0.10706298798322678, "learning_rate": 2.469284188034188e-05, "loss": 0.0187, "step": 68720 }, { "epoch": 0.5080423405576417, "grad_norm": 0.07765945047140121, "learning_rate": 2.4689132241215578e-05, "loss": 0.0174, "step": 68730 }, { "epoch": 0.5081162591289435, "grad_norm": 0.06914977729320526, "learning_rate": 2.468542260208927e-05, "loss": 0.0168, "step": 68740 }, { "epoch": 0.5081901777002454, "grad_norm": 0.1388658732175827, "learning_rate": 2.4681712962962963e-05, "loss": 0.0191, "step": 68750 }, { "epoch": 0.5082640962715472, "grad_norm": 0.06330770254135132, "learning_rate": 2.467800332383666e-05, "loss": 0.0163, "step": 68760 }, { "epoch": 0.5083380148428491, "grad_norm": 0.08374302834272385, "learning_rate": 2.4674293684710355e-05, "loss": 0.0164, "step": 68770 }, { "epoch": 0.5084119334141509, "grad_norm": 0.09005650132894516, "learning_rate": 2.4670584045584047e-05, "loss": 0.0172, "step": 68780 }, { "epoch": 0.5084858519854528, "grad_norm": 0.08126549422740936, "learning_rate": 2.466687440645774e-05, "loss": 0.0193, "step": 68790 }, { "epoch": 0.5085597705567547, "grad_norm": 0.0886392742395401, "learning_rate": 2.4663164767331436e-05, "loss": 0.0177, "step": 68800 }, { "epoch": 0.5086336891280565, "grad_norm": 0.09654468297958374, "learning_rate": 2.4659455128205132e-05, "loss": 0.0198, "step": 68810 }, { "epoch": 0.5087076076993584, "grad_norm": 0.09124394506216049, "learning_rate": 2.4655745489078824e-05, "loss": 0.0177, "step": 68820 }, { "epoch": 0.5087815262706602, "grad_norm": 0.07470245659351349, "learning_rate": 2.4652035849952517e-05, "loss": 0.0168, "step": 68830 }, { "epoch": 0.5088554448419621, "grad_norm": 0.06541818380355835, "learning_rate": 2.4648326210826213e-05, "loss": 0.0172, "step": 68840 }, { "epoch": 0.508929363413264, "grad_norm": 0.06566719710826874, "learning_rate": 2.4644616571699905e-05, "loss": 0.0188, "step": 68850 }, { "epoch": 0.5090032819845658, "grad_norm": 0.08970692753791809, "learning_rate": 2.46409069325736e-05, "loss": 0.0173, "step": 68860 }, { "epoch": 0.5090772005558677, "grad_norm": 0.08380686491727829, "learning_rate": 2.4637197293447294e-05, "loss": 0.0174, "step": 68870 }, { "epoch": 0.5091511191271695, "grad_norm": 0.0961533933877945, "learning_rate": 2.463348765432099e-05, "loss": 0.019, "step": 68880 }, { "epoch": 0.5092250376984714, "grad_norm": 0.07376634329557419, "learning_rate": 2.4629778015194683e-05, "loss": 0.0175, "step": 68890 }, { "epoch": 0.5092989562697732, "grad_norm": 0.08185489475727081, "learning_rate": 2.462606837606838e-05, "loss": 0.0181, "step": 68900 }, { "epoch": 0.5093728748410751, "grad_norm": 0.11931279301643372, "learning_rate": 2.462235873694207e-05, "loss": 0.0192, "step": 68910 }, { "epoch": 0.509446793412377, "grad_norm": 0.09398669004440308, "learning_rate": 2.4618649097815767e-05, "loss": 0.0184, "step": 68920 }, { "epoch": 0.5095207119836788, "grad_norm": 0.08356818556785583, "learning_rate": 2.461493945868946e-05, "loss": 0.0189, "step": 68930 }, { "epoch": 0.5095946305549807, "grad_norm": 0.06756184250116348, "learning_rate": 2.4611229819563152e-05, "loss": 0.0179, "step": 68940 }, { "epoch": 0.5096685491262825, "grad_norm": 0.048982974141836166, "learning_rate": 2.4607520180436848e-05, "loss": 0.0182, "step": 68950 }, { "epoch": 0.5097424676975844, "grad_norm": 0.06728595495223999, "learning_rate": 2.4603810541310544e-05, "loss": 0.0163, "step": 68960 }, { "epoch": 0.5098163862688861, "grad_norm": 0.09846184402704239, "learning_rate": 2.4600100902184237e-05, "loss": 0.0166, "step": 68970 }, { "epoch": 0.509890304840188, "grad_norm": 0.0789187103509903, "learning_rate": 2.459639126305793e-05, "loss": 0.0183, "step": 68980 }, { "epoch": 0.50996422341149, "grad_norm": 0.06714697182178497, "learning_rate": 2.4592681623931625e-05, "loss": 0.0185, "step": 68990 }, { "epoch": 0.5100381419827917, "grad_norm": 0.06789720058441162, "learning_rate": 2.458897198480532e-05, "loss": 0.0162, "step": 69000 }, { "epoch": 0.5101120605540936, "grad_norm": 0.12073642015457153, "learning_rate": 2.4585262345679014e-05, "loss": 0.018, "step": 69010 }, { "epoch": 0.5101859791253954, "grad_norm": 0.07985580712556839, "learning_rate": 2.4581552706552706e-05, "loss": 0.017, "step": 69020 }, { "epoch": 0.5102598976966973, "grad_norm": 0.0621945783495903, "learning_rate": 2.4577843067426402e-05, "loss": 0.0187, "step": 69030 }, { "epoch": 0.5103338162679991, "grad_norm": 0.05953545123338699, "learning_rate": 2.45741334283001e-05, "loss": 0.017, "step": 69040 }, { "epoch": 0.510407734839301, "grad_norm": 0.08608721941709518, "learning_rate": 2.457042378917379e-05, "loss": 0.0192, "step": 69050 }, { "epoch": 0.5104816534106029, "grad_norm": 0.08489928394556046, "learning_rate": 2.4566714150047483e-05, "loss": 0.0173, "step": 69060 }, { "epoch": 0.5105555719819047, "grad_norm": 0.06985501945018768, "learning_rate": 2.456300451092118e-05, "loss": 0.0179, "step": 69070 }, { "epoch": 0.5106294905532066, "grad_norm": 0.07255925983190536, "learning_rate": 2.4559294871794872e-05, "loss": 0.0172, "step": 69080 }, { "epoch": 0.5107034091245084, "grad_norm": 0.09363655745983124, "learning_rate": 2.4555585232668568e-05, "loss": 0.0187, "step": 69090 }, { "epoch": 0.5107773276958103, "grad_norm": 0.07645261287689209, "learning_rate": 2.455187559354226e-05, "loss": 0.017, "step": 69100 }, { "epoch": 0.5108512462671122, "grad_norm": 0.07369133830070496, "learning_rate": 2.4548165954415957e-05, "loss": 0.0186, "step": 69110 }, { "epoch": 0.510925164838414, "grad_norm": 0.07639345526695251, "learning_rate": 2.454445631528965e-05, "loss": 0.0164, "step": 69120 }, { "epoch": 0.5109990834097159, "grad_norm": 0.05209139734506607, "learning_rate": 2.4540746676163345e-05, "loss": 0.0172, "step": 69130 }, { "epoch": 0.5110730019810177, "grad_norm": 0.09408107399940491, "learning_rate": 2.4537037037037038e-05, "loss": 0.0195, "step": 69140 }, { "epoch": 0.5111469205523196, "grad_norm": 0.07027251273393631, "learning_rate": 2.4533327397910734e-05, "loss": 0.0182, "step": 69150 }, { "epoch": 0.5112208391236214, "grad_norm": 0.10147867351770401, "learning_rate": 2.4529617758784426e-05, "loss": 0.0204, "step": 69160 }, { "epoch": 0.5112947576949233, "grad_norm": 0.07984784245491028, "learning_rate": 2.452590811965812e-05, "loss": 0.0187, "step": 69170 }, { "epoch": 0.5113686762662252, "grad_norm": 0.07958944141864777, "learning_rate": 2.4522198480531815e-05, "loss": 0.017, "step": 69180 }, { "epoch": 0.511442594837527, "grad_norm": 0.039841070771217346, "learning_rate": 2.451848884140551e-05, "loss": 0.0186, "step": 69190 }, { "epoch": 0.5115165134088289, "grad_norm": 0.07965556532144547, "learning_rate": 2.4514779202279203e-05, "loss": 0.0172, "step": 69200 }, { "epoch": 0.5115904319801307, "grad_norm": 0.0786585807800293, "learning_rate": 2.4511069563152896e-05, "loss": 0.0184, "step": 69210 }, { "epoch": 0.5116643505514326, "grad_norm": 0.06293124705553055, "learning_rate": 2.4507359924026592e-05, "loss": 0.0168, "step": 69220 }, { "epoch": 0.5117382691227343, "grad_norm": 0.12131396681070328, "learning_rate": 2.4503650284900288e-05, "loss": 0.0193, "step": 69230 }, { "epoch": 0.5118121876940362, "grad_norm": 0.07918912917375565, "learning_rate": 2.449994064577398e-05, "loss": 0.0201, "step": 69240 }, { "epoch": 0.5118861062653381, "grad_norm": 0.08990711718797684, "learning_rate": 2.4496231006647673e-05, "loss": 0.0225, "step": 69250 }, { "epoch": 0.5119600248366399, "grad_norm": 0.043771080672740936, "learning_rate": 2.449252136752137e-05, "loss": 0.0178, "step": 69260 }, { "epoch": 0.5120339434079418, "grad_norm": 0.12659858167171478, "learning_rate": 2.4488811728395065e-05, "loss": 0.0198, "step": 69270 }, { "epoch": 0.5121078619792436, "grad_norm": 0.07652303576469421, "learning_rate": 2.4485102089268757e-05, "loss": 0.0173, "step": 69280 }, { "epoch": 0.5121817805505455, "grad_norm": 0.07906925678253174, "learning_rate": 2.448139245014245e-05, "loss": 0.0191, "step": 69290 }, { "epoch": 0.5122556991218473, "grad_norm": 0.06692638993263245, "learning_rate": 2.4477682811016146e-05, "loss": 0.02, "step": 69300 }, { "epoch": 0.5123296176931492, "grad_norm": 0.07715941965579987, "learning_rate": 2.447397317188984e-05, "loss": 0.0184, "step": 69310 }, { "epoch": 0.5124035362644511, "grad_norm": 0.06227978691458702, "learning_rate": 2.4470263532763535e-05, "loss": 0.0158, "step": 69320 }, { "epoch": 0.5124774548357529, "grad_norm": 0.0803263857960701, "learning_rate": 2.4466553893637227e-05, "loss": 0.0166, "step": 69330 }, { "epoch": 0.5125513734070548, "grad_norm": 0.07377856224775314, "learning_rate": 2.4462844254510923e-05, "loss": 0.0162, "step": 69340 }, { "epoch": 0.5126252919783566, "grad_norm": 0.08652761578559875, "learning_rate": 2.4459134615384616e-05, "loss": 0.0204, "step": 69350 }, { "epoch": 0.5126992105496585, "grad_norm": 0.056324318051338196, "learning_rate": 2.445542497625831e-05, "loss": 0.0158, "step": 69360 }, { "epoch": 0.5127731291209604, "grad_norm": 0.07542183995246887, "learning_rate": 2.4451715337132004e-05, "loss": 0.0153, "step": 69370 }, { "epoch": 0.5128470476922622, "grad_norm": 0.06313826143741608, "learning_rate": 2.44480056980057e-05, "loss": 0.0172, "step": 69380 }, { "epoch": 0.5129209662635641, "grad_norm": 0.10730694979429245, "learning_rate": 2.4444296058879393e-05, "loss": 0.0202, "step": 69390 }, { "epoch": 0.5129948848348659, "grad_norm": 0.0748748779296875, "learning_rate": 2.4440586419753085e-05, "loss": 0.0175, "step": 69400 }, { "epoch": 0.5130688034061678, "grad_norm": 0.07341676205396652, "learning_rate": 2.443687678062678e-05, "loss": 0.017, "step": 69410 }, { "epoch": 0.5131427219774696, "grad_norm": 0.14897748827934265, "learning_rate": 2.4433167141500477e-05, "loss": 0.0171, "step": 69420 }, { "epoch": 0.5132166405487715, "grad_norm": 0.08443755656480789, "learning_rate": 2.442945750237417e-05, "loss": 0.0195, "step": 69430 }, { "epoch": 0.5132905591200734, "grad_norm": 0.0830007791519165, "learning_rate": 2.4425747863247862e-05, "loss": 0.0188, "step": 69440 }, { "epoch": 0.5133644776913752, "grad_norm": 0.08024938404560089, "learning_rate": 2.442203822412156e-05, "loss": 0.0189, "step": 69450 }, { "epoch": 0.5134383962626771, "grad_norm": 0.07932229340076447, "learning_rate": 2.4418328584995254e-05, "loss": 0.0164, "step": 69460 }, { "epoch": 0.5135123148339789, "grad_norm": 0.1015755757689476, "learning_rate": 2.4414618945868947e-05, "loss": 0.0162, "step": 69470 }, { "epoch": 0.5135862334052808, "grad_norm": 0.08583158254623413, "learning_rate": 2.441090930674264e-05, "loss": 0.0154, "step": 69480 }, { "epoch": 0.5136601519765825, "grad_norm": 0.08639636635780334, "learning_rate": 2.4407199667616336e-05, "loss": 0.018, "step": 69490 }, { "epoch": 0.5137340705478844, "grad_norm": 0.07380050420761108, "learning_rate": 2.440349002849003e-05, "loss": 0.0203, "step": 69500 }, { "epoch": 0.5138079891191863, "grad_norm": 0.0639515370130539, "learning_rate": 2.4399780389363724e-05, "loss": 0.017, "step": 69510 }, { "epoch": 0.5138819076904881, "grad_norm": 0.08474968373775482, "learning_rate": 2.4396070750237417e-05, "loss": 0.0186, "step": 69520 }, { "epoch": 0.51395582626179, "grad_norm": 0.09511318802833557, "learning_rate": 2.4392361111111113e-05, "loss": 0.0186, "step": 69530 }, { "epoch": 0.5140297448330918, "grad_norm": 0.08560945838689804, "learning_rate": 2.4388651471984805e-05, "loss": 0.0186, "step": 69540 }, { "epoch": 0.5141036634043937, "grad_norm": 0.08364006876945496, "learning_rate": 2.43849418328585e-05, "loss": 0.0194, "step": 69550 }, { "epoch": 0.5141775819756955, "grad_norm": 0.07831569015979767, "learning_rate": 2.4381232193732194e-05, "loss": 0.0185, "step": 69560 }, { "epoch": 0.5142515005469974, "grad_norm": 0.07500734180212021, "learning_rate": 2.437752255460589e-05, "loss": 0.0171, "step": 69570 }, { "epoch": 0.5143254191182993, "grad_norm": 0.10483544319868088, "learning_rate": 2.4373812915479582e-05, "loss": 0.019, "step": 69580 }, { "epoch": 0.5143993376896011, "grad_norm": 0.08549469709396362, "learning_rate": 2.4370103276353278e-05, "loss": 0.0191, "step": 69590 }, { "epoch": 0.514473256260903, "grad_norm": 0.06504369527101517, "learning_rate": 2.436639363722697e-05, "loss": 0.0189, "step": 69600 }, { "epoch": 0.5145471748322048, "grad_norm": 0.054876018315553665, "learning_rate": 2.4362683998100667e-05, "loss": 0.0172, "step": 69610 }, { "epoch": 0.5146210934035067, "grad_norm": 0.0693962424993515, "learning_rate": 2.435897435897436e-05, "loss": 0.0176, "step": 69620 }, { "epoch": 0.5146950119748086, "grad_norm": 0.11357058584690094, "learning_rate": 2.4355264719848052e-05, "loss": 0.0175, "step": 69630 }, { "epoch": 0.5147689305461104, "grad_norm": 0.09563953429460526, "learning_rate": 2.435155508072175e-05, "loss": 0.0178, "step": 69640 }, { "epoch": 0.5148428491174123, "grad_norm": 0.06931401789188385, "learning_rate": 2.4347845441595444e-05, "loss": 0.0156, "step": 69650 }, { "epoch": 0.5149167676887141, "grad_norm": 0.10174936056137085, "learning_rate": 2.4344135802469136e-05, "loss": 0.0204, "step": 69660 }, { "epoch": 0.514990686260016, "grad_norm": 0.0861804261803627, "learning_rate": 2.434042616334283e-05, "loss": 0.0177, "step": 69670 }, { "epoch": 0.5150646048313178, "grad_norm": 0.07918040454387665, "learning_rate": 2.4336716524216525e-05, "loss": 0.0176, "step": 69680 }, { "epoch": 0.5151385234026197, "grad_norm": 0.07646022737026215, "learning_rate": 2.433300688509022e-05, "loss": 0.016, "step": 69690 }, { "epoch": 0.5152124419739216, "grad_norm": 0.10380648821592331, "learning_rate": 2.4329297245963914e-05, "loss": 0.0185, "step": 69700 }, { "epoch": 0.5152863605452234, "grad_norm": 0.09445256739854813, "learning_rate": 2.4325587606837606e-05, "loss": 0.0167, "step": 69710 }, { "epoch": 0.5153602791165253, "grad_norm": 0.07841164618730545, "learning_rate": 2.4321877967711302e-05, "loss": 0.016, "step": 69720 }, { "epoch": 0.5154341976878271, "grad_norm": 0.12285198271274567, "learning_rate": 2.4318168328584998e-05, "loss": 0.0175, "step": 69730 }, { "epoch": 0.515508116259129, "grad_norm": 0.07969985902309418, "learning_rate": 2.431445868945869e-05, "loss": 0.0188, "step": 69740 }, { "epoch": 0.5155820348304307, "grad_norm": 0.09048346430063248, "learning_rate": 2.4310749050332383e-05, "loss": 0.0178, "step": 69750 }, { "epoch": 0.5156559534017326, "grad_norm": 0.08953306823968887, "learning_rate": 2.430703941120608e-05, "loss": 0.0165, "step": 69760 }, { "epoch": 0.5157298719730345, "grad_norm": 0.09533467888832092, "learning_rate": 2.4303329772079772e-05, "loss": 0.0171, "step": 69770 }, { "epoch": 0.5158037905443363, "grad_norm": 0.06188954412937164, "learning_rate": 2.4299620132953468e-05, "loss": 0.0179, "step": 69780 }, { "epoch": 0.5158777091156382, "grad_norm": 0.07564985007047653, "learning_rate": 2.4295910493827164e-05, "loss": 0.0157, "step": 69790 }, { "epoch": 0.51595162768694, "grad_norm": 0.18343132734298706, "learning_rate": 2.4292200854700856e-05, "loss": 0.0198, "step": 69800 }, { "epoch": 0.5160255462582419, "grad_norm": 0.07260244339704514, "learning_rate": 2.428849121557455e-05, "loss": 0.0179, "step": 69810 }, { "epoch": 0.5160994648295437, "grad_norm": 0.057750802487134933, "learning_rate": 2.4284781576448245e-05, "loss": 0.0151, "step": 69820 }, { "epoch": 0.5161733834008456, "grad_norm": 0.07174156606197357, "learning_rate": 2.428107193732194e-05, "loss": 0.0177, "step": 69830 }, { "epoch": 0.5162473019721475, "grad_norm": 0.09543014317750931, "learning_rate": 2.4277362298195633e-05, "loss": 0.0193, "step": 69840 }, { "epoch": 0.5163212205434493, "grad_norm": 0.08153504878282547, "learning_rate": 2.4273652659069326e-05, "loss": 0.0205, "step": 69850 }, { "epoch": 0.5163951391147512, "grad_norm": 0.07932714372873306, "learning_rate": 2.426994301994302e-05, "loss": 0.019, "step": 69860 }, { "epoch": 0.516469057686053, "grad_norm": 0.06483565270900726, "learning_rate": 2.4266233380816718e-05, "loss": 0.0173, "step": 69870 }, { "epoch": 0.5165429762573549, "grad_norm": 0.07810701429843903, "learning_rate": 2.426252374169041e-05, "loss": 0.0189, "step": 69880 }, { "epoch": 0.5166168948286568, "grad_norm": 0.087563157081604, "learning_rate": 2.4258814102564103e-05, "loss": 0.0184, "step": 69890 }, { "epoch": 0.5166908133999586, "grad_norm": 0.09393598139286041, "learning_rate": 2.4255104463437796e-05, "loss": 0.0173, "step": 69900 }, { "epoch": 0.5167647319712605, "grad_norm": 0.09094571322202682, "learning_rate": 2.425139482431149e-05, "loss": 0.02, "step": 69910 }, { "epoch": 0.5168386505425623, "grad_norm": 0.07238578796386719, "learning_rate": 2.4247685185185188e-05, "loss": 0.0161, "step": 69920 }, { "epoch": 0.5169125691138642, "grad_norm": 0.07430820167064667, "learning_rate": 2.424397554605888e-05, "loss": 0.0166, "step": 69930 }, { "epoch": 0.516986487685166, "grad_norm": 0.07901631295681, "learning_rate": 2.4240265906932573e-05, "loss": 0.0188, "step": 69940 }, { "epoch": 0.5170604062564679, "grad_norm": 0.06880009174346924, "learning_rate": 2.423655626780627e-05, "loss": 0.0164, "step": 69950 }, { "epoch": 0.5171343248277698, "grad_norm": 0.11061283200979233, "learning_rate": 2.4232846628679965e-05, "loss": 0.0181, "step": 69960 }, { "epoch": 0.5172082433990716, "grad_norm": 0.08216848969459534, "learning_rate": 2.4229136989553657e-05, "loss": 0.0179, "step": 69970 }, { "epoch": 0.5172821619703735, "grad_norm": 0.06348450481891632, "learning_rate": 2.4225427350427353e-05, "loss": 0.0191, "step": 69980 }, { "epoch": 0.5173560805416753, "grad_norm": 0.07877887785434723, "learning_rate": 2.4221717711301046e-05, "loss": 0.0167, "step": 69990 }, { "epoch": 0.5174299991129772, "grad_norm": 0.08459838479757309, "learning_rate": 2.421800807217474e-05, "loss": 0.0184, "step": 70000 }, { "epoch": 0.5174299991129772, "eval_f1": 0.6190886640565278, "eval_loss": 0.017734253779053688, "eval_precision": 0.4918493016326983, "eval_recall": 0.8351344665085773, "eval_runtime": 2668.5338, "eval_samples_per_second": 202.783, "eval_steps_per_second": 3.169, "step": 70000 }, { "epoch": 0.517503917684279, "grad_norm": 0.07879788428544998, "learning_rate": 2.4214298433048434e-05, "loss": 0.0189, "step": 70010 }, { "epoch": 0.5175778362555808, "grad_norm": 0.07976265996694565, "learning_rate": 2.421058879392213e-05, "loss": 0.0182, "step": 70020 }, { "epoch": 0.5176517548268827, "grad_norm": 0.10265471786260605, "learning_rate": 2.4206879154795823e-05, "loss": 0.0184, "step": 70030 }, { "epoch": 0.5177256733981845, "grad_norm": 0.0674351379275322, "learning_rate": 2.4203169515669515e-05, "loss": 0.0178, "step": 70040 }, { "epoch": 0.5177995919694864, "grad_norm": 0.08812592923641205, "learning_rate": 2.419945987654321e-05, "loss": 0.0191, "step": 70050 }, { "epoch": 0.5178735105407882, "grad_norm": 0.06502197682857513, "learning_rate": 2.4195750237416907e-05, "loss": 0.0167, "step": 70060 }, { "epoch": 0.5179474291120901, "grad_norm": 0.09050507843494415, "learning_rate": 2.41920405982906e-05, "loss": 0.0165, "step": 70070 }, { "epoch": 0.518021347683392, "grad_norm": 0.07016700506210327, "learning_rate": 2.4188330959164293e-05, "loss": 0.016, "step": 70080 }, { "epoch": 0.5180952662546938, "grad_norm": 0.07196346670389175, "learning_rate": 2.4184621320037985e-05, "loss": 0.0159, "step": 70090 }, { "epoch": 0.5181691848259957, "grad_norm": 0.08884629607200623, "learning_rate": 2.4180911680911684e-05, "loss": 0.0166, "step": 70100 }, { "epoch": 0.5182431033972975, "grad_norm": 0.05546234920620918, "learning_rate": 2.4177202041785377e-05, "loss": 0.0165, "step": 70110 }, { "epoch": 0.5183170219685994, "grad_norm": 0.09314189106225967, "learning_rate": 2.417349240265907e-05, "loss": 0.0171, "step": 70120 }, { "epoch": 0.5183909405399012, "grad_norm": 0.0631951317191124, "learning_rate": 2.4169782763532766e-05, "loss": 0.019, "step": 70130 }, { "epoch": 0.5184648591112031, "grad_norm": 0.07926510274410248, "learning_rate": 2.4166073124406458e-05, "loss": 0.0192, "step": 70140 }, { "epoch": 0.518538777682505, "grad_norm": 0.08929207921028137, "learning_rate": 2.4162363485280154e-05, "loss": 0.019, "step": 70150 }, { "epoch": 0.5186126962538068, "grad_norm": 0.07223989814519882, "learning_rate": 2.4158653846153847e-05, "loss": 0.0183, "step": 70160 }, { "epoch": 0.5186866148251087, "grad_norm": 0.06583063304424286, "learning_rate": 2.4154944207027543e-05, "loss": 0.0149, "step": 70170 }, { "epoch": 0.5187605333964105, "grad_norm": 0.09024758636951447, "learning_rate": 2.4151234567901235e-05, "loss": 0.0164, "step": 70180 }, { "epoch": 0.5188344519677124, "grad_norm": 0.0851876512169838, "learning_rate": 2.414752492877493e-05, "loss": 0.0179, "step": 70190 }, { "epoch": 0.5189083705390142, "grad_norm": 0.07299210131168365, "learning_rate": 2.4143815289648624e-05, "loss": 0.0185, "step": 70200 }, { "epoch": 0.5189822891103161, "grad_norm": 0.1315525621175766, "learning_rate": 2.414010565052232e-05, "loss": 0.0195, "step": 70210 }, { "epoch": 0.519056207681618, "grad_norm": 0.10280711948871613, "learning_rate": 2.4136396011396012e-05, "loss": 0.0213, "step": 70220 }, { "epoch": 0.5191301262529198, "grad_norm": 0.08981981873512268, "learning_rate": 2.4132686372269705e-05, "loss": 0.0159, "step": 70230 }, { "epoch": 0.5192040448242217, "grad_norm": 0.06911870837211609, "learning_rate": 2.41289767331434e-05, "loss": 0.0188, "step": 70240 }, { "epoch": 0.5192779633955235, "grad_norm": 0.09323382377624512, "learning_rate": 2.4125267094017097e-05, "loss": 0.0185, "step": 70250 }, { "epoch": 0.5193518819668254, "grad_norm": 0.08451409637928009, "learning_rate": 2.412155745489079e-05, "loss": 0.0166, "step": 70260 }, { "epoch": 0.5194258005381271, "grad_norm": 0.08477005362510681, "learning_rate": 2.4117847815764482e-05, "loss": 0.0185, "step": 70270 }, { "epoch": 0.519499719109429, "grad_norm": 0.06879612803459167, "learning_rate": 2.4114138176638178e-05, "loss": 0.0166, "step": 70280 }, { "epoch": 0.519573637680731, "grad_norm": 0.08084993809461594, "learning_rate": 2.4110428537511874e-05, "loss": 0.0198, "step": 70290 }, { "epoch": 0.5196475562520327, "grad_norm": 0.11922654509544373, "learning_rate": 2.4106718898385567e-05, "loss": 0.0187, "step": 70300 }, { "epoch": 0.5197214748233346, "grad_norm": 0.09586163610219955, "learning_rate": 2.410300925925926e-05, "loss": 0.0159, "step": 70310 }, { "epoch": 0.5197953933946364, "grad_norm": 0.08208409696817398, "learning_rate": 2.4099299620132955e-05, "loss": 0.0214, "step": 70320 }, { "epoch": 0.5198693119659383, "grad_norm": 0.08232421427965164, "learning_rate": 2.409558998100665e-05, "loss": 0.0178, "step": 70330 }, { "epoch": 0.5199432305372402, "grad_norm": 0.05796344578266144, "learning_rate": 2.4091880341880344e-05, "loss": 0.0171, "step": 70340 }, { "epoch": 0.520017149108542, "grad_norm": 0.07854857295751572, "learning_rate": 2.4088170702754036e-05, "loss": 0.0181, "step": 70350 }, { "epoch": 0.5200910676798439, "grad_norm": 0.06449563801288605, "learning_rate": 2.4084461063627732e-05, "loss": 0.0164, "step": 70360 }, { "epoch": 0.5201649862511457, "grad_norm": 0.09255155175924301, "learning_rate": 2.4080751424501425e-05, "loss": 0.0223, "step": 70370 }, { "epoch": 0.5202389048224476, "grad_norm": 0.07693065702915192, "learning_rate": 2.407704178537512e-05, "loss": 0.016, "step": 70380 }, { "epoch": 0.5203128233937494, "grad_norm": 0.09296828508377075, "learning_rate": 2.4073332146248813e-05, "loss": 0.0178, "step": 70390 }, { "epoch": 0.5203867419650513, "grad_norm": 0.08111296594142914, "learning_rate": 2.406962250712251e-05, "loss": 0.0169, "step": 70400 }, { "epoch": 0.5204606605363532, "grad_norm": 0.08747255057096481, "learning_rate": 2.4065912867996202e-05, "loss": 0.0194, "step": 70410 }, { "epoch": 0.520534579107655, "grad_norm": 0.07338342070579529, "learning_rate": 2.4062203228869898e-05, "loss": 0.0184, "step": 70420 }, { "epoch": 0.5206084976789569, "grad_norm": 0.06751928478479385, "learning_rate": 2.405849358974359e-05, "loss": 0.0165, "step": 70430 }, { "epoch": 0.5206824162502587, "grad_norm": 0.08731786906719208, "learning_rate": 2.4054783950617286e-05, "loss": 0.016, "step": 70440 }, { "epoch": 0.5207563348215606, "grad_norm": 0.07595943659543991, "learning_rate": 2.405107431149098e-05, "loss": 0.018, "step": 70450 }, { "epoch": 0.5208302533928624, "grad_norm": 0.09234201163053513, "learning_rate": 2.404736467236467e-05, "loss": 0.0176, "step": 70460 }, { "epoch": 0.5209041719641643, "grad_norm": 0.08772604912519455, "learning_rate": 2.4043655033238367e-05, "loss": 0.0177, "step": 70470 }, { "epoch": 0.5209780905354662, "grad_norm": 0.08371371030807495, "learning_rate": 2.4039945394112063e-05, "loss": 0.0201, "step": 70480 }, { "epoch": 0.521052009106768, "grad_norm": 0.06990265846252441, "learning_rate": 2.4036235754985756e-05, "loss": 0.0164, "step": 70490 }, { "epoch": 0.5211259276780699, "grad_norm": 0.11240319162607193, "learning_rate": 2.403252611585945e-05, "loss": 0.0164, "step": 70500 }, { "epoch": 0.5211998462493717, "grad_norm": 0.09507393836975098, "learning_rate": 2.4028816476733145e-05, "loss": 0.0175, "step": 70510 }, { "epoch": 0.5212737648206736, "grad_norm": 0.056186776608228683, "learning_rate": 2.402510683760684e-05, "loss": 0.0162, "step": 70520 }, { "epoch": 0.5213476833919753, "grad_norm": 0.07950075715780258, "learning_rate": 2.4021397198480533e-05, "loss": 0.0157, "step": 70530 }, { "epoch": 0.5214216019632772, "grad_norm": 0.06671198457479477, "learning_rate": 2.4017687559354226e-05, "loss": 0.02, "step": 70540 }, { "epoch": 0.5214955205345791, "grad_norm": 0.1159062460064888, "learning_rate": 2.401397792022792e-05, "loss": 0.0185, "step": 70550 }, { "epoch": 0.5215694391058809, "grad_norm": 0.059728387743234634, "learning_rate": 2.4010268281101618e-05, "loss": 0.0181, "step": 70560 }, { "epoch": 0.5216433576771828, "grad_norm": 0.07596202939748764, "learning_rate": 2.400655864197531e-05, "loss": 0.0177, "step": 70570 }, { "epoch": 0.5217172762484846, "grad_norm": 0.10383056849241257, "learning_rate": 2.4002849002849003e-05, "loss": 0.0183, "step": 70580 }, { "epoch": 0.5217911948197865, "grad_norm": 0.0796007513999939, "learning_rate": 2.39991393637227e-05, "loss": 0.0167, "step": 70590 }, { "epoch": 0.5218651133910884, "grad_norm": 0.06471708416938782, "learning_rate": 2.399542972459639e-05, "loss": 0.0171, "step": 70600 }, { "epoch": 0.5219390319623902, "grad_norm": 0.06391924619674683, "learning_rate": 2.3991720085470087e-05, "loss": 0.0182, "step": 70610 }, { "epoch": 0.5220129505336921, "grad_norm": 0.11240462213754654, "learning_rate": 2.398801044634378e-05, "loss": 0.0177, "step": 70620 }, { "epoch": 0.5220868691049939, "grad_norm": 0.07067076861858368, "learning_rate": 2.3984300807217476e-05, "loss": 0.0177, "step": 70630 }, { "epoch": 0.5221607876762958, "grad_norm": 0.07260678708553314, "learning_rate": 2.398059116809117e-05, "loss": 0.0191, "step": 70640 }, { "epoch": 0.5222347062475976, "grad_norm": 0.0927368700504303, "learning_rate": 2.3976881528964864e-05, "loss": 0.019, "step": 70650 }, { "epoch": 0.5223086248188995, "grad_norm": 0.07368585467338562, "learning_rate": 2.3973171889838557e-05, "loss": 0.0179, "step": 70660 }, { "epoch": 0.5223825433902014, "grad_norm": 0.09810861945152283, "learning_rate": 2.3969462250712253e-05, "loss": 0.0196, "step": 70670 }, { "epoch": 0.5224564619615032, "grad_norm": 0.08655349910259247, "learning_rate": 2.3965752611585946e-05, "loss": 0.0182, "step": 70680 }, { "epoch": 0.5225303805328051, "grad_norm": 0.06796327233314514, "learning_rate": 2.3962042972459638e-05, "loss": 0.0173, "step": 70690 }, { "epoch": 0.5226042991041069, "grad_norm": 0.08273261785507202, "learning_rate": 2.3958333333333334e-05, "loss": 0.0203, "step": 70700 }, { "epoch": 0.5226782176754088, "grad_norm": 0.08774213492870331, "learning_rate": 2.395462369420703e-05, "loss": 0.0177, "step": 70710 }, { "epoch": 0.5227521362467106, "grad_norm": 0.06766923516988754, "learning_rate": 2.3950914055080723e-05, "loss": 0.018, "step": 70720 }, { "epoch": 0.5228260548180125, "grad_norm": 0.08683652430772781, "learning_rate": 2.3947204415954415e-05, "loss": 0.0184, "step": 70730 }, { "epoch": 0.5228999733893144, "grad_norm": 0.06167571246623993, "learning_rate": 2.394349477682811e-05, "loss": 0.0166, "step": 70740 }, { "epoch": 0.5229738919606162, "grad_norm": 0.10443181544542313, "learning_rate": 2.3939785137701807e-05, "loss": 0.0173, "step": 70750 }, { "epoch": 0.5230478105319181, "grad_norm": 0.10969404131174088, "learning_rate": 2.39360754985755e-05, "loss": 0.0207, "step": 70760 }, { "epoch": 0.5231217291032199, "grad_norm": 0.0757172703742981, "learning_rate": 2.3932365859449192e-05, "loss": 0.0187, "step": 70770 }, { "epoch": 0.5231956476745218, "grad_norm": 0.0848625898361206, "learning_rate": 2.3928656220322888e-05, "loss": 0.0209, "step": 70780 }, { "epoch": 0.5232695662458235, "grad_norm": 0.06197137385606766, "learning_rate": 2.3924946581196584e-05, "loss": 0.0179, "step": 70790 }, { "epoch": 0.5233434848171254, "grad_norm": 0.06887900829315186, "learning_rate": 2.3921236942070277e-05, "loss": 0.0174, "step": 70800 }, { "epoch": 0.5234174033884273, "grad_norm": 0.07631140947341919, "learning_rate": 2.391752730294397e-05, "loss": 0.0191, "step": 70810 }, { "epoch": 0.5234913219597291, "grad_norm": 0.08222679793834686, "learning_rate": 2.3913817663817665e-05, "loss": 0.0185, "step": 70820 }, { "epoch": 0.523565240531031, "grad_norm": 0.07283231616020203, "learning_rate": 2.3910108024691358e-05, "loss": 0.0171, "step": 70830 }, { "epoch": 0.5236391591023328, "grad_norm": 0.07829372584819794, "learning_rate": 2.3906398385565054e-05, "loss": 0.0179, "step": 70840 }, { "epoch": 0.5237130776736347, "grad_norm": 0.06830435246229172, "learning_rate": 2.3902688746438746e-05, "loss": 0.0199, "step": 70850 }, { "epoch": 0.5237869962449366, "grad_norm": 0.10674279183149338, "learning_rate": 2.3898979107312442e-05, "loss": 0.0214, "step": 70860 }, { "epoch": 0.5238609148162384, "grad_norm": 0.06788614392280579, "learning_rate": 2.3895269468186135e-05, "loss": 0.0171, "step": 70870 }, { "epoch": 0.5239348333875403, "grad_norm": 0.07488367706537247, "learning_rate": 2.389155982905983e-05, "loss": 0.0183, "step": 70880 }, { "epoch": 0.5240087519588421, "grad_norm": 0.07454147189855576, "learning_rate": 2.3887850189933524e-05, "loss": 0.0207, "step": 70890 }, { "epoch": 0.524082670530144, "grad_norm": 0.06669653952121735, "learning_rate": 2.388414055080722e-05, "loss": 0.019, "step": 70900 }, { "epoch": 0.5241565891014458, "grad_norm": 0.06841177493333817, "learning_rate": 2.3880430911680912e-05, "loss": 0.0172, "step": 70910 }, { "epoch": 0.5242305076727477, "grad_norm": 0.08513778448104858, "learning_rate": 2.3876721272554605e-05, "loss": 0.0191, "step": 70920 }, { "epoch": 0.5243044262440496, "grad_norm": 0.08537270873785019, "learning_rate": 2.38730116334283e-05, "loss": 0.0171, "step": 70930 }, { "epoch": 0.5243783448153514, "grad_norm": 0.09533053636550903, "learning_rate": 2.3869301994301997e-05, "loss": 0.0199, "step": 70940 }, { "epoch": 0.5244522633866533, "grad_norm": 0.07149159163236618, "learning_rate": 2.386559235517569e-05, "loss": 0.0168, "step": 70950 }, { "epoch": 0.5245261819579551, "grad_norm": 0.09240260720252991, "learning_rate": 2.3861882716049382e-05, "loss": 0.0193, "step": 70960 }, { "epoch": 0.524600100529257, "grad_norm": 0.07488304376602173, "learning_rate": 2.3858173076923078e-05, "loss": 0.0183, "step": 70970 }, { "epoch": 0.5246740191005588, "grad_norm": 0.061253637075424194, "learning_rate": 2.3854463437796774e-05, "loss": 0.0179, "step": 70980 }, { "epoch": 0.5247479376718607, "grad_norm": 0.08286695927381516, "learning_rate": 2.3850753798670466e-05, "loss": 0.0184, "step": 70990 }, { "epoch": 0.5248218562431626, "grad_norm": 0.09659498184919357, "learning_rate": 2.384704415954416e-05, "loss": 0.0179, "step": 71000 }, { "epoch": 0.5248957748144644, "grad_norm": 0.05578518658876419, "learning_rate": 2.3843334520417855e-05, "loss": 0.0157, "step": 71010 }, { "epoch": 0.5249696933857663, "grad_norm": 0.07591883838176727, "learning_rate": 2.383962488129155e-05, "loss": 0.0174, "step": 71020 }, { "epoch": 0.5250436119570681, "grad_norm": 0.08070017397403717, "learning_rate": 2.3835915242165243e-05, "loss": 0.0164, "step": 71030 }, { "epoch": 0.52511753052837, "grad_norm": 0.05172639340162277, "learning_rate": 2.3832205603038936e-05, "loss": 0.0175, "step": 71040 }, { "epoch": 0.5251914490996717, "grad_norm": 0.07358026504516602, "learning_rate": 2.3828495963912632e-05, "loss": 0.0172, "step": 71050 }, { "epoch": 0.5252653676709736, "grad_norm": 0.0738629549741745, "learning_rate": 2.3824786324786324e-05, "loss": 0.0167, "step": 71060 }, { "epoch": 0.5253392862422755, "grad_norm": 0.06652035564184189, "learning_rate": 2.382107668566002e-05, "loss": 0.0158, "step": 71070 }, { "epoch": 0.5254132048135773, "grad_norm": 0.09065684676170349, "learning_rate": 2.3817367046533713e-05, "loss": 0.0183, "step": 71080 }, { "epoch": 0.5254871233848792, "grad_norm": 0.09338533133268356, "learning_rate": 2.381365740740741e-05, "loss": 0.0197, "step": 71090 }, { "epoch": 0.525561041956181, "grad_norm": 0.07664430141448975, "learning_rate": 2.38099477682811e-05, "loss": 0.0191, "step": 71100 }, { "epoch": 0.5256349605274829, "grad_norm": 0.07986918091773987, "learning_rate": 2.3806238129154798e-05, "loss": 0.0176, "step": 71110 }, { "epoch": 0.5257088790987848, "grad_norm": 0.07862329483032227, "learning_rate": 2.380252849002849e-05, "loss": 0.0164, "step": 71120 }, { "epoch": 0.5257827976700866, "grad_norm": 0.05761149525642395, "learning_rate": 2.3798818850902186e-05, "loss": 0.0172, "step": 71130 }, { "epoch": 0.5258567162413885, "grad_norm": 0.06935084611177444, "learning_rate": 2.379510921177588e-05, "loss": 0.0172, "step": 71140 }, { "epoch": 0.5259306348126903, "grad_norm": 0.05792957916855812, "learning_rate": 2.379139957264957e-05, "loss": 0.0174, "step": 71150 }, { "epoch": 0.5260045533839922, "grad_norm": 0.07632586359977722, "learning_rate": 2.3787689933523267e-05, "loss": 0.0172, "step": 71160 }, { "epoch": 0.526078471955294, "grad_norm": 0.077678382396698, "learning_rate": 2.3783980294396963e-05, "loss": 0.0167, "step": 71170 }, { "epoch": 0.5261523905265959, "grad_norm": 0.09699144959449768, "learning_rate": 2.3780270655270656e-05, "loss": 0.018, "step": 71180 }, { "epoch": 0.5262263090978978, "grad_norm": 0.06520868092775345, "learning_rate": 2.377656101614435e-05, "loss": 0.0148, "step": 71190 }, { "epoch": 0.5263002276691996, "grad_norm": 0.07202000170946121, "learning_rate": 2.3772851377018048e-05, "loss": 0.0203, "step": 71200 }, { "epoch": 0.5263741462405015, "grad_norm": 0.09399239718914032, "learning_rate": 2.376914173789174e-05, "loss": 0.0187, "step": 71210 }, { "epoch": 0.5264480648118033, "grad_norm": 0.05629691854119301, "learning_rate": 2.3765432098765433e-05, "loss": 0.0174, "step": 71220 }, { "epoch": 0.5265219833831052, "grad_norm": 0.05375561863183975, "learning_rate": 2.3761722459639125e-05, "loss": 0.0181, "step": 71230 }, { "epoch": 0.526595901954407, "grad_norm": 0.07988496124744415, "learning_rate": 2.375801282051282e-05, "loss": 0.0199, "step": 71240 }, { "epoch": 0.5266698205257089, "grad_norm": 0.07610704749822617, "learning_rate": 2.3754303181386517e-05, "loss": 0.0152, "step": 71250 }, { "epoch": 0.5267437390970108, "grad_norm": 0.07854470610618591, "learning_rate": 2.375059354226021e-05, "loss": 0.0183, "step": 71260 }, { "epoch": 0.5268176576683126, "grad_norm": 0.06778319180011749, "learning_rate": 2.3746883903133903e-05, "loss": 0.0194, "step": 71270 }, { "epoch": 0.5268915762396145, "grad_norm": 0.06963464617729187, "learning_rate": 2.37431742640076e-05, "loss": 0.0188, "step": 71280 }, { "epoch": 0.5269654948109163, "grad_norm": 0.07358632236719131, "learning_rate": 2.373946462488129e-05, "loss": 0.0193, "step": 71290 }, { "epoch": 0.5270394133822182, "grad_norm": 0.07299153506755829, "learning_rate": 2.3735754985754987e-05, "loss": 0.0163, "step": 71300 }, { "epoch": 0.52711333195352, "grad_norm": 0.08467881381511688, "learning_rate": 2.373204534662868e-05, "loss": 0.0179, "step": 71310 }, { "epoch": 0.5271872505248218, "grad_norm": 0.07622227072715759, "learning_rate": 2.3728335707502376e-05, "loss": 0.0208, "step": 71320 }, { "epoch": 0.5272611690961237, "grad_norm": 0.10649682581424713, "learning_rate": 2.3724626068376068e-05, "loss": 0.0173, "step": 71330 }, { "epoch": 0.5273350876674255, "grad_norm": 0.07591233402490616, "learning_rate": 2.3720916429249764e-05, "loss": 0.0188, "step": 71340 }, { "epoch": 0.5274090062387274, "grad_norm": 0.12941525876522064, "learning_rate": 2.371720679012346e-05, "loss": 0.0203, "step": 71350 }, { "epoch": 0.5274829248100292, "grad_norm": 0.1032840758562088, "learning_rate": 2.3713497150997153e-05, "loss": 0.0162, "step": 71360 }, { "epoch": 0.5275568433813311, "grad_norm": 0.08252818137407303, "learning_rate": 2.3709787511870845e-05, "loss": 0.0187, "step": 71370 }, { "epoch": 0.527630761952633, "grad_norm": 0.0837445855140686, "learning_rate": 2.3706077872744538e-05, "loss": 0.0194, "step": 71380 }, { "epoch": 0.5277046805239348, "grad_norm": 0.05505933240056038, "learning_rate": 2.3702368233618237e-05, "loss": 0.0169, "step": 71390 }, { "epoch": 0.5277785990952367, "grad_norm": 0.0914740040898323, "learning_rate": 2.369865859449193e-05, "loss": 0.0184, "step": 71400 }, { "epoch": 0.5278525176665385, "grad_norm": 0.07132803648710251, "learning_rate": 2.3694948955365622e-05, "loss": 0.0189, "step": 71410 }, { "epoch": 0.5279264362378404, "grad_norm": 0.0907021313905716, "learning_rate": 2.3691239316239315e-05, "loss": 0.0182, "step": 71420 }, { "epoch": 0.5280003548091422, "grad_norm": 0.07915528118610382, "learning_rate": 2.3687529677113014e-05, "loss": 0.0167, "step": 71430 }, { "epoch": 0.5280742733804441, "grad_norm": 0.08348540216684341, "learning_rate": 2.3683820037986707e-05, "loss": 0.0177, "step": 71440 }, { "epoch": 0.528148191951746, "grad_norm": 0.07703004032373428, "learning_rate": 2.36801103988604e-05, "loss": 0.0207, "step": 71450 }, { "epoch": 0.5282221105230478, "grad_norm": 0.09287678450345993, "learning_rate": 2.3676400759734092e-05, "loss": 0.0185, "step": 71460 }, { "epoch": 0.5282960290943497, "grad_norm": 0.08298031985759735, "learning_rate": 2.3672691120607788e-05, "loss": 0.0172, "step": 71470 }, { "epoch": 0.5283699476656515, "grad_norm": 0.08304513990879059, "learning_rate": 2.3668981481481484e-05, "loss": 0.0167, "step": 71480 }, { "epoch": 0.5284438662369534, "grad_norm": 0.07882574200630188, "learning_rate": 2.3665271842355177e-05, "loss": 0.0181, "step": 71490 }, { "epoch": 0.5285177848082552, "grad_norm": 0.10319788008928299, "learning_rate": 2.3661562203228873e-05, "loss": 0.019, "step": 71500 }, { "epoch": 0.5285917033795571, "grad_norm": 0.0673266127705574, "learning_rate": 2.3657852564102565e-05, "loss": 0.0204, "step": 71510 }, { "epoch": 0.528665621950859, "grad_norm": 0.08062201738357544, "learning_rate": 2.3654142924976258e-05, "loss": 0.0164, "step": 71520 }, { "epoch": 0.5287395405221608, "grad_norm": 0.10110322386026382, "learning_rate": 2.3650433285849954e-05, "loss": 0.0206, "step": 71530 }, { "epoch": 0.5288134590934627, "grad_norm": 0.05925064533948898, "learning_rate": 2.364672364672365e-05, "loss": 0.018, "step": 71540 }, { "epoch": 0.5288873776647645, "grad_norm": 0.0888727605342865, "learning_rate": 2.3643014007597342e-05, "loss": 0.0189, "step": 71550 }, { "epoch": 0.5289612962360664, "grad_norm": 0.05731735751032829, "learning_rate": 2.3639304368471035e-05, "loss": 0.0154, "step": 71560 }, { "epoch": 0.5290352148073681, "grad_norm": 0.06728977710008621, "learning_rate": 2.363559472934473e-05, "loss": 0.0167, "step": 71570 }, { "epoch": 0.52910913337867, "grad_norm": 0.1000228300690651, "learning_rate": 2.3631885090218427e-05, "loss": 0.0181, "step": 71580 }, { "epoch": 0.529183051949972, "grad_norm": 0.09549673646688461, "learning_rate": 2.362817545109212e-05, "loss": 0.0193, "step": 71590 }, { "epoch": 0.5292569705212737, "grad_norm": 0.11185158789157867, "learning_rate": 2.3624465811965812e-05, "loss": 0.0201, "step": 71600 }, { "epoch": 0.5293308890925756, "grad_norm": 0.06617502868175507, "learning_rate": 2.3620756172839504e-05, "loss": 0.0172, "step": 71610 }, { "epoch": 0.5294048076638774, "grad_norm": 0.062105994671583176, "learning_rate": 2.3617046533713204e-05, "loss": 0.0166, "step": 71620 }, { "epoch": 0.5294787262351793, "grad_norm": 0.09233871847391129, "learning_rate": 2.3613336894586896e-05, "loss": 0.0184, "step": 71630 }, { "epoch": 0.5295526448064812, "grad_norm": 0.06355202198028564, "learning_rate": 2.360962725546059e-05, "loss": 0.0172, "step": 71640 }, { "epoch": 0.529626563377783, "grad_norm": 0.0670185461640358, "learning_rate": 2.3605917616334285e-05, "loss": 0.0195, "step": 71650 }, { "epoch": 0.5297004819490849, "grad_norm": 0.07617296278476715, "learning_rate": 2.360220797720798e-05, "loss": 0.0179, "step": 71660 }, { "epoch": 0.5297744005203867, "grad_norm": 0.055757950991392136, "learning_rate": 2.3598498338081673e-05, "loss": 0.0173, "step": 71670 }, { "epoch": 0.5298483190916886, "grad_norm": 0.0978802815079689, "learning_rate": 2.3594788698955366e-05, "loss": 0.0163, "step": 71680 }, { "epoch": 0.5299222376629904, "grad_norm": 0.06614841520786285, "learning_rate": 2.3591079059829062e-05, "loss": 0.0178, "step": 71690 }, { "epoch": 0.5299961562342923, "grad_norm": 0.10969319939613342, "learning_rate": 2.3587369420702755e-05, "loss": 0.0189, "step": 71700 }, { "epoch": 0.5300700748055942, "grad_norm": 0.07790661603212357, "learning_rate": 2.358365978157645e-05, "loss": 0.0183, "step": 71710 }, { "epoch": 0.530143993376896, "grad_norm": 0.04722243547439575, "learning_rate": 2.3579950142450143e-05, "loss": 0.016, "step": 71720 }, { "epoch": 0.5302179119481979, "grad_norm": 0.09032213687896729, "learning_rate": 2.357624050332384e-05, "loss": 0.0155, "step": 71730 }, { "epoch": 0.5302918305194997, "grad_norm": 0.07642046362161636, "learning_rate": 2.357253086419753e-05, "loss": 0.0162, "step": 71740 }, { "epoch": 0.5303657490908016, "grad_norm": 0.1244816929101944, "learning_rate": 2.3568821225071228e-05, "loss": 0.0197, "step": 71750 }, { "epoch": 0.5304396676621034, "grad_norm": 0.0643230676651001, "learning_rate": 2.356511158594492e-05, "loss": 0.0167, "step": 71760 }, { "epoch": 0.5305135862334053, "grad_norm": 0.07190030813217163, "learning_rate": 2.3561401946818616e-05, "loss": 0.0185, "step": 71770 }, { "epoch": 0.5305875048047072, "grad_norm": 0.06764136254787445, "learning_rate": 2.355769230769231e-05, "loss": 0.0198, "step": 71780 }, { "epoch": 0.530661423376009, "grad_norm": 0.10977832227945328, "learning_rate": 2.3553982668566e-05, "loss": 0.0203, "step": 71790 }, { "epoch": 0.5307353419473109, "grad_norm": 0.09612040966749191, "learning_rate": 2.3550273029439697e-05, "loss": 0.0182, "step": 71800 }, { "epoch": 0.5308092605186127, "grad_norm": 0.08732334524393082, "learning_rate": 2.3546563390313393e-05, "loss": 0.0179, "step": 71810 }, { "epoch": 0.5308831790899146, "grad_norm": 0.0612306222319603, "learning_rate": 2.3542853751187086e-05, "loss": 0.0192, "step": 71820 }, { "epoch": 0.5309570976612165, "grad_norm": 0.08263002336025238, "learning_rate": 2.353914411206078e-05, "loss": 0.0188, "step": 71830 }, { "epoch": 0.5310310162325182, "grad_norm": 0.06137899309396744, "learning_rate": 2.3535434472934474e-05, "loss": 0.0191, "step": 71840 }, { "epoch": 0.5311049348038202, "grad_norm": 0.07601217925548553, "learning_rate": 2.353172483380817e-05, "loss": 0.0164, "step": 71850 }, { "epoch": 0.5311788533751219, "grad_norm": 0.07243067026138306, "learning_rate": 2.3528015194681863e-05, "loss": 0.0178, "step": 71860 }, { "epoch": 0.5312527719464238, "grad_norm": 0.09085898101329803, "learning_rate": 2.3524305555555556e-05, "loss": 0.0178, "step": 71870 }, { "epoch": 0.5313266905177256, "grad_norm": 0.09606260061264038, "learning_rate": 2.352059591642925e-05, "loss": 0.0207, "step": 71880 }, { "epoch": 0.5314006090890275, "grad_norm": 0.05547720566391945, "learning_rate": 2.3516886277302947e-05, "loss": 0.0174, "step": 71890 }, { "epoch": 0.5314745276603294, "grad_norm": 0.08246821910142899, "learning_rate": 2.351317663817664e-05, "loss": 0.0177, "step": 71900 }, { "epoch": 0.5315484462316312, "grad_norm": 0.11368558555841446, "learning_rate": 2.3509466999050333e-05, "loss": 0.0159, "step": 71910 }, { "epoch": 0.5316223648029331, "grad_norm": 0.09380337595939636, "learning_rate": 2.350575735992403e-05, "loss": 0.0168, "step": 71920 }, { "epoch": 0.5316962833742349, "grad_norm": 0.05818706005811691, "learning_rate": 2.350204772079772e-05, "loss": 0.0184, "step": 71930 }, { "epoch": 0.5317702019455368, "grad_norm": 0.08017941564321518, "learning_rate": 2.3498338081671417e-05, "loss": 0.0175, "step": 71940 }, { "epoch": 0.5318441205168386, "grad_norm": 0.08289653807878494, "learning_rate": 2.349462844254511e-05, "loss": 0.0156, "step": 71950 }, { "epoch": 0.5319180390881405, "grad_norm": 0.06446444988250732, "learning_rate": 2.3490918803418806e-05, "loss": 0.0161, "step": 71960 }, { "epoch": 0.5319919576594424, "grad_norm": 0.07639432698488235, "learning_rate": 2.3487209164292498e-05, "loss": 0.0171, "step": 71970 }, { "epoch": 0.5320658762307442, "grad_norm": 0.049774058163166046, "learning_rate": 2.3483499525166194e-05, "loss": 0.0187, "step": 71980 }, { "epoch": 0.5321397948020461, "grad_norm": 0.09060239046812057, "learning_rate": 2.3479789886039887e-05, "loss": 0.0178, "step": 71990 }, { "epoch": 0.5322137133733479, "grad_norm": 0.08577951043844223, "learning_rate": 2.3476080246913583e-05, "loss": 0.0203, "step": 72000 }, { "epoch": 0.5322876319446498, "grad_norm": 0.08357306569814682, "learning_rate": 2.3472370607787275e-05, "loss": 0.0189, "step": 72010 }, { "epoch": 0.5323615505159516, "grad_norm": 0.0907038152217865, "learning_rate": 2.3468660968660968e-05, "loss": 0.0179, "step": 72020 }, { "epoch": 0.5324354690872535, "grad_norm": 0.07584109157323837, "learning_rate": 2.3464951329534664e-05, "loss": 0.0181, "step": 72030 }, { "epoch": 0.5325093876585554, "grad_norm": 0.06174978241324425, "learning_rate": 2.346124169040836e-05, "loss": 0.0189, "step": 72040 }, { "epoch": 0.5325833062298572, "grad_norm": 0.07204340398311615, "learning_rate": 2.3457532051282052e-05, "loss": 0.0179, "step": 72050 }, { "epoch": 0.5326572248011591, "grad_norm": 0.07583874464035034, "learning_rate": 2.3453822412155745e-05, "loss": 0.0156, "step": 72060 }, { "epoch": 0.5327311433724609, "grad_norm": 0.07981467247009277, "learning_rate": 2.345011277302944e-05, "loss": 0.0189, "step": 72070 }, { "epoch": 0.5328050619437628, "grad_norm": 0.0859747901558876, "learning_rate": 2.3446403133903137e-05, "loss": 0.0169, "step": 72080 }, { "epoch": 0.5328789805150647, "grad_norm": 0.060712117701768875, "learning_rate": 2.344269349477683e-05, "loss": 0.0171, "step": 72090 }, { "epoch": 0.5329528990863664, "grad_norm": 0.10640475898981094, "learning_rate": 2.3438983855650522e-05, "loss": 0.0203, "step": 72100 }, { "epoch": 0.5330268176576684, "grad_norm": 0.07040079683065414, "learning_rate": 2.3435274216524218e-05, "loss": 0.0169, "step": 72110 }, { "epoch": 0.5331007362289701, "grad_norm": 0.06555905193090439, "learning_rate": 2.3431564577397914e-05, "loss": 0.0183, "step": 72120 }, { "epoch": 0.533174654800272, "grad_norm": 0.0838426873087883, "learning_rate": 2.3427854938271607e-05, "loss": 0.0167, "step": 72130 }, { "epoch": 0.5332485733715738, "grad_norm": 0.061927784234285355, "learning_rate": 2.34241452991453e-05, "loss": 0.0163, "step": 72140 }, { "epoch": 0.5333224919428757, "grad_norm": 0.08208876848220825, "learning_rate": 2.3420435660018995e-05, "loss": 0.0173, "step": 72150 }, { "epoch": 0.5333964105141776, "grad_norm": 0.0723063200712204, "learning_rate": 2.3416726020892688e-05, "loss": 0.0197, "step": 72160 }, { "epoch": 0.5334703290854794, "grad_norm": 0.07213089615106583, "learning_rate": 2.3413016381766384e-05, "loss": 0.0176, "step": 72170 }, { "epoch": 0.5335442476567813, "grad_norm": 0.08938119560480118, "learning_rate": 2.3409306742640076e-05, "loss": 0.0189, "step": 72180 }, { "epoch": 0.5336181662280831, "grad_norm": 0.0909256711602211, "learning_rate": 2.3405597103513772e-05, "loss": 0.0197, "step": 72190 }, { "epoch": 0.533692084799385, "grad_norm": 0.06849884241819382, "learning_rate": 2.3401887464387465e-05, "loss": 0.0187, "step": 72200 }, { "epoch": 0.5337660033706868, "grad_norm": 0.0745333582162857, "learning_rate": 2.339817782526116e-05, "loss": 0.0177, "step": 72210 }, { "epoch": 0.5338399219419887, "grad_norm": 0.074846550822258, "learning_rate": 2.3394468186134853e-05, "loss": 0.0169, "step": 72220 }, { "epoch": 0.5339138405132906, "grad_norm": 0.060825783759355545, "learning_rate": 2.339075854700855e-05, "loss": 0.0164, "step": 72230 }, { "epoch": 0.5339877590845924, "grad_norm": 0.07052914053201675, "learning_rate": 2.3387048907882242e-05, "loss": 0.0169, "step": 72240 }, { "epoch": 0.5340616776558943, "grad_norm": 0.07752663642168045, "learning_rate": 2.3383339268755935e-05, "loss": 0.0192, "step": 72250 }, { "epoch": 0.5341355962271961, "grad_norm": 0.08008281141519547, "learning_rate": 2.337962962962963e-05, "loss": 0.0165, "step": 72260 }, { "epoch": 0.534209514798498, "grad_norm": 0.08343818038702011, "learning_rate": 2.3375919990503326e-05, "loss": 0.0201, "step": 72270 }, { "epoch": 0.5342834333697998, "grad_norm": 0.08409687876701355, "learning_rate": 2.337221035137702e-05, "loss": 0.0169, "step": 72280 }, { "epoch": 0.5343573519411017, "grad_norm": 0.08192925155162811, "learning_rate": 2.336850071225071e-05, "loss": 0.0179, "step": 72290 }, { "epoch": 0.5344312705124036, "grad_norm": 0.08260039240121841, "learning_rate": 2.3364791073124408e-05, "loss": 0.019, "step": 72300 }, { "epoch": 0.5345051890837054, "grad_norm": 0.06475205719470978, "learning_rate": 2.3361081433998104e-05, "loss": 0.0187, "step": 72310 }, { "epoch": 0.5345791076550073, "grad_norm": 0.05737556517124176, "learning_rate": 2.3357371794871796e-05, "loss": 0.0166, "step": 72320 }, { "epoch": 0.5346530262263091, "grad_norm": 0.0827866718173027, "learning_rate": 2.335366215574549e-05, "loss": 0.0195, "step": 72330 }, { "epoch": 0.534726944797611, "grad_norm": 0.055962737649679184, "learning_rate": 2.3349952516619185e-05, "loss": 0.0184, "step": 72340 }, { "epoch": 0.5348008633689129, "grad_norm": 0.08469745516777039, "learning_rate": 2.334624287749288e-05, "loss": 0.0192, "step": 72350 }, { "epoch": 0.5348747819402146, "grad_norm": 0.06490087509155273, "learning_rate": 2.3342533238366573e-05, "loss": 0.0176, "step": 72360 }, { "epoch": 0.5349487005115166, "grad_norm": 0.07816960662603378, "learning_rate": 2.3338823599240266e-05, "loss": 0.0179, "step": 72370 }, { "epoch": 0.5350226190828183, "grad_norm": 0.05101162567734718, "learning_rate": 2.3335113960113962e-05, "loss": 0.0166, "step": 72380 }, { "epoch": 0.5350965376541202, "grad_norm": 0.10295893996953964, "learning_rate": 2.3331404320987654e-05, "loss": 0.0182, "step": 72390 }, { "epoch": 0.535170456225422, "grad_norm": 0.0677361711859703, "learning_rate": 2.332769468186135e-05, "loss": 0.02, "step": 72400 }, { "epoch": 0.5352443747967239, "grad_norm": 0.08023238182067871, "learning_rate": 2.3323985042735043e-05, "loss": 0.0162, "step": 72410 }, { "epoch": 0.5353182933680258, "grad_norm": 0.08963119983673096, "learning_rate": 2.332027540360874e-05, "loss": 0.018, "step": 72420 }, { "epoch": 0.5353922119393276, "grad_norm": 0.08507431298494339, "learning_rate": 2.331656576448243e-05, "loss": 0.0187, "step": 72430 }, { "epoch": 0.5354661305106295, "grad_norm": 0.12224476784467697, "learning_rate": 2.3312856125356127e-05, "loss": 0.02, "step": 72440 }, { "epoch": 0.5355400490819313, "grad_norm": 0.0635685995221138, "learning_rate": 2.330914648622982e-05, "loss": 0.0165, "step": 72450 }, { "epoch": 0.5356139676532332, "grad_norm": 0.05425122380256653, "learning_rate": 2.3305436847103516e-05, "loss": 0.0177, "step": 72460 }, { "epoch": 0.535687886224535, "grad_norm": 0.08225373178720474, "learning_rate": 2.330172720797721e-05, "loss": 0.017, "step": 72470 }, { "epoch": 0.5357618047958369, "grad_norm": 0.0681522786617279, "learning_rate": 2.32980175688509e-05, "loss": 0.017, "step": 72480 }, { "epoch": 0.5358357233671388, "grad_norm": 0.060225699096918106, "learning_rate": 2.3294307929724597e-05, "loss": 0.0161, "step": 72490 }, { "epoch": 0.5359096419384406, "grad_norm": 0.0953868180513382, "learning_rate": 2.3290598290598293e-05, "loss": 0.0185, "step": 72500 }, { "epoch": 0.5359835605097425, "grad_norm": 0.10979098081588745, "learning_rate": 2.3286888651471986e-05, "loss": 0.0207, "step": 72510 }, { "epoch": 0.5360574790810443, "grad_norm": 0.10969498008489609, "learning_rate": 2.3283179012345678e-05, "loss": 0.0182, "step": 72520 }, { "epoch": 0.5361313976523462, "grad_norm": 0.06657572090625763, "learning_rate": 2.3279469373219374e-05, "loss": 0.0179, "step": 72530 }, { "epoch": 0.536205316223648, "grad_norm": 0.08106397092342377, "learning_rate": 2.327575973409307e-05, "loss": 0.0164, "step": 72540 }, { "epoch": 0.5362792347949499, "grad_norm": 0.08693154901266098, "learning_rate": 2.3272050094966763e-05, "loss": 0.0187, "step": 72550 }, { "epoch": 0.5363531533662518, "grad_norm": 0.12388740479946136, "learning_rate": 2.3268340455840455e-05, "loss": 0.0196, "step": 72560 }, { "epoch": 0.5364270719375536, "grad_norm": 0.07695605605840683, "learning_rate": 2.326463081671415e-05, "loss": 0.0179, "step": 72570 }, { "epoch": 0.5365009905088555, "grad_norm": 0.07933904975652695, "learning_rate": 2.3260921177587847e-05, "loss": 0.0168, "step": 72580 }, { "epoch": 0.5365749090801573, "grad_norm": 0.07312914729118347, "learning_rate": 2.325721153846154e-05, "loss": 0.0173, "step": 72590 }, { "epoch": 0.5366488276514592, "grad_norm": 0.06449300050735474, "learning_rate": 2.3253501899335232e-05, "loss": 0.0192, "step": 72600 }, { "epoch": 0.5367227462227611, "grad_norm": 0.07357765734195709, "learning_rate": 2.324979226020893e-05, "loss": 0.0183, "step": 72610 }, { "epoch": 0.5367966647940628, "grad_norm": 0.06910233199596405, "learning_rate": 2.324608262108262e-05, "loss": 0.0186, "step": 72620 }, { "epoch": 0.5368705833653648, "grad_norm": 0.06882346421480179, "learning_rate": 2.3242372981956317e-05, "loss": 0.0181, "step": 72630 }, { "epoch": 0.5369445019366665, "grad_norm": 0.08777708560228348, "learning_rate": 2.323866334283001e-05, "loss": 0.0164, "step": 72640 }, { "epoch": 0.5370184205079684, "grad_norm": 0.07195254415273666, "learning_rate": 2.3234953703703705e-05, "loss": 0.0182, "step": 72650 }, { "epoch": 0.5370923390792702, "grad_norm": 0.06659919023513794, "learning_rate": 2.3231244064577398e-05, "loss": 0.0167, "step": 72660 }, { "epoch": 0.5371662576505721, "grad_norm": 0.07043921202421188, "learning_rate": 2.3227534425451094e-05, "loss": 0.0192, "step": 72670 }, { "epoch": 0.537240176221874, "grad_norm": 0.061976149678230286, "learning_rate": 2.3223824786324787e-05, "loss": 0.0167, "step": 72680 }, { "epoch": 0.5373140947931758, "grad_norm": 0.08181758224964142, "learning_rate": 2.3220115147198483e-05, "loss": 0.0163, "step": 72690 }, { "epoch": 0.5373880133644777, "grad_norm": 0.07538007944822311, "learning_rate": 2.3216405508072175e-05, "loss": 0.0187, "step": 72700 }, { "epoch": 0.5374619319357795, "grad_norm": 0.07163660228252411, "learning_rate": 2.3212695868945868e-05, "loss": 0.0183, "step": 72710 }, { "epoch": 0.5375358505070814, "grad_norm": 0.08359681069850922, "learning_rate": 2.3208986229819567e-05, "loss": 0.0197, "step": 72720 }, { "epoch": 0.5376097690783832, "grad_norm": 0.06933805346488953, "learning_rate": 2.320527659069326e-05, "loss": 0.0177, "step": 72730 }, { "epoch": 0.5376836876496851, "grad_norm": 0.09602054208517075, "learning_rate": 2.3201566951566952e-05, "loss": 0.0184, "step": 72740 }, { "epoch": 0.537757606220987, "grad_norm": 0.0886077880859375, "learning_rate": 2.3197857312440645e-05, "loss": 0.0196, "step": 72750 }, { "epoch": 0.5378315247922888, "grad_norm": 0.08779918402433395, "learning_rate": 2.319414767331434e-05, "loss": 0.0186, "step": 72760 }, { "epoch": 0.5379054433635907, "grad_norm": 0.06885647773742676, "learning_rate": 2.3190438034188037e-05, "loss": 0.0171, "step": 72770 }, { "epoch": 0.5379793619348925, "grad_norm": 0.08270540088415146, "learning_rate": 2.318672839506173e-05, "loss": 0.0195, "step": 72780 }, { "epoch": 0.5380532805061944, "grad_norm": 0.08925561606884003, "learning_rate": 2.3183018755935422e-05, "loss": 0.0164, "step": 72790 }, { "epoch": 0.5381271990774962, "grad_norm": 0.08344494551420212, "learning_rate": 2.3179309116809118e-05, "loss": 0.0172, "step": 72800 }, { "epoch": 0.5382011176487981, "grad_norm": 0.08896711468696594, "learning_rate": 2.3175599477682814e-05, "loss": 0.0166, "step": 72810 }, { "epoch": 0.5382750362201, "grad_norm": 0.07909851521253586, "learning_rate": 2.3171889838556506e-05, "loss": 0.0154, "step": 72820 }, { "epoch": 0.5383489547914018, "grad_norm": 0.06761901080608368, "learning_rate": 2.31681801994302e-05, "loss": 0.0176, "step": 72830 }, { "epoch": 0.5384228733627037, "grad_norm": 0.09635329991579056, "learning_rate": 2.3164470560303895e-05, "loss": 0.017, "step": 72840 }, { "epoch": 0.5384967919340055, "grad_norm": 0.08209284394979477, "learning_rate": 2.3160760921177587e-05, "loss": 0.0182, "step": 72850 }, { "epoch": 0.5385707105053074, "grad_norm": 0.07711105048656464, "learning_rate": 2.3157051282051283e-05, "loss": 0.0195, "step": 72860 }, { "epoch": 0.5386446290766093, "grad_norm": 0.07249467819929123, "learning_rate": 2.315334164292498e-05, "loss": 0.0167, "step": 72870 }, { "epoch": 0.538718547647911, "grad_norm": 0.07699708640575409, "learning_rate": 2.3149632003798672e-05, "loss": 0.019, "step": 72880 }, { "epoch": 0.538792466219213, "grad_norm": 0.08737131953239441, "learning_rate": 2.3145922364672365e-05, "loss": 0.0168, "step": 72890 }, { "epoch": 0.5388663847905147, "grad_norm": 0.0935729444026947, "learning_rate": 2.314221272554606e-05, "loss": 0.0189, "step": 72900 }, { "epoch": 0.5389403033618166, "grad_norm": 0.07913996279239655, "learning_rate": 2.3138503086419757e-05, "loss": 0.0166, "step": 72910 }, { "epoch": 0.5390142219331184, "grad_norm": 0.07874778658151627, "learning_rate": 2.313479344729345e-05, "loss": 0.0185, "step": 72920 }, { "epoch": 0.5390881405044203, "grad_norm": 0.059319932013750076, "learning_rate": 2.313108380816714e-05, "loss": 0.0187, "step": 72930 }, { "epoch": 0.5391620590757222, "grad_norm": 0.06820226460695267, "learning_rate": 2.3127374169040834e-05, "loss": 0.0194, "step": 72940 }, { "epoch": 0.539235977647024, "grad_norm": 0.0838264748454094, "learning_rate": 2.3123664529914534e-05, "loss": 0.0191, "step": 72950 }, { "epoch": 0.5393098962183259, "grad_norm": 0.08863027393817902, "learning_rate": 2.3119954890788226e-05, "loss": 0.0175, "step": 72960 }, { "epoch": 0.5393838147896277, "grad_norm": 0.0833144411444664, "learning_rate": 2.311624525166192e-05, "loss": 0.0172, "step": 72970 }, { "epoch": 0.5394577333609296, "grad_norm": 0.07805544883012772, "learning_rate": 2.311253561253561e-05, "loss": 0.0163, "step": 72980 }, { "epoch": 0.5395316519322314, "grad_norm": 0.12435826659202576, "learning_rate": 2.3108825973409307e-05, "loss": 0.0194, "step": 72990 }, { "epoch": 0.5396055705035333, "grad_norm": 0.06662425398826599, "learning_rate": 2.3105116334283003e-05, "loss": 0.0184, "step": 73000 }, { "epoch": 0.5396794890748352, "grad_norm": 0.06451261788606644, "learning_rate": 2.3101406695156696e-05, "loss": 0.0186, "step": 73010 }, { "epoch": 0.539753407646137, "grad_norm": 0.09885028004646301, "learning_rate": 2.3097697056030392e-05, "loss": 0.018, "step": 73020 }, { "epoch": 0.5398273262174389, "grad_norm": 0.0644676461815834, "learning_rate": 2.3093987416904084e-05, "loss": 0.0179, "step": 73030 }, { "epoch": 0.5399012447887407, "grad_norm": 0.07263324409723282, "learning_rate": 2.309027777777778e-05, "loss": 0.0189, "step": 73040 }, { "epoch": 0.5399751633600426, "grad_norm": 0.07130710780620575, "learning_rate": 2.3086568138651473e-05, "loss": 0.018, "step": 73050 }, { "epoch": 0.5400490819313444, "grad_norm": 0.1313527673482895, "learning_rate": 2.308285849952517e-05, "loss": 0.0164, "step": 73060 }, { "epoch": 0.5401230005026463, "grad_norm": 0.11921053379774094, "learning_rate": 2.307914886039886e-05, "loss": 0.0199, "step": 73070 }, { "epoch": 0.5401969190739482, "grad_norm": 0.071078822016716, "learning_rate": 2.3075439221272554e-05, "loss": 0.0165, "step": 73080 }, { "epoch": 0.54027083764525, "grad_norm": 0.06688307225704193, "learning_rate": 2.307172958214625e-05, "loss": 0.0176, "step": 73090 }, { "epoch": 0.5403447562165519, "grad_norm": 0.07644398510456085, "learning_rate": 2.3068019943019946e-05, "loss": 0.0185, "step": 73100 }, { "epoch": 0.5404186747878537, "grad_norm": 0.08246079087257385, "learning_rate": 2.306431030389364e-05, "loss": 0.0169, "step": 73110 }, { "epoch": 0.5404925933591556, "grad_norm": 0.08127640187740326, "learning_rate": 2.306060066476733e-05, "loss": 0.0193, "step": 73120 }, { "epoch": 0.5405665119304575, "grad_norm": 0.058930739760398865, "learning_rate": 2.3056891025641027e-05, "loss": 0.0174, "step": 73130 }, { "epoch": 0.5406404305017593, "grad_norm": 0.09932822734117508, "learning_rate": 2.3053181386514723e-05, "loss": 0.0159, "step": 73140 }, { "epoch": 0.5407143490730612, "grad_norm": 0.07006815075874329, "learning_rate": 2.3049471747388416e-05, "loss": 0.0179, "step": 73150 }, { "epoch": 0.5407882676443629, "grad_norm": 0.06904808431863785, "learning_rate": 2.3045762108262108e-05, "loss": 0.0206, "step": 73160 }, { "epoch": 0.5408621862156648, "grad_norm": 0.0655849501490593, "learning_rate": 2.30420524691358e-05, "loss": 0.0176, "step": 73170 }, { "epoch": 0.5409361047869666, "grad_norm": 0.11681249737739563, "learning_rate": 2.30383428300095e-05, "loss": 0.0182, "step": 73180 }, { "epoch": 0.5410100233582685, "grad_norm": 0.08647429943084717, "learning_rate": 2.3034633190883193e-05, "loss": 0.0184, "step": 73190 }, { "epoch": 0.5410839419295704, "grad_norm": 0.11153209954500198, "learning_rate": 2.3030923551756885e-05, "loss": 0.0198, "step": 73200 }, { "epoch": 0.5411578605008722, "grad_norm": 0.07431022077798843, "learning_rate": 2.302721391263058e-05, "loss": 0.0156, "step": 73210 }, { "epoch": 0.5412317790721741, "grad_norm": 0.08188775926828384, "learning_rate": 2.3023504273504274e-05, "loss": 0.0171, "step": 73220 }, { "epoch": 0.5413056976434759, "grad_norm": 0.07274893671274185, "learning_rate": 2.301979463437797e-05, "loss": 0.0163, "step": 73230 }, { "epoch": 0.5413796162147778, "grad_norm": 0.1031305342912674, "learning_rate": 2.3016084995251662e-05, "loss": 0.0172, "step": 73240 }, { "epoch": 0.5414535347860796, "grad_norm": 0.06862304359674454, "learning_rate": 2.301237535612536e-05, "loss": 0.0178, "step": 73250 }, { "epoch": 0.5415274533573815, "grad_norm": 0.06975569576025009, "learning_rate": 2.300866571699905e-05, "loss": 0.0178, "step": 73260 }, { "epoch": 0.5416013719286834, "grad_norm": 0.08459268510341644, "learning_rate": 2.3004956077872747e-05, "loss": 0.0165, "step": 73270 }, { "epoch": 0.5416752904999852, "grad_norm": 0.07779370993375778, "learning_rate": 2.300124643874644e-05, "loss": 0.0167, "step": 73280 }, { "epoch": 0.5417492090712871, "grad_norm": 0.05710046738386154, "learning_rate": 2.2997536799620135e-05, "loss": 0.0166, "step": 73290 }, { "epoch": 0.5418231276425889, "grad_norm": 0.07504494488239288, "learning_rate": 2.2993827160493828e-05, "loss": 0.0149, "step": 73300 }, { "epoch": 0.5418970462138908, "grad_norm": 0.07329048216342926, "learning_rate": 2.299011752136752e-05, "loss": 0.0172, "step": 73310 }, { "epoch": 0.5419709647851926, "grad_norm": 0.06944195926189423, "learning_rate": 2.2986407882241217e-05, "loss": 0.0191, "step": 73320 }, { "epoch": 0.5420448833564945, "grad_norm": 0.07709192484617233, "learning_rate": 2.2982698243114913e-05, "loss": 0.0163, "step": 73330 }, { "epoch": 0.5421188019277964, "grad_norm": 0.09037928283214569, "learning_rate": 2.2978988603988605e-05, "loss": 0.0161, "step": 73340 }, { "epoch": 0.5421927204990982, "grad_norm": 0.1045730784535408, "learning_rate": 2.2975278964862298e-05, "loss": 0.0183, "step": 73350 }, { "epoch": 0.5422666390704001, "grad_norm": 0.08890021592378616, "learning_rate": 2.2971569325735994e-05, "loss": 0.0198, "step": 73360 }, { "epoch": 0.5423405576417019, "grad_norm": 0.08284857124090195, "learning_rate": 2.296785968660969e-05, "loss": 0.0196, "step": 73370 }, { "epoch": 0.5424144762130038, "grad_norm": 0.09294552356004715, "learning_rate": 2.2964150047483382e-05, "loss": 0.0199, "step": 73380 }, { "epoch": 0.5424883947843057, "grad_norm": 0.07610155642032623, "learning_rate": 2.2960440408357075e-05, "loss": 0.0185, "step": 73390 }, { "epoch": 0.5425623133556075, "grad_norm": 0.07990909367799759, "learning_rate": 2.295673076923077e-05, "loss": 0.0185, "step": 73400 }, { "epoch": 0.5426362319269094, "grad_norm": 0.08958449959754944, "learning_rate": 2.2953021130104467e-05, "loss": 0.0158, "step": 73410 }, { "epoch": 0.5427101504982111, "grad_norm": 0.07816076278686523, "learning_rate": 2.294931149097816e-05, "loss": 0.0187, "step": 73420 }, { "epoch": 0.542784069069513, "grad_norm": 0.05900447815656662, "learning_rate": 2.2945601851851852e-05, "loss": 0.0175, "step": 73430 }, { "epoch": 0.5428579876408148, "grad_norm": 0.0727897435426712, "learning_rate": 2.2941892212725548e-05, "loss": 0.0194, "step": 73440 }, { "epoch": 0.5429319062121167, "grad_norm": 0.10008365660905838, "learning_rate": 2.293818257359924e-05, "loss": 0.0181, "step": 73450 }, { "epoch": 0.5430058247834186, "grad_norm": 0.07405528426170349, "learning_rate": 2.2934472934472936e-05, "loss": 0.0165, "step": 73460 }, { "epoch": 0.5430797433547204, "grad_norm": 0.0775710791349411, "learning_rate": 2.293076329534663e-05, "loss": 0.0171, "step": 73470 }, { "epoch": 0.5431536619260223, "grad_norm": 0.07803450524806976, "learning_rate": 2.2927053656220325e-05, "loss": 0.0155, "step": 73480 }, { "epoch": 0.5432275804973241, "grad_norm": 0.0809224396944046, "learning_rate": 2.2923344017094018e-05, "loss": 0.0186, "step": 73490 }, { "epoch": 0.543301499068626, "grad_norm": 0.10041411966085434, "learning_rate": 2.2919634377967714e-05, "loss": 0.0174, "step": 73500 }, { "epoch": 0.5433754176399278, "grad_norm": 0.07222852855920792, "learning_rate": 2.2915924738841406e-05, "loss": 0.0213, "step": 73510 }, { "epoch": 0.5434493362112297, "grad_norm": 0.09778133034706116, "learning_rate": 2.2912215099715102e-05, "loss": 0.02, "step": 73520 }, { "epoch": 0.5435232547825316, "grad_norm": 0.07030012458562851, "learning_rate": 2.2908505460588795e-05, "loss": 0.0199, "step": 73530 }, { "epoch": 0.5435971733538334, "grad_norm": 0.06923951208591461, "learning_rate": 2.2904795821462487e-05, "loss": 0.0184, "step": 73540 }, { "epoch": 0.5436710919251353, "grad_norm": 0.08172609657049179, "learning_rate": 2.2901086182336183e-05, "loss": 0.0168, "step": 73550 }, { "epoch": 0.5437450104964371, "grad_norm": 0.07019809633493423, "learning_rate": 2.289737654320988e-05, "loss": 0.0181, "step": 73560 }, { "epoch": 0.543818929067739, "grad_norm": 0.08588024228811264, "learning_rate": 2.2893666904083572e-05, "loss": 0.0191, "step": 73570 }, { "epoch": 0.5438928476390408, "grad_norm": 0.07677468657493591, "learning_rate": 2.2889957264957264e-05, "loss": 0.0174, "step": 73580 }, { "epoch": 0.5439667662103427, "grad_norm": 0.07189089059829712, "learning_rate": 2.288624762583096e-05, "loss": 0.0161, "step": 73590 }, { "epoch": 0.5440406847816446, "grad_norm": 0.0762021616101265, "learning_rate": 2.2882537986704656e-05, "loss": 0.0164, "step": 73600 }, { "epoch": 0.5441146033529464, "grad_norm": 0.08038612455129623, "learning_rate": 2.287882834757835e-05, "loss": 0.0207, "step": 73610 }, { "epoch": 0.5441885219242483, "grad_norm": 0.07729607820510864, "learning_rate": 2.287511870845204e-05, "loss": 0.0187, "step": 73620 }, { "epoch": 0.5442624404955501, "grad_norm": 0.0560038797557354, "learning_rate": 2.2871409069325737e-05, "loss": 0.0181, "step": 73630 }, { "epoch": 0.544336359066852, "grad_norm": 0.07387551665306091, "learning_rate": 2.2867699430199433e-05, "loss": 0.0166, "step": 73640 }, { "epoch": 0.5444102776381539, "grad_norm": 0.06997978687286377, "learning_rate": 2.2863989791073126e-05, "loss": 0.0182, "step": 73650 }, { "epoch": 0.5444841962094557, "grad_norm": 0.08107759058475494, "learning_rate": 2.286028015194682e-05, "loss": 0.0179, "step": 73660 }, { "epoch": 0.5445581147807576, "grad_norm": 0.07450973242521286, "learning_rate": 2.2856570512820514e-05, "loss": 0.0161, "step": 73670 }, { "epoch": 0.5446320333520593, "grad_norm": 0.058085158467292786, "learning_rate": 2.2852860873694207e-05, "loss": 0.0188, "step": 73680 }, { "epoch": 0.5447059519233612, "grad_norm": 0.099921815097332, "learning_rate": 2.2849151234567903e-05, "loss": 0.0175, "step": 73690 }, { "epoch": 0.544779870494663, "grad_norm": 0.09356129914522171, "learning_rate": 2.2845441595441596e-05, "loss": 0.0195, "step": 73700 }, { "epoch": 0.5448537890659649, "grad_norm": 0.07478101551532745, "learning_rate": 2.284173195631529e-05, "loss": 0.0169, "step": 73710 }, { "epoch": 0.5449277076372668, "grad_norm": 0.06617410480976105, "learning_rate": 2.2838022317188984e-05, "loss": 0.0183, "step": 73720 }, { "epoch": 0.5450016262085686, "grad_norm": 0.11164570599794388, "learning_rate": 2.283431267806268e-05, "loss": 0.0171, "step": 73730 }, { "epoch": 0.5450755447798705, "grad_norm": 0.06951144337654114, "learning_rate": 2.2830603038936373e-05, "loss": 0.0176, "step": 73740 }, { "epoch": 0.5451494633511723, "grad_norm": 0.09214255213737488, "learning_rate": 2.282689339981007e-05, "loss": 0.0182, "step": 73750 }, { "epoch": 0.5452233819224742, "grad_norm": 0.07372793555259705, "learning_rate": 2.282318376068376e-05, "loss": 0.0175, "step": 73760 }, { "epoch": 0.545297300493776, "grad_norm": 0.06709369271993637, "learning_rate": 2.2819474121557454e-05, "loss": 0.0156, "step": 73770 }, { "epoch": 0.5453712190650779, "grad_norm": 0.08580457419157028, "learning_rate": 2.281576448243115e-05, "loss": 0.0197, "step": 73780 }, { "epoch": 0.5454451376363798, "grad_norm": 0.08991753309965134, "learning_rate": 2.2812054843304846e-05, "loss": 0.0199, "step": 73790 }, { "epoch": 0.5455190562076816, "grad_norm": 0.08562833070755005, "learning_rate": 2.280834520417854e-05, "loss": 0.0172, "step": 73800 }, { "epoch": 0.5455929747789835, "grad_norm": 0.0725017562508583, "learning_rate": 2.280463556505223e-05, "loss": 0.0185, "step": 73810 }, { "epoch": 0.5456668933502853, "grad_norm": 0.10185367614030838, "learning_rate": 2.2800925925925927e-05, "loss": 0.0185, "step": 73820 }, { "epoch": 0.5457408119215872, "grad_norm": 0.07755794376134872, "learning_rate": 2.2797216286799623e-05, "loss": 0.0187, "step": 73830 }, { "epoch": 0.5458147304928891, "grad_norm": 0.058865927159786224, "learning_rate": 2.2793506647673315e-05, "loss": 0.0156, "step": 73840 }, { "epoch": 0.5458886490641909, "grad_norm": 0.08337011933326721, "learning_rate": 2.2789797008547008e-05, "loss": 0.0201, "step": 73850 }, { "epoch": 0.5459625676354928, "grad_norm": 0.08722520619630814, "learning_rate": 2.2786087369420704e-05, "loss": 0.0177, "step": 73860 }, { "epoch": 0.5460364862067946, "grad_norm": 0.07942160218954086, "learning_rate": 2.27823777302944e-05, "loss": 0.0162, "step": 73870 }, { "epoch": 0.5461104047780965, "grad_norm": 0.0768059492111206, "learning_rate": 2.2778668091168093e-05, "loss": 0.0183, "step": 73880 }, { "epoch": 0.5461843233493983, "grad_norm": 0.07833820581436157, "learning_rate": 2.2774958452041785e-05, "loss": 0.0207, "step": 73890 }, { "epoch": 0.5462582419207002, "grad_norm": 0.07780931890010834, "learning_rate": 2.277124881291548e-05, "loss": 0.0161, "step": 73900 }, { "epoch": 0.5463321604920021, "grad_norm": 0.07057272642850876, "learning_rate": 2.2767539173789174e-05, "loss": 0.0174, "step": 73910 }, { "epoch": 0.5464060790633039, "grad_norm": 0.08263225853443146, "learning_rate": 2.276382953466287e-05, "loss": 0.0197, "step": 73920 }, { "epoch": 0.5464799976346058, "grad_norm": 0.07716953754425049, "learning_rate": 2.2760119895536562e-05, "loss": 0.0182, "step": 73930 }, { "epoch": 0.5465539162059075, "grad_norm": 0.06758254766464233, "learning_rate": 2.2756410256410258e-05, "loss": 0.0186, "step": 73940 }, { "epoch": 0.5466278347772094, "grad_norm": 0.06011701002717018, "learning_rate": 2.275270061728395e-05, "loss": 0.0169, "step": 73950 }, { "epoch": 0.5467017533485112, "grad_norm": 0.09424585103988647, "learning_rate": 2.2748990978157647e-05, "loss": 0.018, "step": 73960 }, { "epoch": 0.5467756719198131, "grad_norm": 0.07628542184829712, "learning_rate": 2.274528133903134e-05, "loss": 0.0177, "step": 73970 }, { "epoch": 0.546849590491115, "grad_norm": 0.08188623934984207, "learning_rate": 2.2741571699905035e-05, "loss": 0.0183, "step": 73980 }, { "epoch": 0.5469235090624168, "grad_norm": 0.07078612595796585, "learning_rate": 2.2737862060778728e-05, "loss": 0.0213, "step": 73990 }, { "epoch": 0.5469974276337187, "grad_norm": 0.08338020741939545, "learning_rate": 2.273415242165242e-05, "loss": 0.0193, "step": 74000 }, { "epoch": 0.5470713462050205, "grad_norm": 0.0826638787984848, "learning_rate": 2.2730442782526116e-05, "loss": 0.0198, "step": 74010 }, { "epoch": 0.5471452647763224, "grad_norm": 0.07347635924816132, "learning_rate": 2.2726733143399812e-05, "loss": 0.017, "step": 74020 }, { "epoch": 0.5472191833476242, "grad_norm": 0.06849198788404465, "learning_rate": 2.2723023504273505e-05, "loss": 0.017, "step": 74030 }, { "epoch": 0.5472931019189261, "grad_norm": 0.07767181843519211, "learning_rate": 2.2719313865147197e-05, "loss": 0.019, "step": 74040 }, { "epoch": 0.547367020490228, "grad_norm": 0.08283500373363495, "learning_rate": 2.2715604226020893e-05, "loss": 0.0183, "step": 74050 }, { "epoch": 0.5474409390615298, "grad_norm": 0.07500879466533661, "learning_rate": 2.271189458689459e-05, "loss": 0.0184, "step": 74060 }, { "epoch": 0.5475148576328317, "grad_norm": 0.07654564082622528, "learning_rate": 2.2708184947768282e-05, "loss": 0.0202, "step": 74070 }, { "epoch": 0.5475887762041335, "grad_norm": 0.07483880966901779, "learning_rate": 2.2704475308641975e-05, "loss": 0.0186, "step": 74080 }, { "epoch": 0.5476626947754354, "grad_norm": 0.05762667953968048, "learning_rate": 2.270076566951567e-05, "loss": 0.0163, "step": 74090 }, { "epoch": 0.5477366133467373, "grad_norm": 0.08308656513690948, "learning_rate": 2.2697056030389367e-05, "loss": 0.0178, "step": 74100 }, { "epoch": 0.5478105319180391, "grad_norm": 0.09272373467683792, "learning_rate": 2.269334639126306e-05, "loss": 0.0185, "step": 74110 }, { "epoch": 0.547884450489341, "grad_norm": 0.09792761504650116, "learning_rate": 2.268963675213675e-05, "loss": 0.0191, "step": 74120 }, { "epoch": 0.5479583690606428, "grad_norm": 0.08621697872877121, "learning_rate": 2.2685927113010448e-05, "loss": 0.0173, "step": 74130 }, { "epoch": 0.5480322876319447, "grad_norm": 0.07154551148414612, "learning_rate": 2.268221747388414e-05, "loss": 0.0176, "step": 74140 }, { "epoch": 0.5481062062032465, "grad_norm": 0.06855598092079163, "learning_rate": 2.2678507834757836e-05, "loss": 0.0227, "step": 74150 }, { "epoch": 0.5481801247745484, "grad_norm": 0.07203419506549835, "learning_rate": 2.267479819563153e-05, "loss": 0.0174, "step": 74160 }, { "epoch": 0.5482540433458503, "grad_norm": 0.07480933517217636, "learning_rate": 2.2671088556505225e-05, "loss": 0.0178, "step": 74170 }, { "epoch": 0.548327961917152, "grad_norm": 0.08844535052776337, "learning_rate": 2.2667378917378917e-05, "loss": 0.0178, "step": 74180 }, { "epoch": 0.548401880488454, "grad_norm": 0.08185859024524689, "learning_rate": 2.2663669278252613e-05, "loss": 0.0163, "step": 74190 }, { "epoch": 0.5484757990597557, "grad_norm": 0.07496652007102966, "learning_rate": 2.2659959639126306e-05, "loss": 0.0185, "step": 74200 }, { "epoch": 0.5485497176310576, "grad_norm": 0.06973458081483841, "learning_rate": 2.2656250000000002e-05, "loss": 0.0155, "step": 74210 }, { "epoch": 0.5486236362023594, "grad_norm": 0.08845594525337219, "learning_rate": 2.2652540360873694e-05, "loss": 0.0169, "step": 74220 }, { "epoch": 0.5486975547736613, "grad_norm": 0.051325857639312744, "learning_rate": 2.2648830721747387e-05, "loss": 0.0168, "step": 74230 }, { "epoch": 0.5487714733449632, "grad_norm": 0.0844346433877945, "learning_rate": 2.2645121082621086e-05, "loss": 0.0174, "step": 74240 }, { "epoch": 0.548845391916265, "grad_norm": 0.06688199937343597, "learning_rate": 2.264141144349478e-05, "loss": 0.0188, "step": 74250 }, { "epoch": 0.5489193104875669, "grad_norm": 0.07292843610048294, "learning_rate": 2.263770180436847e-05, "loss": 0.0176, "step": 74260 }, { "epoch": 0.5489932290588687, "grad_norm": 0.10604050010442734, "learning_rate": 2.2633992165242164e-05, "loss": 0.0182, "step": 74270 }, { "epoch": 0.5490671476301706, "grad_norm": 0.07840543985366821, "learning_rate": 2.2630282526115863e-05, "loss": 0.0172, "step": 74280 }, { "epoch": 0.5491410662014724, "grad_norm": 0.12083763629198074, "learning_rate": 2.2626572886989556e-05, "loss": 0.0185, "step": 74290 }, { "epoch": 0.5492149847727743, "grad_norm": 0.05159150809049606, "learning_rate": 2.262286324786325e-05, "loss": 0.0174, "step": 74300 }, { "epoch": 0.5492889033440762, "grad_norm": 0.10930929332971573, "learning_rate": 2.261915360873694e-05, "loss": 0.0199, "step": 74310 }, { "epoch": 0.549362821915378, "grad_norm": 0.060169853270053864, "learning_rate": 2.2615443969610637e-05, "loss": 0.0169, "step": 74320 }, { "epoch": 0.5494367404866799, "grad_norm": 0.08198923617601395, "learning_rate": 2.2611734330484333e-05, "loss": 0.0182, "step": 74330 }, { "epoch": 0.5495106590579817, "grad_norm": 0.0873372033238411, "learning_rate": 2.2608024691358026e-05, "loss": 0.019, "step": 74340 }, { "epoch": 0.5495845776292836, "grad_norm": 0.08752260357141495, "learning_rate": 2.2604315052231718e-05, "loss": 0.0167, "step": 74350 }, { "epoch": 0.5496584962005855, "grad_norm": 0.09364860504865646, "learning_rate": 2.2600605413105414e-05, "loss": 0.0172, "step": 74360 }, { "epoch": 0.5497324147718873, "grad_norm": 0.061986666172742844, "learning_rate": 2.2596895773979107e-05, "loss": 0.0159, "step": 74370 }, { "epoch": 0.5498063333431892, "grad_norm": 0.07798568159341812, "learning_rate": 2.2593186134852803e-05, "loss": 0.0157, "step": 74380 }, { "epoch": 0.549880251914491, "grad_norm": 0.05340861156582832, "learning_rate": 2.25894764957265e-05, "loss": 0.0172, "step": 74390 }, { "epoch": 0.5499541704857929, "grad_norm": 0.10631144791841507, "learning_rate": 2.258576685660019e-05, "loss": 0.0169, "step": 74400 }, { "epoch": 0.5500280890570947, "grad_norm": 0.06307753175497055, "learning_rate": 2.2582057217473884e-05, "loss": 0.0183, "step": 74410 }, { "epoch": 0.5501020076283966, "grad_norm": 0.10609875619411469, "learning_rate": 2.257834757834758e-05, "loss": 0.0212, "step": 74420 }, { "epoch": 0.5501759261996985, "grad_norm": 0.09723684936761856, "learning_rate": 2.2574637939221276e-05, "loss": 0.0182, "step": 74430 }, { "epoch": 0.5502498447710003, "grad_norm": 0.07827954739332199, "learning_rate": 2.257092830009497e-05, "loss": 0.0153, "step": 74440 }, { "epoch": 0.5503237633423022, "grad_norm": 0.05574478209018707, "learning_rate": 2.256721866096866e-05, "loss": 0.0163, "step": 74450 }, { "epoch": 0.5503976819136039, "grad_norm": 0.06514042615890503, "learning_rate": 2.2563509021842354e-05, "loss": 0.0178, "step": 74460 }, { "epoch": 0.5504716004849058, "grad_norm": 0.06927794218063354, "learning_rate": 2.2559799382716053e-05, "loss": 0.0174, "step": 74470 }, { "epoch": 0.5505455190562076, "grad_norm": 0.08658149838447571, "learning_rate": 2.2556089743589746e-05, "loss": 0.0178, "step": 74480 }, { "epoch": 0.5506194376275095, "grad_norm": 0.059816401451826096, "learning_rate": 2.2552380104463438e-05, "loss": 0.0199, "step": 74490 }, { "epoch": 0.5506933561988114, "grad_norm": 0.07856094092130661, "learning_rate": 2.254867046533713e-05, "loss": 0.0181, "step": 74500 }, { "epoch": 0.5507672747701132, "grad_norm": 0.09727056324481964, "learning_rate": 2.254496082621083e-05, "loss": 0.0192, "step": 74510 }, { "epoch": 0.5508411933414151, "grad_norm": 0.07374399900436401, "learning_rate": 2.2541251187084523e-05, "loss": 0.0182, "step": 74520 }, { "epoch": 0.5509151119127169, "grad_norm": 0.08984479308128357, "learning_rate": 2.2537541547958215e-05, "loss": 0.0176, "step": 74530 }, { "epoch": 0.5509890304840188, "grad_norm": 0.08314557373523712, "learning_rate": 2.2533831908831908e-05, "loss": 0.0157, "step": 74540 }, { "epoch": 0.5510629490553206, "grad_norm": 0.09913858026266098, "learning_rate": 2.2530122269705604e-05, "loss": 0.0178, "step": 74550 }, { "epoch": 0.5511368676266225, "grad_norm": 0.06641694903373718, "learning_rate": 2.25264126305793e-05, "loss": 0.0175, "step": 74560 }, { "epoch": 0.5512107861979244, "grad_norm": 0.07155690342187881, "learning_rate": 2.2522702991452992e-05, "loss": 0.0207, "step": 74570 }, { "epoch": 0.5512847047692262, "grad_norm": 0.07747691869735718, "learning_rate": 2.2518993352326688e-05, "loss": 0.0179, "step": 74580 }, { "epoch": 0.5513586233405281, "grad_norm": 0.06510493904352188, "learning_rate": 2.251528371320038e-05, "loss": 0.0166, "step": 74590 }, { "epoch": 0.5514325419118299, "grad_norm": 0.09934507310390472, "learning_rate": 2.2511574074074073e-05, "loss": 0.0167, "step": 74600 }, { "epoch": 0.5515064604831318, "grad_norm": 0.06525859236717224, "learning_rate": 2.250786443494777e-05, "loss": 0.0184, "step": 74610 }, { "epoch": 0.5515803790544337, "grad_norm": 0.104197658598423, "learning_rate": 2.2504154795821465e-05, "loss": 0.0185, "step": 74620 }, { "epoch": 0.5516542976257355, "grad_norm": 0.0662267878651619, "learning_rate": 2.2500445156695158e-05, "loss": 0.0181, "step": 74630 }, { "epoch": 0.5517282161970374, "grad_norm": 0.07721276581287384, "learning_rate": 2.249673551756885e-05, "loss": 0.0181, "step": 74640 }, { "epoch": 0.5518021347683392, "grad_norm": 0.09885270148515701, "learning_rate": 2.2493025878442546e-05, "loss": 0.0176, "step": 74650 }, { "epoch": 0.5518760533396411, "grad_norm": 0.0868011862039566, "learning_rate": 2.2489316239316242e-05, "loss": 0.0174, "step": 74660 }, { "epoch": 0.5519499719109429, "grad_norm": 0.07308496534824371, "learning_rate": 2.2485606600189935e-05, "loss": 0.0175, "step": 74670 }, { "epoch": 0.5520238904822448, "grad_norm": 0.08916335552930832, "learning_rate": 2.2481896961063628e-05, "loss": 0.0166, "step": 74680 }, { "epoch": 0.5520978090535467, "grad_norm": 0.08002009242773056, "learning_rate": 2.247818732193732e-05, "loss": 0.0164, "step": 74690 }, { "epoch": 0.5521717276248485, "grad_norm": 0.08925087749958038, "learning_rate": 2.247447768281102e-05, "loss": 0.0172, "step": 74700 }, { "epoch": 0.5522456461961504, "grad_norm": 0.061949167400598526, "learning_rate": 2.2470768043684712e-05, "loss": 0.0145, "step": 74710 }, { "epoch": 0.5523195647674521, "grad_norm": 0.10795128345489502, "learning_rate": 2.2467058404558405e-05, "loss": 0.016, "step": 74720 }, { "epoch": 0.552393483338754, "grad_norm": 0.15623247623443604, "learning_rate": 2.24633487654321e-05, "loss": 0.018, "step": 74730 }, { "epoch": 0.5524674019100558, "grad_norm": 0.08144319802522659, "learning_rate": 2.2459639126305797e-05, "loss": 0.0195, "step": 74740 }, { "epoch": 0.5525413204813577, "grad_norm": 0.09133787453174591, "learning_rate": 2.245592948717949e-05, "loss": 0.0188, "step": 74750 }, { "epoch": 0.5526152390526596, "grad_norm": 0.06813662499189377, "learning_rate": 2.2452219848053182e-05, "loss": 0.0159, "step": 74760 }, { "epoch": 0.5526891576239614, "grad_norm": 0.10573530197143555, "learning_rate": 2.2448510208926878e-05, "loss": 0.0195, "step": 74770 }, { "epoch": 0.5527630761952633, "grad_norm": 0.09004482626914978, "learning_rate": 2.244480056980057e-05, "loss": 0.021, "step": 74780 }, { "epoch": 0.5528369947665651, "grad_norm": 0.06961522996425629, "learning_rate": 2.2441090930674266e-05, "loss": 0.0175, "step": 74790 }, { "epoch": 0.552910913337867, "grad_norm": 0.10666133463382721, "learning_rate": 2.243738129154796e-05, "loss": 0.0193, "step": 74800 }, { "epoch": 0.5529848319091688, "grad_norm": 0.07972019910812378, "learning_rate": 2.2433671652421655e-05, "loss": 0.0165, "step": 74810 }, { "epoch": 0.5530587504804707, "grad_norm": 0.07946088165044785, "learning_rate": 2.2429962013295347e-05, "loss": 0.0168, "step": 74820 }, { "epoch": 0.5531326690517726, "grad_norm": 0.07809576392173767, "learning_rate": 2.242625237416904e-05, "loss": 0.0184, "step": 74830 }, { "epoch": 0.5532065876230744, "grad_norm": 0.12295723706483841, "learning_rate": 2.2422542735042736e-05, "loss": 0.0182, "step": 74840 }, { "epoch": 0.5532805061943763, "grad_norm": 0.06109769642353058, "learning_rate": 2.2418833095916432e-05, "loss": 0.0174, "step": 74850 }, { "epoch": 0.5533544247656781, "grad_norm": 0.07381372898817062, "learning_rate": 2.2415123456790124e-05, "loss": 0.0179, "step": 74860 }, { "epoch": 0.55342834333698, "grad_norm": 0.0744849368929863, "learning_rate": 2.2411413817663817e-05, "loss": 0.0189, "step": 74870 }, { "epoch": 0.5535022619082819, "grad_norm": 0.07806837558746338, "learning_rate": 2.2407704178537513e-05, "loss": 0.0184, "step": 74880 }, { "epoch": 0.5535761804795837, "grad_norm": 0.0611063651740551, "learning_rate": 2.240399453941121e-05, "loss": 0.017, "step": 74890 }, { "epoch": 0.5536500990508856, "grad_norm": 0.09074815362691879, "learning_rate": 2.24002849002849e-05, "loss": 0.0177, "step": 74900 }, { "epoch": 0.5537240176221874, "grad_norm": 0.05981917679309845, "learning_rate": 2.2396575261158594e-05, "loss": 0.0173, "step": 74910 }, { "epoch": 0.5537979361934893, "grad_norm": 0.06573251634836197, "learning_rate": 2.239286562203229e-05, "loss": 0.0155, "step": 74920 }, { "epoch": 0.5538718547647911, "grad_norm": 0.0894307941198349, "learning_rate": 2.2389155982905986e-05, "loss": 0.0177, "step": 74930 }, { "epoch": 0.553945773336093, "grad_norm": 0.07103675603866577, "learning_rate": 2.238544634377968e-05, "loss": 0.0178, "step": 74940 }, { "epoch": 0.5540196919073949, "grad_norm": 0.06956657022237778, "learning_rate": 2.238173670465337e-05, "loss": 0.0203, "step": 74950 }, { "epoch": 0.5540936104786967, "grad_norm": 0.08880143612623215, "learning_rate": 2.2378027065527067e-05, "loss": 0.0166, "step": 74960 }, { "epoch": 0.5541675290499986, "grad_norm": 0.07951267808675766, "learning_rate": 2.2374317426400763e-05, "loss": 0.017, "step": 74970 }, { "epoch": 0.5542414476213003, "grad_norm": 0.08745020627975464, "learning_rate": 2.2370607787274456e-05, "loss": 0.0166, "step": 74980 }, { "epoch": 0.5543153661926022, "grad_norm": 0.08782682567834854, "learning_rate": 2.236689814814815e-05, "loss": 0.0185, "step": 74990 }, { "epoch": 0.554389284763904, "grad_norm": 0.09408223628997803, "learning_rate": 2.2363188509021844e-05, "loss": 0.0176, "step": 75000 }, { "epoch": 0.5544632033352059, "grad_norm": 0.0643104761838913, "learning_rate": 2.2359478869895537e-05, "loss": 0.0182, "step": 75010 }, { "epoch": 0.5545371219065078, "grad_norm": 0.08297935128211975, "learning_rate": 2.2355769230769233e-05, "loss": 0.0201, "step": 75020 }, { "epoch": 0.5546110404778096, "grad_norm": 0.08416923135519028, "learning_rate": 2.2352059591642925e-05, "loss": 0.016, "step": 75030 }, { "epoch": 0.5546849590491115, "grad_norm": 0.09272485226392746, "learning_rate": 2.234834995251662e-05, "loss": 0.018, "step": 75040 }, { "epoch": 0.5547588776204133, "grad_norm": 0.06990412622690201, "learning_rate": 2.2344640313390314e-05, "loss": 0.0199, "step": 75050 }, { "epoch": 0.5548327961917152, "grad_norm": 0.08628682792186737, "learning_rate": 2.2340930674264007e-05, "loss": 0.0185, "step": 75060 }, { "epoch": 0.554906714763017, "grad_norm": 0.06135695427656174, "learning_rate": 2.2337221035137703e-05, "loss": 0.0183, "step": 75070 }, { "epoch": 0.5549806333343189, "grad_norm": 0.07100776582956314, "learning_rate": 2.23335113960114e-05, "loss": 0.018, "step": 75080 }, { "epoch": 0.5550545519056208, "grad_norm": 0.08598551154136658, "learning_rate": 2.232980175688509e-05, "loss": 0.0197, "step": 75090 }, { "epoch": 0.5551284704769226, "grad_norm": 0.09556099772453308, "learning_rate": 2.2326092117758784e-05, "loss": 0.0178, "step": 75100 }, { "epoch": 0.5552023890482245, "grad_norm": 0.09031404554843903, "learning_rate": 2.232238247863248e-05, "loss": 0.0184, "step": 75110 }, { "epoch": 0.5552763076195263, "grad_norm": 0.07659973204135895, "learning_rate": 2.2318672839506176e-05, "loss": 0.0177, "step": 75120 }, { "epoch": 0.5553502261908282, "grad_norm": 0.07315723598003387, "learning_rate": 2.2314963200379868e-05, "loss": 0.0179, "step": 75130 }, { "epoch": 0.5554241447621301, "grad_norm": 0.06226538494229317, "learning_rate": 2.231125356125356e-05, "loss": 0.0156, "step": 75140 }, { "epoch": 0.5554980633334319, "grad_norm": 0.061756961047649384, "learning_rate": 2.2307543922127257e-05, "loss": 0.0176, "step": 75150 }, { "epoch": 0.5555719819047338, "grad_norm": 0.09428950399160385, "learning_rate": 2.2303834283000953e-05, "loss": 0.0184, "step": 75160 }, { "epoch": 0.5556459004760356, "grad_norm": 0.06500783562660217, "learning_rate": 2.2300124643874645e-05, "loss": 0.0169, "step": 75170 }, { "epoch": 0.5557198190473375, "grad_norm": 0.0777098536491394, "learning_rate": 2.2296415004748338e-05, "loss": 0.0184, "step": 75180 }, { "epoch": 0.5557937376186393, "grad_norm": 0.07891745120286942, "learning_rate": 2.2292705365622034e-05, "loss": 0.0178, "step": 75190 }, { "epoch": 0.5558676561899412, "grad_norm": 0.05507243424654007, "learning_rate": 2.228899572649573e-05, "loss": 0.0179, "step": 75200 }, { "epoch": 0.5559415747612431, "grad_norm": 0.08101353049278259, "learning_rate": 2.2285286087369422e-05, "loss": 0.0169, "step": 75210 }, { "epoch": 0.5560154933325449, "grad_norm": 0.07776536792516708, "learning_rate": 2.2281576448243115e-05, "loss": 0.0169, "step": 75220 }, { "epoch": 0.5560894119038468, "grad_norm": 0.07454312592744827, "learning_rate": 2.227786680911681e-05, "loss": 0.0162, "step": 75230 }, { "epoch": 0.5561633304751485, "grad_norm": 0.07357978820800781, "learning_rate": 2.2274157169990503e-05, "loss": 0.0174, "step": 75240 }, { "epoch": 0.5562372490464504, "grad_norm": 0.0766085535287857, "learning_rate": 2.22704475308642e-05, "loss": 0.0198, "step": 75250 }, { "epoch": 0.5563111676177522, "grad_norm": 0.10694071650505066, "learning_rate": 2.2266737891737892e-05, "loss": 0.0182, "step": 75260 }, { "epoch": 0.5563850861890541, "grad_norm": 0.08427776396274567, "learning_rate": 2.2263028252611588e-05, "loss": 0.0169, "step": 75270 }, { "epoch": 0.556459004760356, "grad_norm": 0.06425360590219498, "learning_rate": 2.225931861348528e-05, "loss": 0.0182, "step": 75280 }, { "epoch": 0.5565329233316578, "grad_norm": 0.09031922370195389, "learning_rate": 2.2255608974358973e-05, "loss": 0.0197, "step": 75290 }, { "epoch": 0.5566068419029597, "grad_norm": 0.06680730730295181, "learning_rate": 2.225189933523267e-05, "loss": 0.0162, "step": 75300 }, { "epoch": 0.5566807604742615, "grad_norm": 0.06603684276342392, "learning_rate": 2.2248189696106365e-05, "loss": 0.0195, "step": 75310 }, { "epoch": 0.5567546790455634, "grad_norm": 0.06782803684473038, "learning_rate": 2.2244480056980058e-05, "loss": 0.0162, "step": 75320 }, { "epoch": 0.5568285976168652, "grad_norm": 0.09163343161344528, "learning_rate": 2.224077041785375e-05, "loss": 0.0184, "step": 75330 }, { "epoch": 0.5569025161881671, "grad_norm": 0.07321230322122574, "learning_rate": 2.2237060778727446e-05, "loss": 0.0193, "step": 75340 }, { "epoch": 0.556976434759469, "grad_norm": 0.06885476410388947, "learning_rate": 2.2233351139601142e-05, "loss": 0.0158, "step": 75350 }, { "epoch": 0.5570503533307708, "grad_norm": 0.08268599212169647, "learning_rate": 2.2229641500474835e-05, "loss": 0.0164, "step": 75360 }, { "epoch": 0.5571242719020727, "grad_norm": 0.08073693513870239, "learning_rate": 2.2225931861348527e-05, "loss": 0.0167, "step": 75370 }, { "epoch": 0.5571981904733745, "grad_norm": 0.05825946480035782, "learning_rate": 2.2222222222222223e-05, "loss": 0.0158, "step": 75380 }, { "epoch": 0.5572721090446764, "grad_norm": 0.08236400038003922, "learning_rate": 2.221851258309592e-05, "loss": 0.0179, "step": 75390 }, { "epoch": 0.5573460276159783, "grad_norm": 0.097753144800663, "learning_rate": 2.2214802943969612e-05, "loss": 0.0184, "step": 75400 }, { "epoch": 0.5574199461872801, "grad_norm": 0.08877979218959808, "learning_rate": 2.2211093304843304e-05, "loss": 0.0178, "step": 75410 }, { "epoch": 0.557493864758582, "grad_norm": 0.11679001152515411, "learning_rate": 2.2207383665717e-05, "loss": 0.0174, "step": 75420 }, { "epoch": 0.5575677833298838, "grad_norm": 0.08042196929454803, "learning_rate": 2.2203674026590696e-05, "loss": 0.0163, "step": 75430 }, { "epoch": 0.5576417019011857, "grad_norm": 0.06778399646282196, "learning_rate": 2.219996438746439e-05, "loss": 0.0183, "step": 75440 }, { "epoch": 0.5577156204724875, "grad_norm": 0.07316727936267853, "learning_rate": 2.219625474833808e-05, "loss": 0.0176, "step": 75450 }, { "epoch": 0.5577895390437894, "grad_norm": 0.0796537697315216, "learning_rate": 2.2192545109211777e-05, "loss": 0.0193, "step": 75460 }, { "epoch": 0.5578634576150913, "grad_norm": 0.08047375082969666, "learning_rate": 2.218883547008547e-05, "loss": 0.0182, "step": 75470 }, { "epoch": 0.557937376186393, "grad_norm": 0.06991118937730789, "learning_rate": 2.2185125830959166e-05, "loss": 0.0163, "step": 75480 }, { "epoch": 0.558011294757695, "grad_norm": 0.07676273584365845, "learning_rate": 2.218141619183286e-05, "loss": 0.0189, "step": 75490 }, { "epoch": 0.5580852133289967, "grad_norm": 0.07694961130619049, "learning_rate": 2.2177706552706555e-05, "loss": 0.0196, "step": 75500 }, { "epoch": 0.5581591319002986, "grad_norm": 0.08334438502788544, "learning_rate": 2.2173996913580247e-05, "loss": 0.017, "step": 75510 }, { "epoch": 0.5582330504716004, "grad_norm": 0.07191971689462662, "learning_rate": 2.217028727445394e-05, "loss": 0.019, "step": 75520 }, { "epoch": 0.5583069690429023, "grad_norm": 0.0889514610171318, "learning_rate": 2.2166577635327636e-05, "loss": 0.0144, "step": 75530 }, { "epoch": 0.5583808876142042, "grad_norm": 0.05933712422847748, "learning_rate": 2.216286799620133e-05, "loss": 0.0169, "step": 75540 }, { "epoch": 0.558454806185506, "grad_norm": 0.07951271533966064, "learning_rate": 2.2159158357075024e-05, "loss": 0.0173, "step": 75550 }, { "epoch": 0.5585287247568079, "grad_norm": 0.07584798336029053, "learning_rate": 2.2155448717948717e-05, "loss": 0.016, "step": 75560 }, { "epoch": 0.5586026433281097, "grad_norm": 0.07957648485898972, "learning_rate": 2.2151739078822413e-05, "loss": 0.0197, "step": 75570 }, { "epoch": 0.5586765618994116, "grad_norm": 0.13206903636455536, "learning_rate": 2.214802943969611e-05, "loss": 0.0186, "step": 75580 }, { "epoch": 0.5587504804707135, "grad_norm": 0.07318476587533951, "learning_rate": 2.21443198005698e-05, "loss": 0.0186, "step": 75590 }, { "epoch": 0.5588243990420153, "grad_norm": 0.06643281131982803, "learning_rate": 2.2140610161443494e-05, "loss": 0.0165, "step": 75600 }, { "epoch": 0.5588983176133172, "grad_norm": 0.07346879690885544, "learning_rate": 2.213690052231719e-05, "loss": 0.0159, "step": 75610 }, { "epoch": 0.558972236184619, "grad_norm": 0.09815242886543274, "learning_rate": 2.2133190883190886e-05, "loss": 0.0178, "step": 75620 }, { "epoch": 0.5590461547559209, "grad_norm": 0.08379372954368591, "learning_rate": 2.212948124406458e-05, "loss": 0.0171, "step": 75630 }, { "epoch": 0.5591200733272227, "grad_norm": 0.07338432967662811, "learning_rate": 2.212577160493827e-05, "loss": 0.0153, "step": 75640 }, { "epoch": 0.5591939918985246, "grad_norm": 0.06731506437063217, "learning_rate": 2.2122061965811967e-05, "loss": 0.0167, "step": 75650 }, { "epoch": 0.5592679104698265, "grad_norm": 0.09762943536043167, "learning_rate": 2.2118352326685663e-05, "loss": 0.0181, "step": 75660 }, { "epoch": 0.5593418290411283, "grad_norm": 0.06943827867507935, "learning_rate": 2.2114642687559356e-05, "loss": 0.0181, "step": 75670 }, { "epoch": 0.5594157476124302, "grad_norm": 0.08721622824668884, "learning_rate": 2.2110933048433048e-05, "loss": 0.0172, "step": 75680 }, { "epoch": 0.559489666183732, "grad_norm": 0.07268903404474258, "learning_rate": 2.2107223409306744e-05, "loss": 0.0168, "step": 75690 }, { "epoch": 0.5595635847550339, "grad_norm": 0.07044602185487747, "learning_rate": 2.2103513770180437e-05, "loss": 0.0178, "step": 75700 }, { "epoch": 0.5596375033263357, "grad_norm": 0.05945662781596184, "learning_rate": 2.2099804131054133e-05, "loss": 0.0162, "step": 75710 }, { "epoch": 0.5597114218976376, "grad_norm": 0.07119162380695343, "learning_rate": 2.2096094491927825e-05, "loss": 0.0183, "step": 75720 }, { "epoch": 0.5597853404689395, "grad_norm": 0.0783572718501091, "learning_rate": 2.209238485280152e-05, "loss": 0.0205, "step": 75730 }, { "epoch": 0.5598592590402413, "grad_norm": 0.1033354178071022, "learning_rate": 2.2088675213675214e-05, "loss": 0.017, "step": 75740 }, { "epoch": 0.5599331776115432, "grad_norm": 0.07081745564937592, "learning_rate": 2.2084965574548906e-05, "loss": 0.0189, "step": 75750 }, { "epoch": 0.5600070961828449, "grad_norm": 0.07376065105199814, "learning_rate": 2.2081255935422602e-05, "loss": 0.018, "step": 75760 }, { "epoch": 0.5600810147541468, "grad_norm": 0.08154357969760895, "learning_rate": 2.2077546296296298e-05, "loss": 0.018, "step": 75770 }, { "epoch": 0.5601549333254486, "grad_norm": 0.08173554390668869, "learning_rate": 2.207383665716999e-05, "loss": 0.0202, "step": 75780 }, { "epoch": 0.5602288518967505, "grad_norm": 0.08825518935918808, "learning_rate": 2.2070127018043683e-05, "loss": 0.0179, "step": 75790 }, { "epoch": 0.5603027704680524, "grad_norm": 0.09500667452812195, "learning_rate": 2.2066417378917383e-05, "loss": 0.0166, "step": 75800 }, { "epoch": 0.5603766890393542, "grad_norm": 0.09798631817102432, "learning_rate": 2.2062707739791075e-05, "loss": 0.0181, "step": 75810 }, { "epoch": 0.5604506076106561, "grad_norm": 0.06984465569257736, "learning_rate": 2.2058998100664768e-05, "loss": 0.0147, "step": 75820 }, { "epoch": 0.5605245261819579, "grad_norm": 0.08530226349830627, "learning_rate": 2.205528846153846e-05, "loss": 0.0173, "step": 75830 }, { "epoch": 0.5605984447532598, "grad_norm": 0.07154073566198349, "learning_rate": 2.2051578822412156e-05, "loss": 0.0176, "step": 75840 }, { "epoch": 0.5606723633245617, "grad_norm": 0.08098631352186203, "learning_rate": 2.2047869183285852e-05, "loss": 0.0174, "step": 75850 }, { "epoch": 0.5607462818958635, "grad_norm": 0.08754424750804901, "learning_rate": 2.2044159544159545e-05, "loss": 0.0174, "step": 75860 }, { "epoch": 0.5608202004671654, "grad_norm": 0.07257146388292313, "learning_rate": 2.2040449905033238e-05, "loss": 0.0185, "step": 75870 }, { "epoch": 0.5608941190384672, "grad_norm": 0.06962021440267563, "learning_rate": 2.2036740265906934e-05, "loss": 0.0179, "step": 75880 }, { "epoch": 0.5609680376097691, "grad_norm": 0.08640135824680328, "learning_rate": 2.203303062678063e-05, "loss": 0.0177, "step": 75890 }, { "epoch": 0.5610419561810709, "grad_norm": 0.10559427738189697, "learning_rate": 2.2029320987654322e-05, "loss": 0.0156, "step": 75900 }, { "epoch": 0.5611158747523728, "grad_norm": 0.08029115200042725, "learning_rate": 2.2025611348528015e-05, "loss": 0.0171, "step": 75910 }, { "epoch": 0.5611897933236747, "grad_norm": 0.09592238068580627, "learning_rate": 2.202190170940171e-05, "loss": 0.0208, "step": 75920 }, { "epoch": 0.5612637118949765, "grad_norm": 0.07311482727527618, "learning_rate": 2.2018192070275403e-05, "loss": 0.0186, "step": 75930 }, { "epoch": 0.5613376304662784, "grad_norm": 0.07350117713212967, "learning_rate": 2.20144824311491e-05, "loss": 0.0185, "step": 75940 }, { "epoch": 0.5614115490375802, "grad_norm": 0.11407254636287689, "learning_rate": 2.2010772792022795e-05, "loss": 0.0179, "step": 75950 }, { "epoch": 0.5614854676088821, "grad_norm": 0.07127930968999863, "learning_rate": 2.2007063152896488e-05, "loss": 0.0192, "step": 75960 }, { "epoch": 0.5615593861801839, "grad_norm": 0.09806405752897263, "learning_rate": 2.200335351377018e-05, "loss": 0.0182, "step": 75970 }, { "epoch": 0.5616333047514858, "grad_norm": 0.0642218291759491, "learning_rate": 2.1999643874643873e-05, "loss": 0.0169, "step": 75980 }, { "epoch": 0.5617072233227877, "grad_norm": 0.08178985863924026, "learning_rate": 2.1995934235517572e-05, "loss": 0.0169, "step": 75990 }, { "epoch": 0.5617811418940895, "grad_norm": 0.12003156542778015, "learning_rate": 2.1992224596391265e-05, "loss": 0.0195, "step": 76000 }, { "epoch": 0.5618550604653914, "grad_norm": 0.10624649375677109, "learning_rate": 2.1988514957264957e-05, "loss": 0.0168, "step": 76010 }, { "epoch": 0.5619289790366931, "grad_norm": 0.060141682624816895, "learning_rate": 2.198480531813865e-05, "loss": 0.016, "step": 76020 }, { "epoch": 0.562002897607995, "grad_norm": 0.07442566752433777, "learning_rate": 2.198109567901235e-05, "loss": 0.0186, "step": 76030 }, { "epoch": 0.5620768161792968, "grad_norm": 0.09498272836208344, "learning_rate": 2.1977386039886042e-05, "loss": 0.0162, "step": 76040 }, { "epoch": 0.5621507347505987, "grad_norm": 0.06662849336862564, "learning_rate": 2.1973676400759734e-05, "loss": 0.0174, "step": 76050 }, { "epoch": 0.5622246533219006, "grad_norm": 0.09005844593048096, "learning_rate": 2.1969966761633427e-05, "loss": 0.0172, "step": 76060 }, { "epoch": 0.5622985718932024, "grad_norm": 0.07276521623134613, "learning_rate": 2.1966257122507123e-05, "loss": 0.0161, "step": 76070 }, { "epoch": 0.5623724904645043, "grad_norm": 0.07238933444023132, "learning_rate": 2.196254748338082e-05, "loss": 0.0183, "step": 76080 }, { "epoch": 0.5624464090358061, "grad_norm": 0.061950720846652985, "learning_rate": 2.195883784425451e-05, "loss": 0.0198, "step": 76090 }, { "epoch": 0.562520327607108, "grad_norm": 0.07546308636665344, "learning_rate": 2.1955128205128208e-05, "loss": 0.0157, "step": 76100 }, { "epoch": 0.5625942461784099, "grad_norm": 0.0679684653878212, "learning_rate": 2.19514185660019e-05, "loss": 0.0166, "step": 76110 }, { "epoch": 0.5626681647497117, "grad_norm": 0.07437802106142044, "learning_rate": 2.1947708926875596e-05, "loss": 0.0186, "step": 76120 }, { "epoch": 0.5627420833210136, "grad_norm": 0.07765395939350128, "learning_rate": 2.194399928774929e-05, "loss": 0.0169, "step": 76130 }, { "epoch": 0.5628160018923154, "grad_norm": 0.0975767970085144, "learning_rate": 2.1940289648622985e-05, "loss": 0.0169, "step": 76140 }, { "epoch": 0.5628899204636173, "grad_norm": 0.06236174330115318, "learning_rate": 2.1936580009496677e-05, "loss": 0.0157, "step": 76150 }, { "epoch": 0.5629638390349191, "grad_norm": 0.09108094871044159, "learning_rate": 2.193287037037037e-05, "loss": 0.0194, "step": 76160 }, { "epoch": 0.563037757606221, "grad_norm": 0.10375560820102692, "learning_rate": 2.1929160731244066e-05, "loss": 0.0198, "step": 76170 }, { "epoch": 0.5631116761775229, "grad_norm": 0.10493025183677673, "learning_rate": 2.1925451092117762e-05, "loss": 0.0192, "step": 76180 }, { "epoch": 0.5631855947488247, "grad_norm": 0.06288543343544006, "learning_rate": 2.1921741452991454e-05, "loss": 0.0177, "step": 76190 }, { "epoch": 0.5632595133201266, "grad_norm": 0.0990319773554802, "learning_rate": 2.1918031813865147e-05, "loss": 0.0162, "step": 76200 }, { "epoch": 0.5633334318914284, "grad_norm": 0.08652309328317642, "learning_rate": 2.191432217473884e-05, "loss": 0.0192, "step": 76210 }, { "epoch": 0.5634073504627303, "grad_norm": 0.06287626922130585, "learning_rate": 2.191061253561254e-05, "loss": 0.0139, "step": 76220 }, { "epoch": 0.5634812690340321, "grad_norm": 0.08388041704893112, "learning_rate": 2.190690289648623e-05, "loss": 0.0185, "step": 76230 }, { "epoch": 0.563555187605334, "grad_norm": 0.0805421769618988, "learning_rate": 2.1903193257359924e-05, "loss": 0.0162, "step": 76240 }, { "epoch": 0.5636291061766359, "grad_norm": 0.08735102415084839, "learning_rate": 2.189948361823362e-05, "loss": 0.0188, "step": 76250 }, { "epoch": 0.5637030247479377, "grad_norm": 0.07975315302610397, "learning_rate": 2.1895773979107316e-05, "loss": 0.0184, "step": 76260 }, { "epoch": 0.5637769433192396, "grad_norm": 0.09392821043729782, "learning_rate": 2.189206433998101e-05, "loss": 0.0159, "step": 76270 }, { "epoch": 0.5638508618905413, "grad_norm": 0.08077412843704224, "learning_rate": 2.18883547008547e-05, "loss": 0.0177, "step": 76280 }, { "epoch": 0.5639247804618432, "grad_norm": 0.07388119399547577, "learning_rate": 2.1884645061728397e-05, "loss": 0.0193, "step": 76290 }, { "epoch": 0.563998699033145, "grad_norm": 0.09471497684717178, "learning_rate": 2.188093542260209e-05, "loss": 0.0187, "step": 76300 }, { "epoch": 0.5640726176044469, "grad_norm": 0.07443129271268845, "learning_rate": 2.1877225783475786e-05, "loss": 0.0157, "step": 76310 }, { "epoch": 0.5641465361757488, "grad_norm": 0.07455494999885559, "learning_rate": 2.1873516144349478e-05, "loss": 0.0164, "step": 76320 }, { "epoch": 0.5642204547470506, "grad_norm": 0.0900912880897522, "learning_rate": 2.1869806505223174e-05, "loss": 0.0176, "step": 76330 }, { "epoch": 0.5642943733183525, "grad_norm": 0.06420095264911652, "learning_rate": 2.1866096866096867e-05, "loss": 0.0173, "step": 76340 }, { "epoch": 0.5643682918896543, "grad_norm": 0.09643217921257019, "learning_rate": 2.1862387226970563e-05, "loss": 0.0178, "step": 76350 }, { "epoch": 0.5644422104609562, "grad_norm": 0.07020307332277298, "learning_rate": 2.1858677587844255e-05, "loss": 0.0192, "step": 76360 }, { "epoch": 0.5645161290322581, "grad_norm": 0.09751828014850616, "learning_rate": 2.185496794871795e-05, "loss": 0.0157, "step": 76370 }, { "epoch": 0.5645900476035599, "grad_norm": 0.053897056728601456, "learning_rate": 2.1851258309591644e-05, "loss": 0.019, "step": 76380 }, { "epoch": 0.5646639661748618, "grad_norm": 0.07476173341274261, "learning_rate": 2.1847548670465336e-05, "loss": 0.0201, "step": 76390 }, { "epoch": 0.5647378847461636, "grad_norm": 0.07188209146261215, "learning_rate": 2.1843839031339032e-05, "loss": 0.0177, "step": 76400 }, { "epoch": 0.5648118033174655, "grad_norm": 0.05990707501769066, "learning_rate": 2.184012939221273e-05, "loss": 0.0166, "step": 76410 }, { "epoch": 0.5648857218887673, "grad_norm": 0.07479406148195267, "learning_rate": 2.183641975308642e-05, "loss": 0.0174, "step": 76420 }, { "epoch": 0.5649596404600692, "grad_norm": 0.05558224022388458, "learning_rate": 2.1832710113960113e-05, "loss": 0.0187, "step": 76430 }, { "epoch": 0.5650335590313711, "grad_norm": 0.08270638436079025, "learning_rate": 2.182900047483381e-05, "loss": 0.0166, "step": 76440 }, { "epoch": 0.5651074776026729, "grad_norm": 0.06895186752080917, "learning_rate": 2.1825290835707505e-05, "loss": 0.0178, "step": 76450 }, { "epoch": 0.5651813961739748, "grad_norm": 0.05761091411113739, "learning_rate": 2.1821581196581198e-05, "loss": 0.0171, "step": 76460 }, { "epoch": 0.5652553147452766, "grad_norm": 0.0714200958609581, "learning_rate": 2.181787155745489e-05, "loss": 0.0185, "step": 76470 }, { "epoch": 0.5653292333165785, "grad_norm": 0.08116467297077179, "learning_rate": 2.1814161918328587e-05, "loss": 0.0171, "step": 76480 }, { "epoch": 0.5654031518878803, "grad_norm": 0.09137962013483047, "learning_rate": 2.1810452279202282e-05, "loss": 0.0182, "step": 76490 }, { "epoch": 0.5654770704591822, "grad_norm": 0.05624079331755638, "learning_rate": 2.1806742640075975e-05, "loss": 0.0168, "step": 76500 }, { "epoch": 0.5655509890304841, "grad_norm": 0.08908656239509583, "learning_rate": 2.1803033000949668e-05, "loss": 0.0169, "step": 76510 }, { "epoch": 0.5656249076017859, "grad_norm": 0.06865982711315155, "learning_rate": 2.1799323361823364e-05, "loss": 0.0185, "step": 76520 }, { "epoch": 0.5656988261730878, "grad_norm": 0.08398845791816711, "learning_rate": 2.1795613722697056e-05, "loss": 0.0199, "step": 76530 }, { "epoch": 0.5657727447443895, "grad_norm": 0.06882167607545853, "learning_rate": 2.1791904083570752e-05, "loss": 0.0188, "step": 76540 }, { "epoch": 0.5658466633156914, "grad_norm": 0.11541710793972015, "learning_rate": 2.1788194444444445e-05, "loss": 0.0181, "step": 76550 }, { "epoch": 0.5659205818869932, "grad_norm": 0.08537382632493973, "learning_rate": 2.178448480531814e-05, "loss": 0.0164, "step": 76560 }, { "epoch": 0.5659945004582951, "grad_norm": 0.08711043000221252, "learning_rate": 2.1780775166191833e-05, "loss": 0.0166, "step": 76570 }, { "epoch": 0.566068419029597, "grad_norm": 0.08303167670965195, "learning_rate": 2.177706552706553e-05, "loss": 0.0172, "step": 76580 }, { "epoch": 0.5661423376008988, "grad_norm": 0.13246089220046997, "learning_rate": 2.1773355887939222e-05, "loss": 0.0194, "step": 76590 }, { "epoch": 0.5662162561722007, "grad_norm": 0.07567055523395538, "learning_rate": 2.1769646248812918e-05, "loss": 0.0167, "step": 76600 }, { "epoch": 0.5662901747435025, "grad_norm": 0.10139036178588867, "learning_rate": 2.176593660968661e-05, "loss": 0.017, "step": 76610 }, { "epoch": 0.5663640933148044, "grad_norm": 0.057844486087560654, "learning_rate": 2.1762226970560303e-05, "loss": 0.0192, "step": 76620 }, { "epoch": 0.5664380118861063, "grad_norm": 0.0815180242061615, "learning_rate": 2.1758517331434e-05, "loss": 0.0188, "step": 76630 }, { "epoch": 0.5665119304574081, "grad_norm": 0.0749938040971756, "learning_rate": 2.1754807692307695e-05, "loss": 0.0179, "step": 76640 }, { "epoch": 0.56658584902871, "grad_norm": 0.0814565122127533, "learning_rate": 2.1751098053181387e-05, "loss": 0.0154, "step": 76650 }, { "epoch": 0.5666597676000118, "grad_norm": 0.08643099665641785, "learning_rate": 2.174738841405508e-05, "loss": 0.0178, "step": 76660 }, { "epoch": 0.5667336861713137, "grad_norm": 0.07690934091806412, "learning_rate": 2.1743678774928776e-05, "loss": 0.016, "step": 76670 }, { "epoch": 0.5668076047426155, "grad_norm": 0.06089937686920166, "learning_rate": 2.1739969135802472e-05, "loss": 0.0147, "step": 76680 }, { "epoch": 0.5668815233139174, "grad_norm": 0.10612459480762482, "learning_rate": 2.1736259496676165e-05, "loss": 0.013, "step": 76690 }, { "epoch": 0.5669554418852193, "grad_norm": 0.07758487015962601, "learning_rate": 2.1732549857549857e-05, "loss": 0.0176, "step": 76700 }, { "epoch": 0.5670293604565211, "grad_norm": 0.13286489248275757, "learning_rate": 2.1728840218423553e-05, "loss": 0.0217, "step": 76710 }, { "epoch": 0.567103279027823, "grad_norm": 0.06863997131586075, "learning_rate": 2.172513057929725e-05, "loss": 0.018, "step": 76720 }, { "epoch": 0.5671771975991248, "grad_norm": 0.0733184888958931, "learning_rate": 2.172142094017094e-05, "loss": 0.0164, "step": 76730 }, { "epoch": 0.5672511161704267, "grad_norm": 0.0709184855222702, "learning_rate": 2.1717711301044634e-05, "loss": 0.0155, "step": 76740 }, { "epoch": 0.5673250347417285, "grad_norm": 0.07036468386650085, "learning_rate": 2.171400166191833e-05, "loss": 0.0151, "step": 76750 }, { "epoch": 0.5673989533130304, "grad_norm": 0.08693096786737442, "learning_rate": 2.1710292022792023e-05, "loss": 0.0183, "step": 76760 }, { "epoch": 0.5674728718843323, "grad_norm": 0.09758399426937103, "learning_rate": 2.170658238366572e-05, "loss": 0.017, "step": 76770 }, { "epoch": 0.567546790455634, "grad_norm": 0.09273570775985718, "learning_rate": 2.170287274453941e-05, "loss": 0.0171, "step": 76780 }, { "epoch": 0.567620709026936, "grad_norm": 0.0994814783334732, "learning_rate": 2.1699163105413107e-05, "loss": 0.019, "step": 76790 }, { "epoch": 0.5676946275982377, "grad_norm": 0.08835679292678833, "learning_rate": 2.16954534662868e-05, "loss": 0.0139, "step": 76800 }, { "epoch": 0.5677685461695396, "grad_norm": 0.07272812724113464, "learning_rate": 2.1691743827160496e-05, "loss": 0.0183, "step": 76810 }, { "epoch": 0.5678424647408414, "grad_norm": 0.05673498660326004, "learning_rate": 2.168803418803419e-05, "loss": 0.0187, "step": 76820 }, { "epoch": 0.5679163833121433, "grad_norm": 0.05819728597998619, "learning_rate": 2.1684324548907884e-05, "loss": 0.0186, "step": 76830 }, { "epoch": 0.5679903018834452, "grad_norm": 0.08914889395236969, "learning_rate": 2.1680614909781577e-05, "loss": 0.0167, "step": 76840 }, { "epoch": 0.568064220454747, "grad_norm": 0.07220537960529327, "learning_rate": 2.167690527065527e-05, "loss": 0.0173, "step": 76850 }, { "epoch": 0.5681381390260489, "grad_norm": 0.09979695081710815, "learning_rate": 2.1673195631528966e-05, "loss": 0.0172, "step": 76860 }, { "epoch": 0.5682120575973507, "grad_norm": 0.08128748089075089, "learning_rate": 2.166948599240266e-05, "loss": 0.0169, "step": 76870 }, { "epoch": 0.5682859761686526, "grad_norm": 0.07112763822078705, "learning_rate": 2.1665776353276354e-05, "loss": 0.0173, "step": 76880 }, { "epoch": 0.5683598947399545, "grad_norm": 0.06230897083878517, "learning_rate": 2.1662066714150047e-05, "loss": 0.0191, "step": 76890 }, { "epoch": 0.5684338133112563, "grad_norm": 0.07018118351697922, "learning_rate": 2.1658357075023743e-05, "loss": 0.0166, "step": 76900 }, { "epoch": 0.5685077318825582, "grad_norm": 0.08045455813407898, "learning_rate": 2.165464743589744e-05, "loss": 0.0193, "step": 76910 }, { "epoch": 0.56858165045386, "grad_norm": 0.063961461186409, "learning_rate": 2.165093779677113e-05, "loss": 0.0186, "step": 76920 }, { "epoch": 0.5686555690251619, "grad_norm": 0.09592575579881668, "learning_rate": 2.1647228157644824e-05, "loss": 0.0161, "step": 76930 }, { "epoch": 0.5687294875964637, "grad_norm": 0.07979429513216019, "learning_rate": 2.164351851851852e-05, "loss": 0.0163, "step": 76940 }, { "epoch": 0.5688034061677656, "grad_norm": 0.10558890551328659, "learning_rate": 2.1639808879392216e-05, "loss": 0.0173, "step": 76950 }, { "epoch": 0.5688773247390675, "grad_norm": 0.08926460146903992, "learning_rate": 2.1636099240265908e-05, "loss": 0.0194, "step": 76960 }, { "epoch": 0.5689512433103693, "grad_norm": 0.05744050443172455, "learning_rate": 2.16323896011396e-05, "loss": 0.0183, "step": 76970 }, { "epoch": 0.5690251618816712, "grad_norm": 0.11004228889942169, "learning_rate": 2.1628679962013297e-05, "loss": 0.0201, "step": 76980 }, { "epoch": 0.569099080452973, "grad_norm": 0.08529970794916153, "learning_rate": 2.162497032288699e-05, "loss": 0.0175, "step": 76990 }, { "epoch": 0.5691729990242749, "grad_norm": 0.07577558606863022, "learning_rate": 2.1621260683760685e-05, "loss": 0.0166, "step": 77000 }, { "epoch": 0.5692469175955767, "grad_norm": 0.10471208393573761, "learning_rate": 2.1617551044634378e-05, "loss": 0.0194, "step": 77010 }, { "epoch": 0.5693208361668786, "grad_norm": 0.0808945968747139, "learning_rate": 2.1613841405508074e-05, "loss": 0.0193, "step": 77020 }, { "epoch": 0.5693947547381805, "grad_norm": 0.12748804688453674, "learning_rate": 2.1610131766381766e-05, "loss": 0.0192, "step": 77030 }, { "epoch": 0.5694686733094823, "grad_norm": 0.09266991168260574, "learning_rate": 2.1606422127255462e-05, "loss": 0.0174, "step": 77040 }, { "epoch": 0.5695425918807842, "grad_norm": 0.0642351359128952, "learning_rate": 2.1602712488129155e-05, "loss": 0.0188, "step": 77050 }, { "epoch": 0.569616510452086, "grad_norm": 0.06646756082773209, "learning_rate": 2.159900284900285e-05, "loss": 0.018, "step": 77060 }, { "epoch": 0.5696904290233878, "grad_norm": 0.07489508390426636, "learning_rate": 2.1595293209876544e-05, "loss": 0.0183, "step": 77070 }, { "epoch": 0.5697643475946896, "grad_norm": 0.0715007334947586, "learning_rate": 2.1591583570750236e-05, "loss": 0.019, "step": 77080 }, { "epoch": 0.5698382661659915, "grad_norm": 0.07386353611946106, "learning_rate": 2.1587873931623932e-05, "loss": 0.0194, "step": 77090 }, { "epoch": 0.5699121847372934, "grad_norm": 0.09321268647909164, "learning_rate": 2.1584164292497628e-05, "loss": 0.0159, "step": 77100 }, { "epoch": 0.5699861033085952, "grad_norm": 0.08199749141931534, "learning_rate": 2.158045465337132e-05, "loss": 0.0185, "step": 77110 }, { "epoch": 0.5700600218798971, "grad_norm": 0.10026410222053528, "learning_rate": 2.1576745014245013e-05, "loss": 0.0169, "step": 77120 }, { "epoch": 0.5701339404511989, "grad_norm": 0.12471500039100647, "learning_rate": 2.157303537511871e-05, "loss": 0.0197, "step": 77130 }, { "epoch": 0.5702078590225008, "grad_norm": 0.08403502404689789, "learning_rate": 2.1569325735992405e-05, "loss": 0.017, "step": 77140 }, { "epoch": 0.5702817775938027, "grad_norm": 0.07287797331809998, "learning_rate": 2.1565616096866098e-05, "loss": 0.0175, "step": 77150 }, { "epoch": 0.5703556961651045, "grad_norm": 0.09309650212526321, "learning_rate": 2.156190645773979e-05, "loss": 0.0168, "step": 77160 }, { "epoch": 0.5704296147364064, "grad_norm": 0.07405105978250504, "learning_rate": 2.1558196818613486e-05, "loss": 0.0179, "step": 77170 }, { "epoch": 0.5705035333077082, "grad_norm": 0.08167034387588501, "learning_rate": 2.1554487179487182e-05, "loss": 0.0167, "step": 77180 }, { "epoch": 0.5705774518790101, "grad_norm": 0.1163032054901123, "learning_rate": 2.1550777540360875e-05, "loss": 0.0174, "step": 77190 }, { "epoch": 0.5706513704503119, "grad_norm": 0.09522049129009247, "learning_rate": 2.1547067901234567e-05, "loss": 0.0179, "step": 77200 }, { "epoch": 0.5707252890216138, "grad_norm": 0.07914343476295471, "learning_rate": 2.1543358262108263e-05, "loss": 0.021, "step": 77210 }, { "epoch": 0.5707992075929157, "grad_norm": 0.08064424246549606, "learning_rate": 2.1539648622981956e-05, "loss": 0.0194, "step": 77220 }, { "epoch": 0.5708731261642175, "grad_norm": 0.0969173014163971, "learning_rate": 2.1535938983855652e-05, "loss": 0.0168, "step": 77230 }, { "epoch": 0.5709470447355194, "grad_norm": 0.08027642965316772, "learning_rate": 2.1532229344729344e-05, "loss": 0.0169, "step": 77240 }, { "epoch": 0.5710209633068212, "grad_norm": 0.06907127797603607, "learning_rate": 2.152851970560304e-05, "loss": 0.0206, "step": 77250 }, { "epoch": 0.5710948818781231, "grad_norm": 0.10103422403335571, "learning_rate": 2.1524810066476733e-05, "loss": 0.0183, "step": 77260 }, { "epoch": 0.5711688004494249, "grad_norm": 0.07240621000528336, "learning_rate": 2.152110042735043e-05, "loss": 0.0168, "step": 77270 }, { "epoch": 0.5712427190207268, "grad_norm": 0.0701412558555603, "learning_rate": 2.151739078822412e-05, "loss": 0.0172, "step": 77280 }, { "epoch": 0.5713166375920287, "grad_norm": 0.05859806761145592, "learning_rate": 2.1513681149097818e-05, "loss": 0.0192, "step": 77290 }, { "epoch": 0.5713905561633305, "grad_norm": 0.0658048465847969, "learning_rate": 2.150997150997151e-05, "loss": 0.0162, "step": 77300 }, { "epoch": 0.5714644747346324, "grad_norm": 0.10993514955043793, "learning_rate": 2.1506261870845203e-05, "loss": 0.02, "step": 77310 }, { "epoch": 0.5715383933059341, "grad_norm": 0.07890800386667252, "learning_rate": 2.1502552231718902e-05, "loss": 0.0183, "step": 77320 }, { "epoch": 0.571612311877236, "grad_norm": 0.07548778504133224, "learning_rate": 2.1498842592592595e-05, "loss": 0.0176, "step": 77330 }, { "epoch": 0.5716862304485378, "grad_norm": 0.11432471871376038, "learning_rate": 2.1495132953466287e-05, "loss": 0.0186, "step": 77340 }, { "epoch": 0.5717601490198397, "grad_norm": 0.07823914289474487, "learning_rate": 2.149142331433998e-05, "loss": 0.0177, "step": 77350 }, { "epoch": 0.5718340675911416, "grad_norm": 0.08516088128089905, "learning_rate": 2.148771367521368e-05, "loss": 0.0161, "step": 77360 }, { "epoch": 0.5719079861624434, "grad_norm": 0.06825272738933563, "learning_rate": 2.1484004036087372e-05, "loss": 0.0174, "step": 77370 }, { "epoch": 0.5719819047337453, "grad_norm": 0.06376885622739792, "learning_rate": 2.1480294396961064e-05, "loss": 0.0174, "step": 77380 }, { "epoch": 0.5720558233050471, "grad_norm": 0.08210685849189758, "learning_rate": 2.1476584757834757e-05, "loss": 0.0189, "step": 77390 }, { "epoch": 0.572129741876349, "grad_norm": 0.07939564436674118, "learning_rate": 2.1472875118708453e-05, "loss": 0.0173, "step": 77400 }, { "epoch": 0.5722036604476509, "grad_norm": 0.08778122067451477, "learning_rate": 2.146916547958215e-05, "loss": 0.0188, "step": 77410 }, { "epoch": 0.5722775790189527, "grad_norm": 0.06430813670158386, "learning_rate": 2.146545584045584e-05, "loss": 0.0185, "step": 77420 }, { "epoch": 0.5723514975902546, "grad_norm": 0.07405303418636322, "learning_rate": 2.1461746201329534e-05, "loss": 0.0183, "step": 77430 }, { "epoch": 0.5724254161615564, "grad_norm": 0.08082497864961624, "learning_rate": 2.145803656220323e-05, "loss": 0.0179, "step": 77440 }, { "epoch": 0.5724993347328583, "grad_norm": 0.09143561124801636, "learning_rate": 2.1454326923076923e-05, "loss": 0.0173, "step": 77450 }, { "epoch": 0.5725732533041601, "grad_norm": 0.08747171610593796, "learning_rate": 2.145061728395062e-05, "loss": 0.0174, "step": 77460 }, { "epoch": 0.572647171875462, "grad_norm": 0.09300205111503601, "learning_rate": 2.1446907644824314e-05, "loss": 0.0176, "step": 77470 }, { "epoch": 0.5727210904467639, "grad_norm": 0.08413171768188477, "learning_rate": 2.1443198005698007e-05, "loss": 0.0173, "step": 77480 }, { "epoch": 0.5727950090180657, "grad_norm": 0.06460338830947876, "learning_rate": 2.14394883665717e-05, "loss": 0.0159, "step": 77490 }, { "epoch": 0.5728689275893676, "grad_norm": 0.08256684243679047, "learning_rate": 2.1435778727445396e-05, "loss": 0.0167, "step": 77500 }, { "epoch": 0.5729428461606694, "grad_norm": 0.08620408177375793, "learning_rate": 2.143206908831909e-05, "loss": 0.0193, "step": 77510 }, { "epoch": 0.5730167647319713, "grad_norm": 0.060469850897789, "learning_rate": 2.1428359449192784e-05, "loss": 0.0176, "step": 77520 }, { "epoch": 0.5730906833032731, "grad_norm": 0.09427706897258759, "learning_rate": 2.1424649810066477e-05, "loss": 0.017, "step": 77530 }, { "epoch": 0.573164601874575, "grad_norm": 0.07587603479623795, "learning_rate": 2.142094017094017e-05, "loss": 0.0202, "step": 77540 }, { "epoch": 0.5732385204458769, "grad_norm": 0.05992022901773453, "learning_rate": 2.141723053181387e-05, "loss": 0.0151, "step": 77550 }, { "epoch": 0.5733124390171787, "grad_norm": 0.1014205813407898, "learning_rate": 2.141352089268756e-05, "loss": 0.0187, "step": 77560 }, { "epoch": 0.5733863575884806, "grad_norm": 0.08910097926855087, "learning_rate": 2.1409811253561254e-05, "loss": 0.0176, "step": 77570 }, { "epoch": 0.5734602761597823, "grad_norm": 0.08204387873411179, "learning_rate": 2.1406101614434946e-05, "loss": 0.0208, "step": 77580 }, { "epoch": 0.5735341947310842, "grad_norm": 0.10908210277557373, "learning_rate": 2.1402391975308646e-05, "loss": 0.0183, "step": 77590 }, { "epoch": 0.5736081133023861, "grad_norm": 0.04895222187042236, "learning_rate": 2.139868233618234e-05, "loss": 0.0172, "step": 77600 }, { "epoch": 0.5736820318736879, "grad_norm": 0.07253724336624146, "learning_rate": 2.139497269705603e-05, "loss": 0.0179, "step": 77610 }, { "epoch": 0.5737559504449898, "grad_norm": 0.07849972695112228, "learning_rate": 2.1391263057929727e-05, "loss": 0.0176, "step": 77620 }, { "epoch": 0.5738298690162916, "grad_norm": 0.06313782930374146, "learning_rate": 2.138755341880342e-05, "loss": 0.0172, "step": 77630 }, { "epoch": 0.5739037875875935, "grad_norm": 0.08818871527910233, "learning_rate": 2.1383843779677115e-05, "loss": 0.0154, "step": 77640 }, { "epoch": 0.5739777061588953, "grad_norm": 0.07989152520895004, "learning_rate": 2.1380134140550808e-05, "loss": 0.0192, "step": 77650 }, { "epoch": 0.5740516247301972, "grad_norm": 0.08319340646266937, "learning_rate": 2.1376424501424504e-05, "loss": 0.0177, "step": 77660 }, { "epoch": 0.5741255433014991, "grad_norm": 0.08299017697572708, "learning_rate": 2.1372714862298197e-05, "loss": 0.0161, "step": 77670 }, { "epoch": 0.5741994618728009, "grad_norm": 0.09096242487430573, "learning_rate": 2.136900522317189e-05, "loss": 0.0175, "step": 77680 }, { "epoch": 0.5742733804441028, "grad_norm": 0.08916709572076797, "learning_rate": 2.1365295584045585e-05, "loss": 0.0195, "step": 77690 }, { "epoch": 0.5743472990154046, "grad_norm": 0.1071515679359436, "learning_rate": 2.136158594491928e-05, "loss": 0.0195, "step": 77700 }, { "epoch": 0.5744212175867065, "grad_norm": 0.09372571110725403, "learning_rate": 2.1357876305792974e-05, "loss": 0.0197, "step": 77710 }, { "epoch": 0.5744951361580083, "grad_norm": 0.06699743866920471, "learning_rate": 2.1354166666666666e-05, "loss": 0.0136, "step": 77720 }, { "epoch": 0.5745690547293102, "grad_norm": 0.10548382997512817, "learning_rate": 2.1350457027540362e-05, "loss": 0.0189, "step": 77730 }, { "epoch": 0.5746429733006121, "grad_norm": 0.10325492918491364, "learning_rate": 2.1346747388414058e-05, "loss": 0.0184, "step": 77740 }, { "epoch": 0.5747168918719139, "grad_norm": 0.07018305361270905, "learning_rate": 2.134303774928775e-05, "loss": 0.0177, "step": 77750 }, { "epoch": 0.5747908104432158, "grad_norm": 0.06633090227842331, "learning_rate": 2.1339328110161443e-05, "loss": 0.0153, "step": 77760 }, { "epoch": 0.5748647290145176, "grad_norm": 0.08085515350103378, "learning_rate": 2.1335618471035136e-05, "loss": 0.0182, "step": 77770 }, { "epoch": 0.5749386475858195, "grad_norm": 0.07745050638914108, "learning_rate": 2.1331908831908835e-05, "loss": 0.0167, "step": 77780 }, { "epoch": 0.5750125661571213, "grad_norm": 0.1009209081530571, "learning_rate": 2.1328199192782528e-05, "loss": 0.0181, "step": 77790 }, { "epoch": 0.5750864847284232, "grad_norm": 0.0653989389538765, "learning_rate": 2.132448955365622e-05, "loss": 0.0182, "step": 77800 }, { "epoch": 0.5751604032997251, "grad_norm": 0.07263030856847763, "learning_rate": 2.1320779914529916e-05, "loss": 0.0167, "step": 77810 }, { "epoch": 0.5752343218710269, "grad_norm": 0.0782294049859047, "learning_rate": 2.1317070275403612e-05, "loss": 0.0187, "step": 77820 }, { "epoch": 0.5753082404423288, "grad_norm": 0.07321447879076004, "learning_rate": 2.1313360636277305e-05, "loss": 0.0184, "step": 77830 }, { "epoch": 0.5753821590136305, "grad_norm": 0.10173796117305756, "learning_rate": 2.1309650997150997e-05, "loss": 0.0187, "step": 77840 }, { "epoch": 0.5754560775849324, "grad_norm": 0.09293046593666077, "learning_rate": 2.1305941358024693e-05, "loss": 0.0192, "step": 77850 }, { "epoch": 0.5755299961562343, "grad_norm": 0.09862498193979263, "learning_rate": 2.1302231718898386e-05, "loss": 0.0188, "step": 77860 }, { "epoch": 0.5756039147275361, "grad_norm": 0.06980642676353455, "learning_rate": 2.1298522079772082e-05, "loss": 0.0164, "step": 77870 }, { "epoch": 0.575677833298838, "grad_norm": 0.08679135888814926, "learning_rate": 2.1294812440645775e-05, "loss": 0.0188, "step": 77880 }, { "epoch": 0.5757517518701398, "grad_norm": 0.0878649652004242, "learning_rate": 2.129110280151947e-05, "loss": 0.0197, "step": 77890 }, { "epoch": 0.5758256704414417, "grad_norm": 0.07993719726800919, "learning_rate": 2.1287393162393163e-05, "loss": 0.018, "step": 77900 }, { "epoch": 0.5758995890127435, "grad_norm": 0.09472255408763885, "learning_rate": 2.1283683523266856e-05, "loss": 0.0194, "step": 77910 }, { "epoch": 0.5759735075840454, "grad_norm": 0.09329091012477875, "learning_rate": 2.127997388414055e-05, "loss": 0.0172, "step": 77920 }, { "epoch": 0.5760474261553473, "grad_norm": 0.07562713325023651, "learning_rate": 2.1276264245014248e-05, "loss": 0.0165, "step": 77930 }, { "epoch": 0.5761213447266491, "grad_norm": 0.08897285908460617, "learning_rate": 2.127255460588794e-05, "loss": 0.0154, "step": 77940 }, { "epoch": 0.576195263297951, "grad_norm": 0.06445154547691345, "learning_rate": 2.1268844966761633e-05, "loss": 0.0176, "step": 77950 }, { "epoch": 0.5762691818692528, "grad_norm": 0.06542029231786728, "learning_rate": 2.126513532763533e-05, "loss": 0.0171, "step": 77960 }, { "epoch": 0.5763431004405547, "grad_norm": 0.06990020722150803, "learning_rate": 2.1261425688509025e-05, "loss": 0.0178, "step": 77970 }, { "epoch": 0.5764170190118565, "grad_norm": 0.09482400119304657, "learning_rate": 2.1257716049382717e-05, "loss": 0.0186, "step": 77980 }, { "epoch": 0.5764909375831584, "grad_norm": 0.07218655198812485, "learning_rate": 2.125400641025641e-05, "loss": 0.0192, "step": 77990 }, { "epoch": 0.5765648561544603, "grad_norm": 0.11474903672933578, "learning_rate": 2.1250296771130106e-05, "loss": 0.0167, "step": 78000 }, { "epoch": 0.5766387747257621, "grad_norm": 0.10138155519962311, "learning_rate": 2.1246587132003802e-05, "loss": 0.0191, "step": 78010 }, { "epoch": 0.576712693297064, "grad_norm": 0.0677545964717865, "learning_rate": 2.1242877492877494e-05, "loss": 0.0166, "step": 78020 }, { "epoch": 0.5767866118683658, "grad_norm": 0.07511387765407562, "learning_rate": 2.1239167853751187e-05, "loss": 0.0184, "step": 78030 }, { "epoch": 0.5768605304396677, "grad_norm": 0.08694437146186829, "learning_rate": 2.1235458214624883e-05, "loss": 0.0174, "step": 78040 }, { "epoch": 0.5769344490109695, "grad_norm": 0.07573015242815018, "learning_rate": 2.123174857549858e-05, "loss": 0.0181, "step": 78050 }, { "epoch": 0.5770083675822714, "grad_norm": 0.09312745183706284, "learning_rate": 2.122803893637227e-05, "loss": 0.0187, "step": 78060 }, { "epoch": 0.5770822861535733, "grad_norm": 0.07375273108482361, "learning_rate": 2.1224329297245964e-05, "loss": 0.0171, "step": 78070 }, { "epoch": 0.577156204724875, "grad_norm": 0.1048007383942604, "learning_rate": 2.122061965811966e-05, "loss": 0.019, "step": 78080 }, { "epoch": 0.577230123296177, "grad_norm": 0.06581725925207138, "learning_rate": 2.1216910018993353e-05, "loss": 0.019, "step": 78090 }, { "epoch": 0.5773040418674787, "grad_norm": 0.07203702628612518, "learning_rate": 2.121320037986705e-05, "loss": 0.0202, "step": 78100 }, { "epoch": 0.5773779604387806, "grad_norm": 0.1035374104976654, "learning_rate": 2.120949074074074e-05, "loss": 0.0187, "step": 78110 }, { "epoch": 0.5774518790100825, "grad_norm": 0.079229436814785, "learning_rate": 2.1205781101614437e-05, "loss": 0.0155, "step": 78120 }, { "epoch": 0.5775257975813843, "grad_norm": 0.08558651059865952, "learning_rate": 2.120207146248813e-05, "loss": 0.0186, "step": 78130 }, { "epoch": 0.5775997161526862, "grad_norm": 0.07611927390098572, "learning_rate": 2.1198361823361822e-05, "loss": 0.0197, "step": 78140 }, { "epoch": 0.577673634723988, "grad_norm": 0.08735854178667068, "learning_rate": 2.1194652184235518e-05, "loss": 0.0173, "step": 78150 }, { "epoch": 0.5777475532952899, "grad_norm": 0.07931400835514069, "learning_rate": 2.1190942545109214e-05, "loss": 0.0201, "step": 78160 }, { "epoch": 0.5778214718665917, "grad_norm": 0.09490150213241577, "learning_rate": 2.1187232905982907e-05, "loss": 0.018, "step": 78170 }, { "epoch": 0.5778953904378936, "grad_norm": 0.08102288097143173, "learning_rate": 2.11835232668566e-05, "loss": 0.0186, "step": 78180 }, { "epoch": 0.5779693090091955, "grad_norm": 0.08374160528182983, "learning_rate": 2.1179813627730295e-05, "loss": 0.0185, "step": 78190 }, { "epoch": 0.5780432275804973, "grad_norm": 0.07512281835079193, "learning_rate": 2.117610398860399e-05, "loss": 0.0157, "step": 78200 }, { "epoch": 0.5781171461517992, "grad_norm": 0.1545741707086563, "learning_rate": 2.1172394349477684e-05, "loss": 0.0194, "step": 78210 }, { "epoch": 0.578191064723101, "grad_norm": 0.05544229596853256, "learning_rate": 2.1168684710351376e-05, "loss": 0.0166, "step": 78220 }, { "epoch": 0.5782649832944029, "grad_norm": 0.0926881954073906, "learning_rate": 2.1164975071225072e-05, "loss": 0.0168, "step": 78230 }, { "epoch": 0.5783389018657047, "grad_norm": 0.057462915778160095, "learning_rate": 2.116126543209877e-05, "loss": 0.0169, "step": 78240 }, { "epoch": 0.5784128204370066, "grad_norm": 0.08461698144674301, "learning_rate": 2.115755579297246e-05, "loss": 0.0179, "step": 78250 }, { "epoch": 0.5784867390083085, "grad_norm": 0.09049886465072632, "learning_rate": 2.1153846153846154e-05, "loss": 0.0177, "step": 78260 }, { "epoch": 0.5785606575796103, "grad_norm": 0.0894893929362297, "learning_rate": 2.115013651471985e-05, "loss": 0.0159, "step": 78270 }, { "epoch": 0.5786345761509122, "grad_norm": 0.0849931612610817, "learning_rate": 2.1146426875593545e-05, "loss": 0.0165, "step": 78280 }, { "epoch": 0.578708494722214, "grad_norm": 0.09327192604541779, "learning_rate": 2.1142717236467238e-05, "loss": 0.0189, "step": 78290 }, { "epoch": 0.5787824132935159, "grad_norm": 0.0816856399178505, "learning_rate": 2.113900759734093e-05, "loss": 0.0193, "step": 78300 }, { "epoch": 0.5788563318648177, "grad_norm": 0.06551526486873627, "learning_rate": 2.1135297958214627e-05, "loss": 0.0186, "step": 78310 }, { "epoch": 0.5789302504361196, "grad_norm": 0.0740165114402771, "learning_rate": 2.113158831908832e-05, "loss": 0.0167, "step": 78320 }, { "epoch": 0.5790041690074215, "grad_norm": 0.08765026926994324, "learning_rate": 2.1127878679962015e-05, "loss": 0.0201, "step": 78330 }, { "epoch": 0.5790780875787233, "grad_norm": 0.05869891867041588, "learning_rate": 2.1124169040835708e-05, "loss": 0.0178, "step": 78340 }, { "epoch": 0.5791520061500252, "grad_norm": 0.08589767664670944, "learning_rate": 2.1120459401709404e-05, "loss": 0.0178, "step": 78350 }, { "epoch": 0.579225924721327, "grad_norm": 0.07567378878593445, "learning_rate": 2.1116749762583096e-05, "loss": 0.0186, "step": 78360 }, { "epoch": 0.5792998432926288, "grad_norm": 0.11730443686246872, "learning_rate": 2.111304012345679e-05, "loss": 0.0164, "step": 78370 }, { "epoch": 0.5793737618639307, "grad_norm": 0.09426385164260864, "learning_rate": 2.1109330484330485e-05, "loss": 0.0175, "step": 78380 }, { "epoch": 0.5794476804352325, "grad_norm": 0.09733286499977112, "learning_rate": 2.110562084520418e-05, "loss": 0.0179, "step": 78390 }, { "epoch": 0.5795215990065344, "grad_norm": 0.08981124311685562, "learning_rate": 2.1101911206077873e-05, "loss": 0.0168, "step": 78400 }, { "epoch": 0.5795955175778362, "grad_norm": 0.08097285032272339, "learning_rate": 2.1098201566951566e-05, "loss": 0.0196, "step": 78410 }, { "epoch": 0.5796694361491381, "grad_norm": 0.08661559224128723, "learning_rate": 2.1094491927825262e-05, "loss": 0.0188, "step": 78420 }, { "epoch": 0.5797433547204399, "grad_norm": 0.1040518581867218, "learning_rate": 2.1090782288698958e-05, "loss": 0.0173, "step": 78430 }, { "epoch": 0.5798172732917418, "grad_norm": 0.08766549080610275, "learning_rate": 2.108707264957265e-05, "loss": 0.0201, "step": 78440 }, { "epoch": 0.5798911918630437, "grad_norm": 0.07151542603969574, "learning_rate": 2.1083363010446343e-05, "loss": 0.0174, "step": 78450 }, { "epoch": 0.5799651104343455, "grad_norm": 0.07667539268732071, "learning_rate": 2.107965337132004e-05, "loss": 0.0158, "step": 78460 }, { "epoch": 0.5800390290056474, "grad_norm": 0.0796733871102333, "learning_rate": 2.1075943732193735e-05, "loss": 0.0187, "step": 78470 }, { "epoch": 0.5801129475769492, "grad_norm": 0.10920443385839462, "learning_rate": 2.1072234093067428e-05, "loss": 0.0211, "step": 78480 }, { "epoch": 0.5801868661482511, "grad_norm": 0.08107868582010269, "learning_rate": 2.106852445394112e-05, "loss": 0.0196, "step": 78490 }, { "epoch": 0.5802607847195529, "grad_norm": 0.08896715193986893, "learning_rate": 2.1064814814814816e-05, "loss": 0.0205, "step": 78500 }, { "epoch": 0.5803347032908548, "grad_norm": 0.07335825264453888, "learning_rate": 2.1061105175688512e-05, "loss": 0.0194, "step": 78510 }, { "epoch": 0.5804086218621567, "grad_norm": 0.08255856484174728, "learning_rate": 2.1057395536562205e-05, "loss": 0.0144, "step": 78520 }, { "epoch": 0.5804825404334585, "grad_norm": 0.059211425483226776, "learning_rate": 2.1053685897435897e-05, "loss": 0.0178, "step": 78530 }, { "epoch": 0.5805564590047604, "grad_norm": 0.08221136778593063, "learning_rate": 2.1049976258309593e-05, "loss": 0.0216, "step": 78540 }, { "epoch": 0.5806303775760622, "grad_norm": 0.08132661134004593, "learning_rate": 2.1046266619183286e-05, "loss": 0.0177, "step": 78550 }, { "epoch": 0.5807042961473641, "grad_norm": 0.05896592140197754, "learning_rate": 2.1042556980056982e-05, "loss": 0.0169, "step": 78560 }, { "epoch": 0.5807782147186659, "grad_norm": 0.059992872178554535, "learning_rate": 2.1038847340930674e-05, "loss": 0.0152, "step": 78570 }, { "epoch": 0.5808521332899678, "grad_norm": 0.04415227100253105, "learning_rate": 2.103513770180437e-05, "loss": 0.0171, "step": 78580 }, { "epoch": 0.5809260518612697, "grad_norm": 0.058747969567775726, "learning_rate": 2.1031428062678063e-05, "loss": 0.0175, "step": 78590 }, { "epoch": 0.5809999704325715, "grad_norm": 0.061170656234025955, "learning_rate": 2.1027718423551755e-05, "loss": 0.019, "step": 78600 }, { "epoch": 0.5810738890038734, "grad_norm": 0.1113303005695343, "learning_rate": 2.102400878442545e-05, "loss": 0.0186, "step": 78610 }, { "epoch": 0.5811478075751751, "grad_norm": 0.09334623068571091, "learning_rate": 2.1020299145299147e-05, "loss": 0.0176, "step": 78620 }, { "epoch": 0.581221726146477, "grad_norm": 0.05836963653564453, "learning_rate": 2.101658950617284e-05, "loss": 0.0153, "step": 78630 }, { "epoch": 0.581295644717779, "grad_norm": 0.0612514466047287, "learning_rate": 2.1012879867046533e-05, "loss": 0.0183, "step": 78640 }, { "epoch": 0.5813695632890807, "grad_norm": 0.06752336025238037, "learning_rate": 2.100917022792023e-05, "loss": 0.0141, "step": 78650 }, { "epoch": 0.5814434818603826, "grad_norm": 0.05256432294845581, "learning_rate": 2.1005460588793924e-05, "loss": 0.0163, "step": 78660 }, { "epoch": 0.5815174004316844, "grad_norm": 0.06495155394077301, "learning_rate": 2.1001750949667617e-05, "loss": 0.0178, "step": 78670 }, { "epoch": 0.5815913190029863, "grad_norm": 0.0711987167596817, "learning_rate": 2.099804131054131e-05, "loss": 0.0173, "step": 78680 }, { "epoch": 0.5816652375742881, "grad_norm": 0.09095991402864456, "learning_rate": 2.0994331671415006e-05, "loss": 0.0195, "step": 78690 }, { "epoch": 0.58173915614559, "grad_norm": 0.1315537840127945, "learning_rate": 2.09906220322887e-05, "loss": 0.0176, "step": 78700 }, { "epoch": 0.5818130747168919, "grad_norm": 0.06469413638114929, "learning_rate": 2.0986912393162394e-05, "loss": 0.0167, "step": 78710 }, { "epoch": 0.5818869932881937, "grad_norm": 0.08639881759881973, "learning_rate": 2.0983202754036087e-05, "loss": 0.0193, "step": 78720 }, { "epoch": 0.5819609118594956, "grad_norm": 0.07865394651889801, "learning_rate": 2.0979493114909783e-05, "loss": 0.0173, "step": 78730 }, { "epoch": 0.5820348304307974, "grad_norm": 0.05969080328941345, "learning_rate": 2.097578347578348e-05, "loss": 0.0159, "step": 78740 }, { "epoch": 0.5821087490020993, "grad_norm": 0.08935252577066422, "learning_rate": 2.097207383665717e-05, "loss": 0.0172, "step": 78750 }, { "epoch": 0.5821826675734011, "grad_norm": 0.07992496341466904, "learning_rate": 2.0968364197530864e-05, "loss": 0.0174, "step": 78760 }, { "epoch": 0.582256586144703, "grad_norm": 0.07541343569755554, "learning_rate": 2.096465455840456e-05, "loss": 0.0172, "step": 78770 }, { "epoch": 0.5823305047160049, "grad_norm": 0.09297898411750793, "learning_rate": 2.0960944919278252e-05, "loss": 0.0159, "step": 78780 }, { "epoch": 0.5824044232873067, "grad_norm": 0.12045703828334808, "learning_rate": 2.095723528015195e-05, "loss": 0.0186, "step": 78790 }, { "epoch": 0.5824783418586086, "grad_norm": 0.08663397282361984, "learning_rate": 2.095352564102564e-05, "loss": 0.018, "step": 78800 }, { "epoch": 0.5825522604299104, "grad_norm": 0.06644534319639206, "learning_rate": 2.0949816001899337e-05, "loss": 0.0191, "step": 78810 }, { "epoch": 0.5826261790012123, "grad_norm": 0.08833138644695282, "learning_rate": 2.094610636277303e-05, "loss": 0.017, "step": 78820 }, { "epoch": 0.5827000975725141, "grad_norm": 0.07349563390016556, "learning_rate": 2.0942396723646722e-05, "loss": 0.0185, "step": 78830 }, { "epoch": 0.582774016143816, "grad_norm": 0.08034585416316986, "learning_rate": 2.093868708452042e-05, "loss": 0.0193, "step": 78840 }, { "epoch": 0.5828479347151179, "grad_norm": 0.07185887545347214, "learning_rate": 2.0934977445394114e-05, "loss": 0.016, "step": 78850 }, { "epoch": 0.5829218532864197, "grad_norm": 0.06577017903327942, "learning_rate": 2.0931267806267807e-05, "loss": 0.0172, "step": 78860 }, { "epoch": 0.5829957718577216, "grad_norm": 0.05835145711898804, "learning_rate": 2.09275581671415e-05, "loss": 0.0181, "step": 78870 }, { "epoch": 0.5830696904290233, "grad_norm": 0.06854193657636642, "learning_rate": 2.09238485280152e-05, "loss": 0.0172, "step": 78880 }, { "epoch": 0.5831436090003252, "grad_norm": 0.07310711592435837, "learning_rate": 2.092013888888889e-05, "loss": 0.0192, "step": 78890 }, { "epoch": 0.5832175275716271, "grad_norm": 0.08630604296922684, "learning_rate": 2.0916429249762584e-05, "loss": 0.0162, "step": 78900 }, { "epoch": 0.5832914461429289, "grad_norm": 0.12226032465696335, "learning_rate": 2.0912719610636276e-05, "loss": 0.0167, "step": 78910 }, { "epoch": 0.5833653647142308, "grad_norm": 0.07864919304847717, "learning_rate": 2.0909009971509972e-05, "loss": 0.0161, "step": 78920 }, { "epoch": 0.5834392832855326, "grad_norm": 0.09677169471979141, "learning_rate": 2.0905300332383668e-05, "loss": 0.0179, "step": 78930 }, { "epoch": 0.5835132018568345, "grad_norm": 0.09349837899208069, "learning_rate": 2.090159069325736e-05, "loss": 0.0197, "step": 78940 }, { "epoch": 0.5835871204281363, "grad_norm": 0.08077096939086914, "learning_rate": 2.0897881054131053e-05, "loss": 0.0159, "step": 78950 }, { "epoch": 0.5836610389994382, "grad_norm": 0.06563491374254227, "learning_rate": 2.089417141500475e-05, "loss": 0.0172, "step": 78960 }, { "epoch": 0.5837349575707401, "grad_norm": 0.060831811279058456, "learning_rate": 2.0890461775878445e-05, "loss": 0.0158, "step": 78970 }, { "epoch": 0.5838088761420419, "grad_norm": 0.08679822087287903, "learning_rate": 2.0886752136752138e-05, "loss": 0.0161, "step": 78980 }, { "epoch": 0.5838827947133438, "grad_norm": 0.07423678040504456, "learning_rate": 2.0883042497625834e-05, "loss": 0.0184, "step": 78990 }, { "epoch": 0.5839567132846456, "grad_norm": 0.1013701856136322, "learning_rate": 2.0879332858499526e-05, "loss": 0.0201, "step": 79000 }, { "epoch": 0.5840306318559475, "grad_norm": 0.0821235179901123, "learning_rate": 2.087562321937322e-05, "loss": 0.0161, "step": 79010 }, { "epoch": 0.5841045504272493, "grad_norm": 0.04412822797894478, "learning_rate": 2.0871913580246915e-05, "loss": 0.0161, "step": 79020 }, { "epoch": 0.5841784689985512, "grad_norm": 0.07753975689411163, "learning_rate": 2.086820394112061e-05, "loss": 0.0179, "step": 79030 }, { "epoch": 0.5842523875698531, "grad_norm": 0.08966545760631561, "learning_rate": 2.0864494301994303e-05, "loss": 0.0193, "step": 79040 }, { "epoch": 0.5843263061411549, "grad_norm": 0.07493555545806885, "learning_rate": 2.0860784662867996e-05, "loss": 0.0174, "step": 79050 }, { "epoch": 0.5844002247124568, "grad_norm": 0.09181392192840576, "learning_rate": 2.085707502374169e-05, "loss": 0.0205, "step": 79060 }, { "epoch": 0.5844741432837586, "grad_norm": 0.07202092558145523, "learning_rate": 2.0853365384615388e-05, "loss": 0.0198, "step": 79070 }, { "epoch": 0.5845480618550605, "grad_norm": 0.07976742833852768, "learning_rate": 2.084965574548908e-05, "loss": 0.0172, "step": 79080 }, { "epoch": 0.5846219804263623, "grad_norm": 0.08119291812181473, "learning_rate": 2.0845946106362773e-05, "loss": 0.0198, "step": 79090 }, { "epoch": 0.5846958989976642, "grad_norm": 0.08092097193002701, "learning_rate": 2.0842236467236466e-05, "loss": 0.0155, "step": 79100 }, { "epoch": 0.5847698175689661, "grad_norm": 0.06528455018997192, "learning_rate": 2.0838526828110165e-05, "loss": 0.0172, "step": 79110 }, { "epoch": 0.5848437361402679, "grad_norm": 0.07378751039505005, "learning_rate": 2.0834817188983858e-05, "loss": 0.0172, "step": 79120 }, { "epoch": 0.5849176547115698, "grad_norm": 0.11115199327468872, "learning_rate": 2.083110754985755e-05, "loss": 0.0184, "step": 79130 }, { "epoch": 0.5849915732828715, "grad_norm": 0.10814355313777924, "learning_rate": 2.0827397910731243e-05, "loss": 0.0193, "step": 79140 }, { "epoch": 0.5850654918541734, "grad_norm": 0.07689186185598373, "learning_rate": 2.082368827160494e-05, "loss": 0.0157, "step": 79150 }, { "epoch": 0.5851394104254753, "grad_norm": 0.09198521077632904, "learning_rate": 2.0819978632478635e-05, "loss": 0.0177, "step": 79160 }, { "epoch": 0.5852133289967771, "grad_norm": 0.04869784787297249, "learning_rate": 2.0816268993352327e-05, "loss": 0.0174, "step": 79170 }, { "epoch": 0.585287247568079, "grad_norm": 0.09584268927574158, "learning_rate": 2.0812559354226023e-05, "loss": 0.0184, "step": 79180 }, { "epoch": 0.5853611661393808, "grad_norm": 0.10546091198921204, "learning_rate": 2.0808849715099716e-05, "loss": 0.018, "step": 79190 }, { "epoch": 0.5854350847106827, "grad_norm": 0.07282552123069763, "learning_rate": 2.0805140075973412e-05, "loss": 0.0187, "step": 79200 }, { "epoch": 0.5855090032819845, "grad_norm": 0.07551319152116776, "learning_rate": 2.0801430436847104e-05, "loss": 0.0177, "step": 79210 }, { "epoch": 0.5855829218532864, "grad_norm": 0.08180715143680573, "learning_rate": 2.07977207977208e-05, "loss": 0.0187, "step": 79220 }, { "epoch": 0.5856568404245883, "grad_norm": 0.07415753602981567, "learning_rate": 2.0794011158594493e-05, "loss": 0.0193, "step": 79230 }, { "epoch": 0.5857307589958901, "grad_norm": 0.07355595380067825, "learning_rate": 2.0790301519468186e-05, "loss": 0.0172, "step": 79240 }, { "epoch": 0.585804677567192, "grad_norm": 0.08238102495670319, "learning_rate": 2.078659188034188e-05, "loss": 0.019, "step": 79250 }, { "epoch": 0.5858785961384938, "grad_norm": 0.07160931080579758, "learning_rate": 2.0782882241215577e-05, "loss": 0.0183, "step": 79260 }, { "epoch": 0.5859525147097957, "grad_norm": 0.07847066223621368, "learning_rate": 2.077917260208927e-05, "loss": 0.0187, "step": 79270 }, { "epoch": 0.5860264332810975, "grad_norm": 0.05874091759324074, "learning_rate": 2.0775462962962963e-05, "loss": 0.0168, "step": 79280 }, { "epoch": 0.5861003518523994, "grad_norm": 0.08555573225021362, "learning_rate": 2.0771753323836655e-05, "loss": 0.0179, "step": 79290 }, { "epoch": 0.5861742704237013, "grad_norm": 0.07176050543785095, "learning_rate": 2.0768043684710355e-05, "loss": 0.0163, "step": 79300 }, { "epoch": 0.5862481889950031, "grad_norm": 0.07672320306301117, "learning_rate": 2.0764334045584047e-05, "loss": 0.0186, "step": 79310 }, { "epoch": 0.586322107566305, "grad_norm": 0.10934194177389145, "learning_rate": 2.076062440645774e-05, "loss": 0.0188, "step": 79320 }, { "epoch": 0.5863960261376068, "grad_norm": 0.10554488748311996, "learning_rate": 2.0756914767331436e-05, "loss": 0.0155, "step": 79330 }, { "epoch": 0.5864699447089087, "grad_norm": 0.06040872260928154, "learning_rate": 2.075320512820513e-05, "loss": 0.0143, "step": 79340 }, { "epoch": 0.5865438632802105, "grad_norm": 0.10152992606163025, "learning_rate": 2.0749495489078824e-05, "loss": 0.0206, "step": 79350 }, { "epoch": 0.5866177818515124, "grad_norm": 0.07255376875400543, "learning_rate": 2.0745785849952517e-05, "loss": 0.0198, "step": 79360 }, { "epoch": 0.5866917004228143, "grad_norm": 0.09351497143507004, "learning_rate": 2.0742076210826213e-05, "loss": 0.0184, "step": 79370 }, { "epoch": 0.5867656189941161, "grad_norm": 0.1424107849597931, "learning_rate": 2.0738366571699905e-05, "loss": 0.0208, "step": 79380 }, { "epoch": 0.586839537565418, "grad_norm": 0.09308390319347382, "learning_rate": 2.07346569325736e-05, "loss": 0.0185, "step": 79390 }, { "epoch": 0.5869134561367197, "grad_norm": 0.06494449079036713, "learning_rate": 2.0730947293447294e-05, "loss": 0.0172, "step": 79400 }, { "epoch": 0.5869873747080216, "grad_norm": 0.08240706473588943, "learning_rate": 2.072723765432099e-05, "loss": 0.0174, "step": 79410 }, { "epoch": 0.5870612932793235, "grad_norm": 0.08911719918251038, "learning_rate": 2.0723528015194682e-05, "loss": 0.0185, "step": 79420 }, { "epoch": 0.5871352118506253, "grad_norm": 0.08278301358222961, "learning_rate": 2.071981837606838e-05, "loss": 0.0186, "step": 79430 }, { "epoch": 0.5872091304219272, "grad_norm": 0.08317048102617264, "learning_rate": 2.071610873694207e-05, "loss": 0.0195, "step": 79440 }, { "epoch": 0.587283048993229, "grad_norm": 0.08313658088445663, "learning_rate": 2.0712399097815767e-05, "loss": 0.0187, "step": 79450 }, { "epoch": 0.5873569675645309, "grad_norm": 0.09306693822145462, "learning_rate": 2.070868945868946e-05, "loss": 0.0167, "step": 79460 }, { "epoch": 0.5874308861358327, "grad_norm": 0.08138968795537949, "learning_rate": 2.0704979819563152e-05, "loss": 0.0149, "step": 79470 }, { "epoch": 0.5875048047071346, "grad_norm": 0.07695699483156204, "learning_rate": 2.0701270180436848e-05, "loss": 0.0171, "step": 79480 }, { "epoch": 0.5875787232784365, "grad_norm": 0.08786536753177643, "learning_rate": 2.0697560541310544e-05, "loss": 0.0175, "step": 79490 }, { "epoch": 0.5876526418497383, "grad_norm": 0.06110033392906189, "learning_rate": 2.0693850902184237e-05, "loss": 0.0175, "step": 79500 }, { "epoch": 0.5877265604210402, "grad_norm": 0.1047593280673027, "learning_rate": 2.069014126305793e-05, "loss": 0.0206, "step": 79510 }, { "epoch": 0.587800478992342, "grad_norm": 0.07371040433645248, "learning_rate": 2.0686431623931625e-05, "loss": 0.0173, "step": 79520 }, { "epoch": 0.5878743975636439, "grad_norm": 0.06224002316594124, "learning_rate": 2.068272198480532e-05, "loss": 0.0186, "step": 79530 }, { "epoch": 0.5879483161349457, "grad_norm": 0.08960548043251038, "learning_rate": 2.0679012345679014e-05, "loss": 0.0195, "step": 79540 }, { "epoch": 0.5880222347062476, "grad_norm": 0.06475990265607834, "learning_rate": 2.0675302706552706e-05, "loss": 0.0176, "step": 79550 }, { "epoch": 0.5880961532775495, "grad_norm": 0.07483542710542679, "learning_rate": 2.0671593067426402e-05, "loss": 0.0153, "step": 79560 }, { "epoch": 0.5881700718488513, "grad_norm": 0.08100137859582901, "learning_rate": 2.0667883428300098e-05, "loss": 0.0184, "step": 79570 }, { "epoch": 0.5882439904201532, "grad_norm": 0.08143702149391174, "learning_rate": 2.066417378917379e-05, "loss": 0.016, "step": 79580 }, { "epoch": 0.588317908991455, "grad_norm": 0.09400355070829391, "learning_rate": 2.0660464150047483e-05, "loss": 0.0179, "step": 79590 }, { "epoch": 0.5883918275627569, "grad_norm": 0.07985514402389526, "learning_rate": 2.065675451092118e-05, "loss": 0.0223, "step": 79600 }, { "epoch": 0.5884657461340588, "grad_norm": 0.07633242011070251, "learning_rate": 2.0653044871794872e-05, "loss": 0.0156, "step": 79610 }, { "epoch": 0.5885396647053606, "grad_norm": 0.07383780181407928, "learning_rate": 2.0649335232668568e-05, "loss": 0.0162, "step": 79620 }, { "epoch": 0.5886135832766625, "grad_norm": 0.0776839628815651, "learning_rate": 2.064562559354226e-05, "loss": 0.0174, "step": 79630 }, { "epoch": 0.5886875018479643, "grad_norm": 0.07054539769887924, "learning_rate": 2.0641915954415956e-05, "loss": 0.0166, "step": 79640 }, { "epoch": 0.5887614204192662, "grad_norm": 0.08717437088489532, "learning_rate": 2.063820631528965e-05, "loss": 0.0163, "step": 79650 }, { "epoch": 0.588835338990568, "grad_norm": 0.09364652633666992, "learning_rate": 2.0634496676163345e-05, "loss": 0.0183, "step": 79660 }, { "epoch": 0.5889092575618698, "grad_norm": 0.07449059188365936, "learning_rate": 2.0630787037037038e-05, "loss": 0.0162, "step": 79670 }, { "epoch": 0.5889831761331717, "grad_norm": 0.05628354474902153, "learning_rate": 2.0627077397910734e-05, "loss": 0.0179, "step": 79680 }, { "epoch": 0.5890570947044735, "grad_norm": 0.09294017404317856, "learning_rate": 2.0623367758784426e-05, "loss": 0.016, "step": 79690 }, { "epoch": 0.5891310132757754, "grad_norm": 0.08671052753925323, "learning_rate": 2.061965811965812e-05, "loss": 0.0192, "step": 79700 }, { "epoch": 0.5892049318470772, "grad_norm": 0.08170660585165024, "learning_rate": 2.0615948480531815e-05, "loss": 0.02, "step": 79710 }, { "epoch": 0.5892788504183791, "grad_norm": 0.07939363270998001, "learning_rate": 2.061223884140551e-05, "loss": 0.0174, "step": 79720 }, { "epoch": 0.5893527689896809, "grad_norm": 0.08600367605686188, "learning_rate": 2.0608529202279203e-05, "loss": 0.0163, "step": 79730 }, { "epoch": 0.5894266875609828, "grad_norm": 0.08043445646762848, "learning_rate": 2.0604819563152896e-05, "loss": 0.0176, "step": 79740 }, { "epoch": 0.5895006061322847, "grad_norm": 0.08205698430538177, "learning_rate": 2.0601109924026592e-05, "loss": 0.0171, "step": 79750 }, { "epoch": 0.5895745247035865, "grad_norm": 0.07406076043844223, "learning_rate": 2.0597400284900288e-05, "loss": 0.0193, "step": 79760 }, { "epoch": 0.5896484432748884, "grad_norm": 0.07343707233667374, "learning_rate": 2.059369064577398e-05, "loss": 0.0166, "step": 79770 }, { "epoch": 0.5897223618461902, "grad_norm": 0.07175429910421371, "learning_rate": 2.0589981006647673e-05, "loss": 0.0171, "step": 79780 }, { "epoch": 0.5897962804174921, "grad_norm": 0.08020445704460144, "learning_rate": 2.058627136752137e-05, "loss": 0.0154, "step": 79790 }, { "epoch": 0.5898701989887939, "grad_norm": 0.089165598154068, "learning_rate": 2.0582561728395065e-05, "loss": 0.0191, "step": 79800 }, { "epoch": 0.5899441175600958, "grad_norm": 0.0934486910700798, "learning_rate": 2.0578852089268757e-05, "loss": 0.0185, "step": 79810 }, { "epoch": 0.5900180361313977, "grad_norm": 0.06639142334461212, "learning_rate": 2.057514245014245e-05, "loss": 0.0187, "step": 79820 }, { "epoch": 0.5900919547026995, "grad_norm": 0.07277216017246246, "learning_rate": 2.0571432811016146e-05, "loss": 0.0169, "step": 79830 }, { "epoch": 0.5901658732740014, "grad_norm": 0.08275559544563293, "learning_rate": 2.056772317188984e-05, "loss": 0.0205, "step": 79840 }, { "epoch": 0.5902397918453032, "grad_norm": 0.07960959523916245, "learning_rate": 2.0564013532763534e-05, "loss": 0.0186, "step": 79850 }, { "epoch": 0.5903137104166051, "grad_norm": 0.08382745087146759, "learning_rate": 2.0560303893637227e-05, "loss": 0.0172, "step": 79860 }, { "epoch": 0.590387628987907, "grad_norm": 0.07545735687017441, "learning_rate": 2.0556594254510923e-05, "loss": 0.0185, "step": 79870 }, { "epoch": 0.5904615475592088, "grad_norm": 0.07180724292993546, "learning_rate": 2.0552884615384616e-05, "loss": 0.016, "step": 79880 }, { "epoch": 0.5905354661305107, "grad_norm": 0.07791922241449356, "learning_rate": 2.054917497625831e-05, "loss": 0.0187, "step": 79890 }, { "epoch": 0.5906093847018125, "grad_norm": 0.06921916455030441, "learning_rate": 2.0545465337132004e-05, "loss": 0.0195, "step": 79900 }, { "epoch": 0.5906833032731144, "grad_norm": 0.06241946294903755, "learning_rate": 2.05417556980057e-05, "loss": 0.0163, "step": 79910 }, { "epoch": 0.5907572218444161, "grad_norm": 0.08319985866546631, "learning_rate": 2.0538046058879393e-05, "loss": 0.0181, "step": 79920 }, { "epoch": 0.590831140415718, "grad_norm": 0.05606095865368843, "learning_rate": 2.0534336419753085e-05, "loss": 0.0183, "step": 79930 }, { "epoch": 0.59090505898702, "grad_norm": 0.08617392182350159, "learning_rate": 2.053062678062678e-05, "loss": 0.0159, "step": 79940 }, { "epoch": 0.5909789775583217, "grad_norm": 0.09617938101291656, "learning_rate": 2.0526917141500477e-05, "loss": 0.017, "step": 79950 }, { "epoch": 0.5910528961296236, "grad_norm": 0.08484801650047302, "learning_rate": 2.052320750237417e-05, "loss": 0.0187, "step": 79960 }, { "epoch": 0.5911268147009254, "grad_norm": 0.06967838108539581, "learning_rate": 2.0519497863247862e-05, "loss": 0.0174, "step": 79970 }, { "epoch": 0.5912007332722273, "grad_norm": 0.10206121951341629, "learning_rate": 2.051578822412156e-05, "loss": 0.0184, "step": 79980 }, { "epoch": 0.5912746518435291, "grad_norm": 0.09198049455881119, "learning_rate": 2.0512078584995254e-05, "loss": 0.019, "step": 79990 }, { "epoch": 0.591348570414831, "grad_norm": 0.06630239635705948, "learning_rate": 2.0508368945868947e-05, "loss": 0.0178, "step": 80000 }, { "epoch": 0.591348570414831, "eval_f1": 0.6219793357433911, "eval_loss": 0.01734846830368042, "eval_precision": 0.49293097739254205, "eval_recall": 0.8425598416692311, "eval_runtime": 2919.4187, "eval_samples_per_second": 185.357, "eval_steps_per_second": 2.896, "step": 80000 }, { "epoch": 0.5914224889861329, "grad_norm": 0.083350270986557, "learning_rate": 2.050465930674264e-05, "loss": 0.0153, "step": 80010 }, { "epoch": 0.5914964075574347, "grad_norm": 0.08730822801589966, "learning_rate": 2.0500949667616335e-05, "loss": 0.0183, "step": 80020 }, { "epoch": 0.5915703261287366, "grad_norm": 0.063474640250206, "learning_rate": 2.049724002849003e-05, "loss": 0.0183, "step": 80030 }, { "epoch": 0.5916442447000384, "grad_norm": 0.07405190169811249, "learning_rate": 2.0493530389363724e-05, "loss": 0.0178, "step": 80040 }, { "epoch": 0.5917181632713403, "grad_norm": 0.09950005263090134, "learning_rate": 2.0489820750237417e-05, "loss": 0.021, "step": 80050 }, { "epoch": 0.5917920818426421, "grad_norm": 0.08904198557138443, "learning_rate": 2.0486111111111113e-05, "loss": 0.0186, "step": 80060 }, { "epoch": 0.591866000413944, "grad_norm": 0.08764451742172241, "learning_rate": 2.0482401471984805e-05, "loss": 0.0177, "step": 80070 }, { "epoch": 0.5919399189852459, "grad_norm": 0.07702360302209854, "learning_rate": 2.04786918328585e-05, "loss": 0.0196, "step": 80080 }, { "epoch": 0.5920138375565477, "grad_norm": 0.07286150008440018, "learning_rate": 2.0474982193732194e-05, "loss": 0.0173, "step": 80090 }, { "epoch": 0.5920877561278496, "grad_norm": 0.1036733090877533, "learning_rate": 2.047127255460589e-05, "loss": 0.0195, "step": 80100 }, { "epoch": 0.5921616746991514, "grad_norm": 0.06630636751651764, "learning_rate": 2.0467562915479582e-05, "loss": 0.0171, "step": 80110 }, { "epoch": 0.5922355932704533, "grad_norm": 0.09572155773639679, "learning_rate": 2.0463853276353278e-05, "loss": 0.02, "step": 80120 }, { "epoch": 0.5923095118417552, "grad_norm": 0.10689663887023926, "learning_rate": 2.046014363722697e-05, "loss": 0.0186, "step": 80130 }, { "epoch": 0.592383430413057, "grad_norm": 0.09138118475675583, "learning_rate": 2.0456433998100667e-05, "loss": 0.0176, "step": 80140 }, { "epoch": 0.5924573489843589, "grad_norm": 0.07343235611915588, "learning_rate": 2.045272435897436e-05, "loss": 0.0176, "step": 80150 }, { "epoch": 0.5925312675556607, "grad_norm": 0.07991829514503479, "learning_rate": 2.0449014719848052e-05, "loss": 0.0166, "step": 80160 }, { "epoch": 0.5926051861269626, "grad_norm": 0.06455441564321518, "learning_rate": 2.0445305080721748e-05, "loss": 0.0174, "step": 80170 }, { "epoch": 0.5926791046982643, "grad_norm": 0.09613152593374252, "learning_rate": 2.0441595441595444e-05, "loss": 0.0211, "step": 80180 }, { "epoch": 0.5927530232695662, "grad_norm": 0.06977544724941254, "learning_rate": 2.0437885802469136e-05, "loss": 0.018, "step": 80190 }, { "epoch": 0.5928269418408681, "grad_norm": 0.0790705755352974, "learning_rate": 2.043417616334283e-05, "loss": 0.0169, "step": 80200 }, { "epoch": 0.5929008604121699, "grad_norm": 0.07312140613794327, "learning_rate": 2.0430466524216525e-05, "loss": 0.0161, "step": 80210 }, { "epoch": 0.5929747789834718, "grad_norm": 0.07595586776733398, "learning_rate": 2.042675688509022e-05, "loss": 0.018, "step": 80220 }, { "epoch": 0.5930486975547736, "grad_norm": 0.08415449410676956, "learning_rate": 2.0423047245963913e-05, "loss": 0.0158, "step": 80230 }, { "epoch": 0.5931226161260755, "grad_norm": 0.07658253610134125, "learning_rate": 2.0419337606837606e-05, "loss": 0.0165, "step": 80240 }, { "epoch": 0.5931965346973773, "grad_norm": 0.09636301547288895, "learning_rate": 2.0415627967711302e-05, "loss": 0.0183, "step": 80250 }, { "epoch": 0.5932704532686792, "grad_norm": 0.07571303844451904, "learning_rate": 2.0411918328584998e-05, "loss": 0.0159, "step": 80260 }, { "epoch": 0.5933443718399811, "grad_norm": 0.07676825672388077, "learning_rate": 2.040820868945869e-05, "loss": 0.0189, "step": 80270 }, { "epoch": 0.5934182904112829, "grad_norm": 0.08915860950946808, "learning_rate": 2.0404499050332383e-05, "loss": 0.0175, "step": 80280 }, { "epoch": 0.5934922089825848, "grad_norm": 0.06634003669023514, "learning_rate": 2.040078941120608e-05, "loss": 0.0185, "step": 80290 }, { "epoch": 0.5935661275538866, "grad_norm": 0.08090776950120926, "learning_rate": 2.039707977207977e-05, "loss": 0.0201, "step": 80300 }, { "epoch": 0.5936400461251885, "grad_norm": 0.08503460884094238, "learning_rate": 2.0393370132953468e-05, "loss": 0.0184, "step": 80310 }, { "epoch": 0.5937139646964903, "grad_norm": 0.10272512584924698, "learning_rate": 2.038966049382716e-05, "loss": 0.017, "step": 80320 }, { "epoch": 0.5937878832677922, "grad_norm": 0.09803182631731033, "learning_rate": 2.0385950854700856e-05, "loss": 0.0172, "step": 80330 }, { "epoch": 0.5938618018390941, "grad_norm": 0.05736514553427696, "learning_rate": 2.038224121557455e-05, "loss": 0.0165, "step": 80340 }, { "epoch": 0.5939357204103959, "grad_norm": 0.09937681257724762, "learning_rate": 2.0378531576448245e-05, "loss": 0.0199, "step": 80350 }, { "epoch": 0.5940096389816978, "grad_norm": 0.0747910812497139, "learning_rate": 2.0374821937321937e-05, "loss": 0.0165, "step": 80360 }, { "epoch": 0.5940835575529996, "grad_norm": 0.07158013433218002, "learning_rate": 2.0371112298195633e-05, "loss": 0.0184, "step": 80370 }, { "epoch": 0.5941574761243015, "grad_norm": 0.06482820957899094, "learning_rate": 2.0367402659069326e-05, "loss": 0.0164, "step": 80380 }, { "epoch": 0.5942313946956034, "grad_norm": 0.09437862038612366, "learning_rate": 2.036369301994302e-05, "loss": 0.0161, "step": 80390 }, { "epoch": 0.5943053132669052, "grad_norm": 0.0751989334821701, "learning_rate": 2.0359983380816718e-05, "loss": 0.0178, "step": 80400 }, { "epoch": 0.5943792318382071, "grad_norm": 0.057128626853227615, "learning_rate": 2.035627374169041e-05, "loss": 0.0192, "step": 80410 }, { "epoch": 0.5944531504095089, "grad_norm": 0.062380481511354446, "learning_rate": 2.0352564102564103e-05, "loss": 0.0175, "step": 80420 }, { "epoch": 0.5945270689808108, "grad_norm": 0.09042806178331375, "learning_rate": 2.0348854463437796e-05, "loss": 0.0186, "step": 80430 }, { "epoch": 0.5946009875521125, "grad_norm": 0.06878332048654556, "learning_rate": 2.034514482431149e-05, "loss": 0.0184, "step": 80440 }, { "epoch": 0.5946749061234144, "grad_norm": 0.09018473327159882, "learning_rate": 2.0341435185185187e-05, "loss": 0.0169, "step": 80450 }, { "epoch": 0.5947488246947163, "grad_norm": 0.07117787003517151, "learning_rate": 2.033772554605888e-05, "loss": 0.02, "step": 80460 }, { "epoch": 0.5948227432660181, "grad_norm": 0.09593405574560165, "learning_rate": 2.0334015906932573e-05, "loss": 0.0152, "step": 80470 }, { "epoch": 0.59489666183732, "grad_norm": 0.10332407057285309, "learning_rate": 2.033030626780627e-05, "loss": 0.0185, "step": 80480 }, { "epoch": 0.5949705804086218, "grad_norm": 0.09714365005493164, "learning_rate": 2.0326596628679965e-05, "loss": 0.0182, "step": 80490 }, { "epoch": 0.5950444989799237, "grad_norm": 0.10157936811447144, "learning_rate": 2.0322886989553657e-05, "loss": 0.0179, "step": 80500 }, { "epoch": 0.5951184175512255, "grad_norm": 0.07549656182527542, "learning_rate": 2.031917735042735e-05, "loss": 0.017, "step": 80510 }, { "epoch": 0.5951923361225274, "grad_norm": 0.07607118040323257, "learning_rate": 2.0315467711301046e-05, "loss": 0.0184, "step": 80520 }, { "epoch": 0.5952662546938293, "grad_norm": 0.08963571488857269, "learning_rate": 2.0311758072174738e-05, "loss": 0.0168, "step": 80530 }, { "epoch": 0.5953401732651311, "grad_norm": 0.07405596226453781, "learning_rate": 2.0308048433048434e-05, "loss": 0.0168, "step": 80540 }, { "epoch": 0.595414091836433, "grad_norm": 0.09124046564102173, "learning_rate": 2.030433879392213e-05, "loss": 0.0193, "step": 80550 }, { "epoch": 0.5954880104077348, "grad_norm": 0.07387691736221313, "learning_rate": 2.0300629154795823e-05, "loss": 0.0182, "step": 80560 }, { "epoch": 0.5955619289790367, "grad_norm": 0.07532805949449539, "learning_rate": 2.0296919515669515e-05, "loss": 0.0161, "step": 80570 }, { "epoch": 0.5956358475503385, "grad_norm": 0.12049363553524017, "learning_rate": 2.029320987654321e-05, "loss": 0.0182, "step": 80580 }, { "epoch": 0.5957097661216404, "grad_norm": 0.13337494432926178, "learning_rate": 2.0289500237416907e-05, "loss": 0.019, "step": 80590 }, { "epoch": 0.5957836846929423, "grad_norm": 0.09054873883724213, "learning_rate": 2.02857905982906e-05, "loss": 0.019, "step": 80600 }, { "epoch": 0.5958576032642441, "grad_norm": 0.0526256337761879, "learning_rate": 2.0282080959164292e-05, "loss": 0.0175, "step": 80610 }, { "epoch": 0.595931521835546, "grad_norm": 0.06578093022108078, "learning_rate": 2.0278371320037985e-05, "loss": 0.0162, "step": 80620 }, { "epoch": 0.5960054404068478, "grad_norm": 0.07064341008663177, "learning_rate": 2.0274661680911684e-05, "loss": 0.0185, "step": 80630 }, { "epoch": 0.5960793589781497, "grad_norm": 0.07011621445417404, "learning_rate": 2.0270952041785377e-05, "loss": 0.0173, "step": 80640 }, { "epoch": 0.5961532775494516, "grad_norm": 0.0939406305551529, "learning_rate": 2.026724240265907e-05, "loss": 0.0189, "step": 80650 }, { "epoch": 0.5962271961207534, "grad_norm": 0.08310779184103012, "learning_rate": 2.0263532763532762e-05, "loss": 0.0153, "step": 80660 }, { "epoch": 0.5963011146920553, "grad_norm": 0.07875712215900421, "learning_rate": 2.0259823124406458e-05, "loss": 0.0187, "step": 80670 }, { "epoch": 0.5963750332633571, "grad_norm": 0.08235958963632584, "learning_rate": 2.0256113485280154e-05, "loss": 0.0164, "step": 80680 }, { "epoch": 0.596448951834659, "grad_norm": 0.08048681169748306, "learning_rate": 2.0252403846153847e-05, "loss": 0.0174, "step": 80690 }, { "epoch": 0.5965228704059607, "grad_norm": 0.07104280591011047, "learning_rate": 2.0248694207027543e-05, "loss": 0.0165, "step": 80700 }, { "epoch": 0.5965967889772626, "grad_norm": 0.09004362672567368, "learning_rate": 2.0244984567901235e-05, "loss": 0.016, "step": 80710 }, { "epoch": 0.5966707075485645, "grad_norm": 0.0726613849401474, "learning_rate": 2.024127492877493e-05, "loss": 0.0187, "step": 80720 }, { "epoch": 0.5967446261198663, "grad_norm": 0.07129136472940445, "learning_rate": 2.0237565289648624e-05, "loss": 0.017, "step": 80730 }, { "epoch": 0.5968185446911682, "grad_norm": 0.07636060565710068, "learning_rate": 2.023385565052232e-05, "loss": 0.0168, "step": 80740 }, { "epoch": 0.59689246326247, "grad_norm": 0.06925743818283081, "learning_rate": 2.0230146011396012e-05, "loss": 0.0158, "step": 80750 }, { "epoch": 0.5969663818337719, "grad_norm": 0.07321954518556595, "learning_rate": 2.0226436372269705e-05, "loss": 0.0158, "step": 80760 }, { "epoch": 0.5970403004050737, "grad_norm": 0.06690855324268341, "learning_rate": 2.02227267331434e-05, "loss": 0.0157, "step": 80770 }, { "epoch": 0.5971142189763756, "grad_norm": 0.08284644782543182, "learning_rate": 2.0219017094017097e-05, "loss": 0.0181, "step": 80780 }, { "epoch": 0.5971881375476775, "grad_norm": 0.09947702288627625, "learning_rate": 2.021530745489079e-05, "loss": 0.0176, "step": 80790 }, { "epoch": 0.5972620561189793, "grad_norm": 0.08260909467935562, "learning_rate": 2.0211597815764482e-05, "loss": 0.0158, "step": 80800 }, { "epoch": 0.5973359746902812, "grad_norm": 0.06769290566444397, "learning_rate": 2.0207888176638178e-05, "loss": 0.0172, "step": 80810 }, { "epoch": 0.597409893261583, "grad_norm": 0.08128570765256882, "learning_rate": 2.0204178537511874e-05, "loss": 0.0188, "step": 80820 }, { "epoch": 0.5974838118328849, "grad_norm": 0.0641016960144043, "learning_rate": 2.0200468898385566e-05, "loss": 0.0168, "step": 80830 }, { "epoch": 0.5975577304041867, "grad_norm": 0.06617254763841629, "learning_rate": 2.019675925925926e-05, "loss": 0.0169, "step": 80840 }, { "epoch": 0.5976316489754886, "grad_norm": 0.09616333246231079, "learning_rate": 2.0193049620132955e-05, "loss": 0.0212, "step": 80850 }, { "epoch": 0.5977055675467905, "grad_norm": 0.0783003717660904, "learning_rate": 2.018933998100665e-05, "loss": 0.0169, "step": 80860 }, { "epoch": 0.5977794861180923, "grad_norm": 0.07168202847242355, "learning_rate": 2.0185630341880344e-05, "loss": 0.0166, "step": 80870 }, { "epoch": 0.5978534046893942, "grad_norm": 0.11167970299720764, "learning_rate": 2.0181920702754036e-05, "loss": 0.0179, "step": 80880 }, { "epoch": 0.597927323260696, "grad_norm": 0.09261804819107056, "learning_rate": 2.0178211063627732e-05, "loss": 0.0178, "step": 80890 }, { "epoch": 0.5980012418319979, "grad_norm": 0.0917130634188652, "learning_rate": 2.0174501424501425e-05, "loss": 0.0193, "step": 80900 }, { "epoch": 0.5980751604032998, "grad_norm": 0.07518907636404037, "learning_rate": 2.017079178537512e-05, "loss": 0.0171, "step": 80910 }, { "epoch": 0.5981490789746016, "grad_norm": 0.07371152192354202, "learning_rate": 2.0167082146248813e-05, "loss": 0.0183, "step": 80920 }, { "epoch": 0.5982229975459035, "grad_norm": 0.07565153390169144, "learning_rate": 2.016337250712251e-05, "loss": 0.0193, "step": 80930 }, { "epoch": 0.5982969161172053, "grad_norm": 0.07934678345918655, "learning_rate": 2.0159662867996202e-05, "loss": 0.0195, "step": 80940 }, { "epoch": 0.5983708346885072, "grad_norm": 0.08630355447530746, "learning_rate": 2.0155953228869898e-05, "loss": 0.0166, "step": 80950 }, { "epoch": 0.598444753259809, "grad_norm": 0.08241811394691467, "learning_rate": 2.015224358974359e-05, "loss": 0.0174, "step": 80960 }, { "epoch": 0.5985186718311108, "grad_norm": 0.07798541337251663, "learning_rate": 2.0148533950617286e-05, "loss": 0.0199, "step": 80970 }, { "epoch": 0.5985925904024127, "grad_norm": 0.11176567524671555, "learning_rate": 2.014482431149098e-05, "loss": 0.0189, "step": 80980 }, { "epoch": 0.5986665089737145, "grad_norm": 0.11033938825130463, "learning_rate": 2.014111467236467e-05, "loss": 0.02, "step": 80990 }, { "epoch": 0.5987404275450164, "grad_norm": 0.07284007966518402, "learning_rate": 2.0137405033238367e-05, "loss": 0.0167, "step": 81000 }, { "epoch": 0.5988143461163182, "grad_norm": 0.05665259063243866, "learning_rate": 2.0133695394112063e-05, "loss": 0.0176, "step": 81010 }, { "epoch": 0.5988882646876201, "grad_norm": 0.09139882773160934, "learning_rate": 2.0129985754985756e-05, "loss": 0.0169, "step": 81020 }, { "epoch": 0.5989621832589219, "grad_norm": 0.08543331921100616, "learning_rate": 2.012627611585945e-05, "loss": 0.0168, "step": 81030 }, { "epoch": 0.5990361018302238, "grad_norm": 0.0778161883354187, "learning_rate": 2.0122566476733144e-05, "loss": 0.0171, "step": 81040 }, { "epoch": 0.5991100204015257, "grad_norm": 0.07842028886079788, "learning_rate": 2.011885683760684e-05, "loss": 0.0155, "step": 81050 }, { "epoch": 0.5991839389728275, "grad_norm": 0.06531845778226852, "learning_rate": 2.0115147198480533e-05, "loss": 0.0187, "step": 81060 }, { "epoch": 0.5992578575441294, "grad_norm": 0.07770530134439468, "learning_rate": 2.0111437559354226e-05, "loss": 0.0165, "step": 81070 }, { "epoch": 0.5993317761154312, "grad_norm": 0.0701184794306755, "learning_rate": 2.010772792022792e-05, "loss": 0.0158, "step": 81080 }, { "epoch": 0.5994056946867331, "grad_norm": 0.07867050170898438, "learning_rate": 2.0104018281101618e-05, "loss": 0.0166, "step": 81090 }, { "epoch": 0.5994796132580349, "grad_norm": 0.07159710675477982, "learning_rate": 2.010030864197531e-05, "loss": 0.0184, "step": 81100 }, { "epoch": 0.5995535318293368, "grad_norm": 0.08176423609256744, "learning_rate": 2.0096599002849003e-05, "loss": 0.0153, "step": 81110 }, { "epoch": 0.5996274504006387, "grad_norm": 0.05352728068828583, "learning_rate": 2.00928893637227e-05, "loss": 0.0189, "step": 81120 }, { "epoch": 0.5997013689719405, "grad_norm": 0.08773167431354523, "learning_rate": 2.008917972459639e-05, "loss": 0.0181, "step": 81130 }, { "epoch": 0.5997752875432424, "grad_norm": 0.12341810762882233, "learning_rate": 2.0085470085470087e-05, "loss": 0.0191, "step": 81140 }, { "epoch": 0.5998492061145442, "grad_norm": 0.08232131600379944, "learning_rate": 2.008176044634378e-05, "loss": 0.0169, "step": 81150 }, { "epoch": 0.5999231246858461, "grad_norm": 0.1366821676492691, "learning_rate": 2.0078050807217476e-05, "loss": 0.0171, "step": 81160 }, { "epoch": 0.599997043257148, "grad_norm": 0.07837974280118942, "learning_rate": 2.007434116809117e-05, "loss": 0.0184, "step": 81170 }, { "epoch": 0.6000709618284498, "grad_norm": 0.08063607662916183, "learning_rate": 2.0070631528964864e-05, "loss": 0.0156, "step": 81180 }, { "epoch": 0.6001448803997517, "grad_norm": 0.07672372460365295, "learning_rate": 2.0066921889838557e-05, "loss": 0.0197, "step": 81190 }, { "epoch": 0.6002187989710535, "grad_norm": 0.1100495308637619, "learning_rate": 2.0063212250712253e-05, "loss": 0.0216, "step": 81200 }, { "epoch": 0.6002927175423554, "grad_norm": 0.09978067874908447, "learning_rate": 2.0059502611585945e-05, "loss": 0.0186, "step": 81210 }, { "epoch": 0.6003666361136571, "grad_norm": 0.07662851363420486, "learning_rate": 2.0055792972459638e-05, "loss": 0.0163, "step": 81220 }, { "epoch": 0.600440554684959, "grad_norm": 0.08676115423440933, "learning_rate": 2.0052083333333334e-05, "loss": 0.0169, "step": 81230 }, { "epoch": 0.600514473256261, "grad_norm": 0.07154218852519989, "learning_rate": 2.004837369420703e-05, "loss": 0.0177, "step": 81240 }, { "epoch": 0.6005883918275627, "grad_norm": 0.0777452364563942, "learning_rate": 2.0044664055080723e-05, "loss": 0.0192, "step": 81250 }, { "epoch": 0.6006623103988646, "grad_norm": 0.07932627201080322, "learning_rate": 2.0040954415954415e-05, "loss": 0.0174, "step": 81260 }, { "epoch": 0.6007362289701664, "grad_norm": 0.07561071217060089, "learning_rate": 2.003724477682811e-05, "loss": 0.0159, "step": 81270 }, { "epoch": 0.6008101475414683, "grad_norm": 0.07187164574861526, "learning_rate": 2.0033535137701807e-05, "loss": 0.0173, "step": 81280 }, { "epoch": 0.6008840661127701, "grad_norm": 0.08690813928842545, "learning_rate": 2.00298254985755e-05, "loss": 0.0184, "step": 81290 }, { "epoch": 0.600957984684072, "grad_norm": 0.06971681118011475, "learning_rate": 2.0026115859449192e-05, "loss": 0.0172, "step": 81300 }, { "epoch": 0.6010319032553739, "grad_norm": 0.06447634100914001, "learning_rate": 2.0022406220322888e-05, "loss": 0.0185, "step": 81310 }, { "epoch": 0.6011058218266757, "grad_norm": 0.09539581835269928, "learning_rate": 2.0018696581196584e-05, "loss": 0.018, "step": 81320 }, { "epoch": 0.6011797403979776, "grad_norm": 0.05732736736536026, "learning_rate": 2.0014986942070277e-05, "loss": 0.0163, "step": 81330 }, { "epoch": 0.6012536589692794, "grad_norm": 0.059371933341026306, "learning_rate": 2.001127730294397e-05, "loss": 0.0172, "step": 81340 }, { "epoch": 0.6013275775405813, "grad_norm": 0.20023475587368011, "learning_rate": 2.0007567663817665e-05, "loss": 0.0197, "step": 81350 }, { "epoch": 0.6014014961118832, "grad_norm": 0.09249341487884521, "learning_rate": 2.0003858024691358e-05, "loss": 0.0175, "step": 81360 }, { "epoch": 0.601475414683185, "grad_norm": 0.07810152322053909, "learning_rate": 2.0000148385565054e-05, "loss": 0.018, "step": 81370 }, { "epoch": 0.6015493332544869, "grad_norm": 0.08528342097997665, "learning_rate": 1.9996438746438746e-05, "loss": 0.0163, "step": 81380 }, { "epoch": 0.6016232518257887, "grad_norm": 0.05830449238419533, "learning_rate": 1.9992729107312442e-05, "loss": 0.0185, "step": 81390 }, { "epoch": 0.6016971703970906, "grad_norm": 0.06526520848274231, "learning_rate": 1.9989019468186135e-05, "loss": 0.0175, "step": 81400 }, { "epoch": 0.6017710889683924, "grad_norm": 0.06398095190525055, "learning_rate": 1.998530982905983e-05, "loss": 0.016, "step": 81410 }, { "epoch": 0.6018450075396943, "grad_norm": 0.05629360303282738, "learning_rate": 1.9981600189933523e-05, "loss": 0.015, "step": 81420 }, { "epoch": 0.6019189261109962, "grad_norm": 0.0880153700709343, "learning_rate": 1.997789055080722e-05, "loss": 0.0182, "step": 81430 }, { "epoch": 0.601992844682298, "grad_norm": 0.10312674194574356, "learning_rate": 1.9974180911680912e-05, "loss": 0.0163, "step": 81440 }, { "epoch": 0.6020667632535999, "grad_norm": 0.06881557404994965, "learning_rate": 1.9970471272554605e-05, "loss": 0.0178, "step": 81450 }, { "epoch": 0.6021406818249017, "grad_norm": 0.07419392466545105, "learning_rate": 1.99667616334283e-05, "loss": 0.0149, "step": 81460 }, { "epoch": 0.6022146003962036, "grad_norm": 0.08452188968658447, "learning_rate": 1.9963051994301997e-05, "loss": 0.0162, "step": 81470 }, { "epoch": 0.6022885189675053, "grad_norm": 0.08976190537214279, "learning_rate": 1.995934235517569e-05, "loss": 0.0179, "step": 81480 }, { "epoch": 0.6023624375388072, "grad_norm": 0.05598026141524315, "learning_rate": 1.995563271604938e-05, "loss": 0.0152, "step": 81490 }, { "epoch": 0.6024363561101092, "grad_norm": 0.08023150265216827, "learning_rate": 1.9951923076923078e-05, "loss": 0.0202, "step": 81500 }, { "epoch": 0.6025102746814109, "grad_norm": 0.08681504428386688, "learning_rate": 1.9948213437796774e-05, "loss": 0.0181, "step": 81510 }, { "epoch": 0.6025841932527128, "grad_norm": 0.1389748752117157, "learning_rate": 1.9944503798670466e-05, "loss": 0.0192, "step": 81520 }, { "epoch": 0.6026581118240146, "grad_norm": 0.11852090805768967, "learning_rate": 1.994079415954416e-05, "loss": 0.0175, "step": 81530 }, { "epoch": 0.6027320303953165, "grad_norm": 0.07455288618803024, "learning_rate": 1.9937084520417855e-05, "loss": 0.0178, "step": 81540 }, { "epoch": 0.6028059489666183, "grad_norm": 0.075650155544281, "learning_rate": 1.993337488129155e-05, "loss": 0.0186, "step": 81550 }, { "epoch": 0.6028798675379202, "grad_norm": 0.0989387258887291, "learning_rate": 1.9929665242165243e-05, "loss": 0.0185, "step": 81560 }, { "epoch": 0.6029537861092221, "grad_norm": 0.07722626626491547, "learning_rate": 1.9925955603038936e-05, "loss": 0.0168, "step": 81570 }, { "epoch": 0.6030277046805239, "grad_norm": 0.05953269824385643, "learning_rate": 1.9922245963912632e-05, "loss": 0.0174, "step": 81580 }, { "epoch": 0.6031016232518258, "grad_norm": 0.08750326186418533, "learning_rate": 1.9918536324786324e-05, "loss": 0.0176, "step": 81590 }, { "epoch": 0.6031755418231276, "grad_norm": 0.0637708455324173, "learning_rate": 1.991482668566002e-05, "loss": 0.0162, "step": 81600 }, { "epoch": 0.6032494603944295, "grad_norm": 0.07842204719781876, "learning_rate": 1.9911117046533713e-05, "loss": 0.0145, "step": 81610 }, { "epoch": 0.6033233789657314, "grad_norm": 0.07083271443843842, "learning_rate": 1.990740740740741e-05, "loss": 0.0179, "step": 81620 }, { "epoch": 0.6033972975370332, "grad_norm": 0.04399878531694412, "learning_rate": 1.99036977682811e-05, "loss": 0.0173, "step": 81630 }, { "epoch": 0.6034712161083351, "grad_norm": 0.08544055372476578, "learning_rate": 1.9899988129154797e-05, "loss": 0.0177, "step": 81640 }, { "epoch": 0.6035451346796369, "grad_norm": 0.0625818744301796, "learning_rate": 1.989627849002849e-05, "loss": 0.0171, "step": 81650 }, { "epoch": 0.6036190532509388, "grad_norm": 0.11756463348865509, "learning_rate": 1.9892568850902186e-05, "loss": 0.0199, "step": 81660 }, { "epoch": 0.6036929718222406, "grad_norm": 0.059727754443883896, "learning_rate": 1.988885921177588e-05, "loss": 0.0185, "step": 81670 }, { "epoch": 0.6037668903935425, "grad_norm": 0.06694986671209335, "learning_rate": 1.988514957264957e-05, "loss": 0.0164, "step": 81680 }, { "epoch": 0.6038408089648444, "grad_norm": 0.08270717412233353, "learning_rate": 1.9881439933523267e-05, "loss": 0.0175, "step": 81690 }, { "epoch": 0.6039147275361462, "grad_norm": 0.11556769162416458, "learning_rate": 1.9877730294396963e-05, "loss": 0.018, "step": 81700 }, { "epoch": 0.6039886461074481, "grad_norm": 0.06787298619747162, "learning_rate": 1.9874020655270656e-05, "loss": 0.0186, "step": 81710 }, { "epoch": 0.6040625646787499, "grad_norm": 0.09240677207708359, "learning_rate": 1.9870311016144348e-05, "loss": 0.0176, "step": 81720 }, { "epoch": 0.6041364832500518, "grad_norm": 0.0830911323428154, "learning_rate": 1.9866601377018044e-05, "loss": 0.0161, "step": 81730 }, { "epoch": 0.6042104018213535, "grad_norm": 0.08775690943002701, "learning_rate": 1.986289173789174e-05, "loss": 0.0197, "step": 81740 }, { "epoch": 0.6042843203926554, "grad_norm": 0.09807612001895905, "learning_rate": 1.9859182098765433e-05, "loss": 0.0191, "step": 81750 }, { "epoch": 0.6043582389639574, "grad_norm": 0.08120745420455933, "learning_rate": 1.9855472459639125e-05, "loss": 0.0173, "step": 81760 }, { "epoch": 0.6044321575352591, "grad_norm": 0.08104580640792847, "learning_rate": 1.985176282051282e-05, "loss": 0.0227, "step": 81770 }, { "epoch": 0.604506076106561, "grad_norm": 0.09069859981536865, "learning_rate": 1.9848053181386517e-05, "loss": 0.0202, "step": 81780 }, { "epoch": 0.6045799946778628, "grad_norm": 0.06171071156859398, "learning_rate": 1.984434354226021e-05, "loss": 0.0161, "step": 81790 }, { "epoch": 0.6046539132491647, "grad_norm": 0.09475076198577881, "learning_rate": 1.9840633903133902e-05, "loss": 0.0175, "step": 81800 }, { "epoch": 0.6047278318204665, "grad_norm": 0.07772573083639145, "learning_rate": 1.98369242640076e-05, "loss": 0.0176, "step": 81810 }, { "epoch": 0.6048017503917684, "grad_norm": 0.0816749632358551, "learning_rate": 1.983321462488129e-05, "loss": 0.0195, "step": 81820 }, { "epoch": 0.6048756689630703, "grad_norm": 0.06907261908054352, "learning_rate": 1.9829504985754987e-05, "loss": 0.0179, "step": 81830 }, { "epoch": 0.6049495875343721, "grad_norm": 0.07266615331172943, "learning_rate": 1.982579534662868e-05, "loss": 0.0176, "step": 81840 }, { "epoch": 0.605023506105674, "grad_norm": 0.1005522757768631, "learning_rate": 1.9822085707502376e-05, "loss": 0.0167, "step": 81850 }, { "epoch": 0.6050974246769758, "grad_norm": 0.07866062223911285, "learning_rate": 1.9818376068376068e-05, "loss": 0.017, "step": 81860 }, { "epoch": 0.6051713432482777, "grad_norm": 0.08237247169017792, "learning_rate": 1.9814666429249764e-05, "loss": 0.0163, "step": 81870 }, { "epoch": 0.6052452618195796, "grad_norm": 0.06381018459796906, "learning_rate": 1.9810956790123457e-05, "loss": 0.0175, "step": 81880 }, { "epoch": 0.6053191803908814, "grad_norm": 0.09133058041334152, "learning_rate": 1.9807247150997153e-05, "loss": 0.0166, "step": 81890 }, { "epoch": 0.6053930989621833, "grad_norm": 0.09503191709518433, "learning_rate": 1.9803537511870845e-05, "loss": 0.0172, "step": 81900 }, { "epoch": 0.6054670175334851, "grad_norm": 0.09173522889614105, "learning_rate": 1.9799827872744538e-05, "loss": 0.0185, "step": 81910 }, { "epoch": 0.605540936104787, "grad_norm": 0.10468512773513794, "learning_rate": 1.9796118233618237e-05, "loss": 0.0172, "step": 81920 }, { "epoch": 0.6056148546760888, "grad_norm": 0.06426660716533661, "learning_rate": 1.979240859449193e-05, "loss": 0.0174, "step": 81930 }, { "epoch": 0.6056887732473907, "grad_norm": 0.1315983235836029, "learning_rate": 1.9788698955365622e-05, "loss": 0.0179, "step": 81940 }, { "epoch": 0.6057626918186926, "grad_norm": 0.09482461959123611, "learning_rate": 1.9784989316239315e-05, "loss": 0.0173, "step": 81950 }, { "epoch": 0.6058366103899944, "grad_norm": 0.07064099609851837, "learning_rate": 1.9781279677113014e-05, "loss": 0.0167, "step": 81960 }, { "epoch": 0.6059105289612963, "grad_norm": 0.17217977344989777, "learning_rate": 1.9777570037986707e-05, "loss": 0.0179, "step": 81970 }, { "epoch": 0.6059844475325981, "grad_norm": 0.0605187863111496, "learning_rate": 1.97738603988604e-05, "loss": 0.0205, "step": 81980 }, { "epoch": 0.6060583661039, "grad_norm": 0.1200895830988884, "learning_rate": 1.9770150759734092e-05, "loss": 0.0159, "step": 81990 }, { "epoch": 0.6061322846752017, "grad_norm": 0.08941507339477539, "learning_rate": 1.9766441120607788e-05, "loss": 0.0158, "step": 82000 }, { "epoch": 0.6062062032465036, "grad_norm": 0.0896550789475441, "learning_rate": 1.9762731481481484e-05, "loss": 0.0172, "step": 82010 }, { "epoch": 0.6062801218178056, "grad_norm": 0.06692972034215927, "learning_rate": 1.9759021842355176e-05, "loss": 0.0176, "step": 82020 }, { "epoch": 0.6063540403891073, "grad_norm": 0.05622977018356323, "learning_rate": 1.975531220322887e-05, "loss": 0.0176, "step": 82030 }, { "epoch": 0.6064279589604092, "grad_norm": 0.10331122577190399, "learning_rate": 1.9751602564102565e-05, "loss": 0.0181, "step": 82040 }, { "epoch": 0.606501877531711, "grad_norm": 0.09501251578330994, "learning_rate": 1.9747892924976258e-05, "loss": 0.0185, "step": 82050 }, { "epoch": 0.6065757961030129, "grad_norm": 0.059741511940956116, "learning_rate": 1.9744183285849954e-05, "loss": 0.0148, "step": 82060 }, { "epoch": 0.6066497146743147, "grad_norm": 0.06147574260830879, "learning_rate": 1.974047364672365e-05, "loss": 0.0169, "step": 82070 }, { "epoch": 0.6067236332456166, "grad_norm": 0.08298580348491669, "learning_rate": 1.9736764007597342e-05, "loss": 0.0203, "step": 82080 }, { "epoch": 0.6067975518169185, "grad_norm": 0.07622501999139786, "learning_rate": 1.9733054368471035e-05, "loss": 0.0177, "step": 82090 }, { "epoch": 0.6068714703882203, "grad_norm": 0.07855395972728729, "learning_rate": 1.972934472934473e-05, "loss": 0.0165, "step": 82100 }, { "epoch": 0.6069453889595222, "grad_norm": 0.07775694131851196, "learning_rate": 1.9725635090218427e-05, "loss": 0.0181, "step": 82110 }, { "epoch": 0.607019307530824, "grad_norm": 0.07085943222045898, "learning_rate": 1.972192545109212e-05, "loss": 0.018, "step": 82120 }, { "epoch": 0.6070932261021259, "grad_norm": 0.08171843737363815, "learning_rate": 1.9718215811965812e-05, "loss": 0.0186, "step": 82130 }, { "epoch": 0.6071671446734278, "grad_norm": 0.09936392307281494, "learning_rate": 1.9714506172839504e-05, "loss": 0.0185, "step": 82140 }, { "epoch": 0.6072410632447296, "grad_norm": 0.07809259742498398, "learning_rate": 1.9710796533713204e-05, "loss": 0.0189, "step": 82150 }, { "epoch": 0.6073149818160315, "grad_norm": 0.10185842961072922, "learning_rate": 1.9707086894586896e-05, "loss": 0.0184, "step": 82160 }, { "epoch": 0.6073889003873333, "grad_norm": 0.08506341278553009, "learning_rate": 1.970337725546059e-05, "loss": 0.0211, "step": 82170 }, { "epoch": 0.6074628189586352, "grad_norm": 0.0753314197063446, "learning_rate": 1.969966761633428e-05, "loss": 0.0185, "step": 82180 }, { "epoch": 0.607536737529937, "grad_norm": 0.08406726270914078, "learning_rate": 1.969595797720798e-05, "loss": 0.0173, "step": 82190 }, { "epoch": 0.6076106561012389, "grad_norm": 0.07717745751142502, "learning_rate": 1.9692248338081673e-05, "loss": 0.0162, "step": 82200 }, { "epoch": 0.6076845746725408, "grad_norm": 0.07151953876018524, "learning_rate": 1.9688538698955366e-05, "loss": 0.0198, "step": 82210 }, { "epoch": 0.6077584932438426, "grad_norm": 0.07603351771831512, "learning_rate": 1.9684829059829062e-05, "loss": 0.0162, "step": 82220 }, { "epoch": 0.6078324118151445, "grad_norm": 0.0788840651512146, "learning_rate": 1.9681119420702754e-05, "loss": 0.016, "step": 82230 }, { "epoch": 0.6079063303864463, "grad_norm": 0.08803824335336685, "learning_rate": 1.967740978157645e-05, "loss": 0.0178, "step": 82240 }, { "epoch": 0.6079802489577482, "grad_norm": 0.09685607254505157, "learning_rate": 1.9673700142450143e-05, "loss": 0.0173, "step": 82250 }, { "epoch": 0.60805416752905, "grad_norm": 0.067377969622612, "learning_rate": 1.966999050332384e-05, "loss": 0.0194, "step": 82260 }, { "epoch": 0.6081280861003519, "grad_norm": 0.09533512592315674, "learning_rate": 1.966628086419753e-05, "loss": 0.0164, "step": 82270 }, { "epoch": 0.6082020046716538, "grad_norm": 0.08822904527187347, "learning_rate": 1.9662571225071228e-05, "loss": 0.0172, "step": 82280 }, { "epoch": 0.6082759232429555, "grad_norm": 0.06911545246839523, "learning_rate": 1.965886158594492e-05, "loss": 0.0183, "step": 82290 }, { "epoch": 0.6083498418142574, "grad_norm": 0.08410735428333282, "learning_rate": 1.9655151946818616e-05, "loss": 0.0181, "step": 82300 }, { "epoch": 0.6084237603855592, "grad_norm": 0.07128669321537018, "learning_rate": 1.965144230769231e-05, "loss": 0.016, "step": 82310 }, { "epoch": 0.6084976789568611, "grad_norm": 0.07649806886911392, "learning_rate": 1.9647732668566e-05, "loss": 0.0189, "step": 82320 }, { "epoch": 0.6085715975281629, "grad_norm": 0.07032273709774017, "learning_rate": 1.9644023029439697e-05, "loss": 0.018, "step": 82330 }, { "epoch": 0.6086455160994648, "grad_norm": 0.09526366740465164, "learning_rate": 1.9640313390313393e-05, "loss": 0.0182, "step": 82340 }, { "epoch": 0.6087194346707667, "grad_norm": 0.0940188616514206, "learning_rate": 1.9636603751187086e-05, "loss": 0.0171, "step": 82350 }, { "epoch": 0.6087933532420685, "grad_norm": 0.07495230436325073, "learning_rate": 1.963289411206078e-05, "loss": 0.0169, "step": 82360 }, { "epoch": 0.6088672718133704, "grad_norm": 0.09661861509084702, "learning_rate": 1.962918447293447e-05, "loss": 0.0172, "step": 82370 }, { "epoch": 0.6089411903846722, "grad_norm": 0.08764498680830002, "learning_rate": 1.962547483380817e-05, "loss": 0.0155, "step": 82380 }, { "epoch": 0.6090151089559741, "grad_norm": 0.06950998306274414, "learning_rate": 1.9621765194681863e-05, "loss": 0.0166, "step": 82390 }, { "epoch": 0.609089027527276, "grad_norm": 0.040111687034368515, "learning_rate": 1.9618055555555555e-05, "loss": 0.0175, "step": 82400 }, { "epoch": 0.6091629460985778, "grad_norm": 0.07911848276853561, "learning_rate": 1.961434591642925e-05, "loss": 0.0195, "step": 82410 }, { "epoch": 0.6092368646698797, "grad_norm": 0.07476948946714401, "learning_rate": 1.9610636277302947e-05, "loss": 0.0178, "step": 82420 }, { "epoch": 0.6093107832411815, "grad_norm": 0.1279289871454239, "learning_rate": 1.960692663817664e-05, "loss": 0.0174, "step": 82430 }, { "epoch": 0.6093847018124834, "grad_norm": 0.0703182965517044, "learning_rate": 1.9603216999050333e-05, "loss": 0.0177, "step": 82440 }, { "epoch": 0.6094586203837852, "grad_norm": 0.0812310054898262, "learning_rate": 1.959950735992403e-05, "loss": 0.018, "step": 82450 }, { "epoch": 0.6095325389550871, "grad_norm": 0.09021375328302383, "learning_rate": 1.959579772079772e-05, "loss": 0.0157, "step": 82460 }, { "epoch": 0.609606457526389, "grad_norm": 0.09041917324066162, "learning_rate": 1.9592088081671417e-05, "loss": 0.0159, "step": 82470 }, { "epoch": 0.6096803760976908, "grad_norm": 0.08076930791139603, "learning_rate": 1.958837844254511e-05, "loss": 0.0164, "step": 82480 }, { "epoch": 0.6097542946689927, "grad_norm": 0.08362049609422684, "learning_rate": 1.9584668803418806e-05, "loss": 0.0196, "step": 82490 }, { "epoch": 0.6098282132402945, "grad_norm": 0.09628446400165558, "learning_rate": 1.9580959164292498e-05, "loss": 0.0177, "step": 82500 }, { "epoch": 0.6099021318115964, "grad_norm": 0.07797731459140778, "learning_rate": 1.9577249525166194e-05, "loss": 0.0202, "step": 82510 }, { "epoch": 0.6099760503828981, "grad_norm": 0.08127598464488983, "learning_rate": 1.9573539886039887e-05, "loss": 0.0198, "step": 82520 }, { "epoch": 0.6100499689542, "grad_norm": 0.060320500284433365, "learning_rate": 1.9569830246913583e-05, "loss": 0.0173, "step": 82530 }, { "epoch": 0.610123887525502, "grad_norm": 0.05279785394668579, "learning_rate": 1.9566120607787275e-05, "loss": 0.0152, "step": 82540 }, { "epoch": 0.6101978060968037, "grad_norm": 0.06562897562980652, "learning_rate": 1.9562410968660968e-05, "loss": 0.0186, "step": 82550 }, { "epoch": 0.6102717246681056, "grad_norm": 0.08916452527046204, "learning_rate": 1.9558701329534664e-05, "loss": 0.0177, "step": 82560 }, { "epoch": 0.6103456432394074, "grad_norm": 0.05119860917329788, "learning_rate": 1.955499169040836e-05, "loss": 0.017, "step": 82570 }, { "epoch": 0.6104195618107093, "grad_norm": 0.07321562618017197, "learning_rate": 1.9551282051282052e-05, "loss": 0.0183, "step": 82580 }, { "epoch": 0.6104934803820111, "grad_norm": 0.06944682449102402, "learning_rate": 1.9547572412155745e-05, "loss": 0.0194, "step": 82590 }, { "epoch": 0.610567398953313, "grad_norm": 0.09687232226133347, "learning_rate": 1.954386277302944e-05, "loss": 0.0169, "step": 82600 }, { "epoch": 0.6106413175246149, "grad_norm": 0.07312414050102234, "learning_rate": 1.9540153133903137e-05, "loss": 0.018, "step": 82610 }, { "epoch": 0.6107152360959167, "grad_norm": 0.07167432457208633, "learning_rate": 1.953644349477683e-05, "loss": 0.0202, "step": 82620 }, { "epoch": 0.6107891546672186, "grad_norm": 0.07102657854557037, "learning_rate": 1.9532733855650522e-05, "loss": 0.0179, "step": 82630 }, { "epoch": 0.6108630732385204, "grad_norm": 0.07418309897184372, "learning_rate": 1.9529024216524218e-05, "loss": 0.0152, "step": 82640 }, { "epoch": 0.6109369918098223, "grad_norm": 0.0991952046751976, "learning_rate": 1.9525314577397914e-05, "loss": 0.0188, "step": 82650 }, { "epoch": 0.6110109103811242, "grad_norm": 0.0658475011587143, "learning_rate": 1.9521604938271607e-05, "loss": 0.0181, "step": 82660 }, { "epoch": 0.611084828952426, "grad_norm": 0.08346283435821533, "learning_rate": 1.95178952991453e-05, "loss": 0.0188, "step": 82670 }, { "epoch": 0.6111587475237279, "grad_norm": 0.08464224636554718, "learning_rate": 1.9514185660018995e-05, "loss": 0.0164, "step": 82680 }, { "epoch": 0.6112326660950297, "grad_norm": 0.06308498233556747, "learning_rate": 1.9510476020892688e-05, "loss": 0.0167, "step": 82690 }, { "epoch": 0.6113065846663316, "grad_norm": 0.06456910073757172, "learning_rate": 1.9506766381766384e-05, "loss": 0.0171, "step": 82700 }, { "epoch": 0.6113805032376334, "grad_norm": 0.06762517243623734, "learning_rate": 1.9503056742640076e-05, "loss": 0.018, "step": 82710 }, { "epoch": 0.6114544218089353, "grad_norm": 0.09544017910957336, "learning_rate": 1.9499347103513772e-05, "loss": 0.0149, "step": 82720 }, { "epoch": 0.6115283403802372, "grad_norm": 0.07939791679382324, "learning_rate": 1.9495637464387465e-05, "loss": 0.0182, "step": 82730 }, { "epoch": 0.611602258951539, "grad_norm": 0.07914235442876816, "learning_rate": 1.949192782526116e-05, "loss": 0.018, "step": 82740 }, { "epoch": 0.6116761775228409, "grad_norm": 0.09582500159740448, "learning_rate": 1.9488218186134853e-05, "loss": 0.0168, "step": 82750 }, { "epoch": 0.6117500960941427, "grad_norm": 0.07404599338769913, "learning_rate": 1.948450854700855e-05, "loss": 0.0198, "step": 82760 }, { "epoch": 0.6118240146654446, "grad_norm": 0.07443395256996155, "learning_rate": 1.9480798907882242e-05, "loss": 0.0166, "step": 82770 }, { "epoch": 0.6118979332367463, "grad_norm": 0.07942095398902893, "learning_rate": 1.9477089268755934e-05, "loss": 0.0174, "step": 82780 }, { "epoch": 0.6119718518080483, "grad_norm": 0.07103811949491501, "learning_rate": 1.947337962962963e-05, "loss": 0.0171, "step": 82790 }, { "epoch": 0.6120457703793502, "grad_norm": 0.09235479682683945, "learning_rate": 1.9469669990503326e-05, "loss": 0.0196, "step": 82800 }, { "epoch": 0.6121196889506519, "grad_norm": 0.08370334655046463, "learning_rate": 1.946596035137702e-05, "loss": 0.0175, "step": 82810 }, { "epoch": 0.6121936075219538, "grad_norm": 0.09434487670660019, "learning_rate": 1.946225071225071e-05, "loss": 0.0174, "step": 82820 }, { "epoch": 0.6122675260932556, "grad_norm": 0.0804063156247139, "learning_rate": 1.9458541073124407e-05, "loss": 0.0175, "step": 82830 }, { "epoch": 0.6123414446645575, "grad_norm": 0.08051107078790665, "learning_rate": 1.9454831433998103e-05, "loss": 0.0167, "step": 82840 }, { "epoch": 0.6124153632358593, "grad_norm": 0.09527001529932022, "learning_rate": 1.9451121794871796e-05, "loss": 0.018, "step": 82850 }, { "epoch": 0.6124892818071612, "grad_norm": 0.09901162981987, "learning_rate": 1.944741215574549e-05, "loss": 0.0182, "step": 82860 }, { "epoch": 0.6125632003784631, "grad_norm": 0.11223585158586502, "learning_rate": 1.9443702516619185e-05, "loss": 0.0213, "step": 82870 }, { "epoch": 0.6126371189497649, "grad_norm": 0.10098253935575485, "learning_rate": 1.943999287749288e-05, "loss": 0.0188, "step": 82880 }, { "epoch": 0.6127110375210668, "grad_norm": 0.126252681016922, "learning_rate": 1.9436283238366573e-05, "loss": 0.0171, "step": 82890 }, { "epoch": 0.6127849560923686, "grad_norm": 0.08134482800960541, "learning_rate": 1.9432573599240266e-05, "loss": 0.0178, "step": 82900 }, { "epoch": 0.6128588746636705, "grad_norm": 0.06653578579425812, "learning_rate": 1.942886396011396e-05, "loss": 0.0183, "step": 82910 }, { "epoch": 0.6129327932349724, "grad_norm": 0.05563788861036301, "learning_rate": 1.9425154320987654e-05, "loss": 0.0182, "step": 82920 }, { "epoch": 0.6130067118062742, "grad_norm": 0.07811429351568222, "learning_rate": 1.942144468186135e-05, "loss": 0.0154, "step": 82930 }, { "epoch": 0.6130806303775761, "grad_norm": 0.0799199566245079, "learning_rate": 1.9417735042735043e-05, "loss": 0.02, "step": 82940 }, { "epoch": 0.6131545489488779, "grad_norm": 0.08699478954076767, "learning_rate": 1.941402540360874e-05, "loss": 0.0184, "step": 82950 }, { "epoch": 0.6132284675201798, "grad_norm": 0.08317042887210846, "learning_rate": 1.941031576448243e-05, "loss": 0.0163, "step": 82960 }, { "epoch": 0.6133023860914816, "grad_norm": 0.08908155560493469, "learning_rate": 1.9406606125356127e-05, "loss": 0.017, "step": 82970 }, { "epoch": 0.6133763046627835, "grad_norm": 0.0903390496969223, "learning_rate": 1.940289648622982e-05, "loss": 0.0178, "step": 82980 }, { "epoch": 0.6134502232340854, "grad_norm": 0.0837148055434227, "learning_rate": 1.9399186847103516e-05, "loss": 0.0186, "step": 82990 }, { "epoch": 0.6135241418053872, "grad_norm": 0.07399854809045792, "learning_rate": 1.939547720797721e-05, "loss": 0.0176, "step": 83000 }, { "epoch": 0.6135980603766891, "grad_norm": 0.05894295498728752, "learning_rate": 1.93917675688509e-05, "loss": 0.0183, "step": 83010 }, { "epoch": 0.6136719789479909, "grad_norm": 0.09080424159765244, "learning_rate": 1.9388057929724597e-05, "loss": 0.0159, "step": 83020 }, { "epoch": 0.6137458975192928, "grad_norm": 0.06074220687150955, "learning_rate": 1.9384348290598293e-05, "loss": 0.0194, "step": 83030 }, { "epoch": 0.6138198160905945, "grad_norm": 0.07715459167957306, "learning_rate": 1.9380638651471986e-05, "loss": 0.0177, "step": 83040 }, { "epoch": 0.6138937346618965, "grad_norm": 0.1121053695678711, "learning_rate": 1.9376929012345678e-05, "loss": 0.0184, "step": 83050 }, { "epoch": 0.6139676532331984, "grad_norm": 0.07647407799959183, "learning_rate": 1.9373219373219374e-05, "loss": 0.0174, "step": 83060 }, { "epoch": 0.6140415718045001, "grad_norm": 0.08576580882072449, "learning_rate": 1.936950973409307e-05, "loss": 0.0185, "step": 83070 }, { "epoch": 0.614115490375802, "grad_norm": 0.06949552893638611, "learning_rate": 1.9365800094966763e-05, "loss": 0.0175, "step": 83080 }, { "epoch": 0.6141894089471038, "grad_norm": 0.05394889414310455, "learning_rate": 1.9362090455840455e-05, "loss": 0.016, "step": 83090 }, { "epoch": 0.6142633275184057, "grad_norm": 0.1143985167145729, "learning_rate": 1.935838081671415e-05, "loss": 0.0194, "step": 83100 }, { "epoch": 0.6143372460897075, "grad_norm": 0.07003039121627808, "learning_rate": 1.9354671177587847e-05, "loss": 0.0182, "step": 83110 }, { "epoch": 0.6144111646610094, "grad_norm": 0.0878969207406044, "learning_rate": 1.935096153846154e-05, "loss": 0.0195, "step": 83120 }, { "epoch": 0.6144850832323113, "grad_norm": 0.062459979206323624, "learning_rate": 1.9347251899335232e-05, "loss": 0.0188, "step": 83130 }, { "epoch": 0.6145590018036131, "grad_norm": 0.0716971904039383, "learning_rate": 1.9343542260208928e-05, "loss": 0.0168, "step": 83140 }, { "epoch": 0.614632920374915, "grad_norm": 0.07524064928293228, "learning_rate": 1.933983262108262e-05, "loss": 0.0187, "step": 83150 }, { "epoch": 0.6147068389462168, "grad_norm": 0.08315975219011307, "learning_rate": 1.9336122981956317e-05, "loss": 0.0182, "step": 83160 }, { "epoch": 0.6147807575175187, "grad_norm": 0.07940053939819336, "learning_rate": 1.933241334283001e-05, "loss": 0.0162, "step": 83170 }, { "epoch": 0.6148546760888206, "grad_norm": 0.06539808958768845, "learning_rate": 1.9328703703703705e-05, "loss": 0.0167, "step": 83180 }, { "epoch": 0.6149285946601224, "grad_norm": 0.09697232395410538, "learning_rate": 1.9324994064577398e-05, "loss": 0.0189, "step": 83190 }, { "epoch": 0.6150025132314243, "grad_norm": 0.09977428615093231, "learning_rate": 1.9321284425451094e-05, "loss": 0.0189, "step": 83200 }, { "epoch": 0.6150764318027261, "grad_norm": 0.0542060025036335, "learning_rate": 1.9317574786324786e-05, "loss": 0.017, "step": 83210 }, { "epoch": 0.615150350374028, "grad_norm": 0.0710158422589302, "learning_rate": 1.9313865147198482e-05, "loss": 0.0178, "step": 83220 }, { "epoch": 0.6152242689453298, "grad_norm": 0.07952819764614105, "learning_rate": 1.9310155508072175e-05, "loss": 0.0179, "step": 83230 }, { "epoch": 0.6152981875166317, "grad_norm": 0.0634431466460228, "learning_rate": 1.9306445868945868e-05, "loss": 0.0177, "step": 83240 }, { "epoch": 0.6153721060879336, "grad_norm": 0.07674331963062286, "learning_rate": 1.9302736229819564e-05, "loss": 0.0183, "step": 83250 }, { "epoch": 0.6154460246592354, "grad_norm": 0.10375528037548065, "learning_rate": 1.929902659069326e-05, "loss": 0.0164, "step": 83260 }, { "epoch": 0.6155199432305373, "grad_norm": 0.06967686861753464, "learning_rate": 1.9295316951566952e-05, "loss": 0.02, "step": 83270 }, { "epoch": 0.6155938618018391, "grad_norm": 0.08993130922317505, "learning_rate": 1.9291607312440645e-05, "loss": 0.0188, "step": 83280 }, { "epoch": 0.615667780373141, "grad_norm": 0.06792949140071869, "learning_rate": 1.928789767331434e-05, "loss": 0.0176, "step": 83290 }, { "epoch": 0.6157416989444428, "grad_norm": 0.07722378522157669, "learning_rate": 1.9284188034188037e-05, "loss": 0.0169, "step": 83300 }, { "epoch": 0.6158156175157447, "grad_norm": 0.0698406994342804, "learning_rate": 1.928047839506173e-05, "loss": 0.0188, "step": 83310 }, { "epoch": 0.6158895360870466, "grad_norm": 0.07145354151725769, "learning_rate": 1.9276768755935422e-05, "loss": 0.017, "step": 83320 }, { "epoch": 0.6159634546583483, "grad_norm": 0.06953489035367966, "learning_rate": 1.9273059116809118e-05, "loss": 0.0156, "step": 83330 }, { "epoch": 0.6160373732296502, "grad_norm": 0.09008972346782684, "learning_rate": 1.9269349477682814e-05, "loss": 0.0169, "step": 83340 }, { "epoch": 0.616111291800952, "grad_norm": 0.07456735521554947, "learning_rate": 1.9265639838556506e-05, "loss": 0.0152, "step": 83350 }, { "epoch": 0.6161852103722539, "grad_norm": 0.07110676914453506, "learning_rate": 1.92619301994302e-05, "loss": 0.0196, "step": 83360 }, { "epoch": 0.6162591289435558, "grad_norm": 0.06726035475730896, "learning_rate": 1.9258220560303895e-05, "loss": 0.0178, "step": 83370 }, { "epoch": 0.6163330475148576, "grad_norm": 0.07207488268613815, "learning_rate": 1.9254510921177587e-05, "loss": 0.0184, "step": 83380 }, { "epoch": 0.6164069660861595, "grad_norm": 0.06204327568411827, "learning_rate": 1.9250801282051283e-05, "loss": 0.0159, "step": 83390 }, { "epoch": 0.6164808846574613, "grad_norm": 0.13019336760044098, "learning_rate": 1.9247091642924976e-05, "loss": 0.018, "step": 83400 }, { "epoch": 0.6165548032287632, "grad_norm": 0.08293792605400085, "learning_rate": 1.9243382003798672e-05, "loss": 0.0192, "step": 83410 }, { "epoch": 0.616628721800065, "grad_norm": 0.05020857974886894, "learning_rate": 1.9239672364672364e-05, "loss": 0.0156, "step": 83420 }, { "epoch": 0.6167026403713669, "grad_norm": 0.06632091850042343, "learning_rate": 1.923596272554606e-05, "loss": 0.0174, "step": 83430 }, { "epoch": 0.6167765589426688, "grad_norm": 0.11432168632745743, "learning_rate": 1.9232253086419756e-05, "loss": 0.0182, "step": 83440 }, { "epoch": 0.6168504775139706, "grad_norm": 0.08506050705909729, "learning_rate": 1.922854344729345e-05, "loss": 0.0181, "step": 83450 }, { "epoch": 0.6169243960852725, "grad_norm": 0.08166781812906265, "learning_rate": 1.922483380816714e-05, "loss": 0.0191, "step": 83460 }, { "epoch": 0.6169983146565743, "grad_norm": 0.06657709181308746, "learning_rate": 1.9221124169040834e-05, "loss": 0.0181, "step": 83470 }, { "epoch": 0.6170722332278762, "grad_norm": 0.06887469440698624, "learning_rate": 1.9217414529914534e-05, "loss": 0.0178, "step": 83480 }, { "epoch": 0.617146151799178, "grad_norm": 0.07855060696601868, "learning_rate": 1.9213704890788226e-05, "loss": 0.017, "step": 83490 }, { "epoch": 0.6172200703704799, "grad_norm": 0.0891873836517334, "learning_rate": 1.920999525166192e-05, "loss": 0.0179, "step": 83500 }, { "epoch": 0.6172939889417818, "grad_norm": 0.1001485213637352, "learning_rate": 1.920628561253561e-05, "loss": 0.0173, "step": 83510 }, { "epoch": 0.6173679075130836, "grad_norm": 0.0711141899228096, "learning_rate": 1.9202575973409307e-05, "loss": 0.0182, "step": 83520 }, { "epoch": 0.6174418260843855, "grad_norm": 0.07405520975589752, "learning_rate": 1.9198866334283003e-05, "loss": 0.0177, "step": 83530 }, { "epoch": 0.6175157446556873, "grad_norm": 0.06708526611328125, "learning_rate": 1.9195156695156696e-05, "loss": 0.0194, "step": 83540 }, { "epoch": 0.6175896632269892, "grad_norm": 0.09064970910549164, "learning_rate": 1.919144705603039e-05, "loss": 0.0197, "step": 83550 }, { "epoch": 0.617663581798291, "grad_norm": 0.07799121737480164, "learning_rate": 1.9187737416904084e-05, "loss": 0.0188, "step": 83560 }, { "epoch": 0.6177375003695929, "grad_norm": 0.049574632197618484, "learning_rate": 1.918402777777778e-05, "loss": 0.0173, "step": 83570 }, { "epoch": 0.6178114189408948, "grad_norm": 0.06921504437923431, "learning_rate": 1.9180318138651473e-05, "loss": 0.0188, "step": 83580 }, { "epoch": 0.6178853375121965, "grad_norm": 0.06067037582397461, "learning_rate": 1.917660849952517e-05, "loss": 0.0186, "step": 83590 }, { "epoch": 0.6179592560834984, "grad_norm": 0.09824355691671371, "learning_rate": 1.917289886039886e-05, "loss": 0.0183, "step": 83600 }, { "epoch": 0.6180331746548002, "grad_norm": 0.07331540435552597, "learning_rate": 1.9169189221272554e-05, "loss": 0.0155, "step": 83610 }, { "epoch": 0.6181070932261021, "grad_norm": 0.09476131200790405, "learning_rate": 1.916547958214625e-05, "loss": 0.0176, "step": 83620 }, { "epoch": 0.618181011797404, "grad_norm": 0.07724323868751526, "learning_rate": 1.9161769943019946e-05, "loss": 0.015, "step": 83630 }, { "epoch": 0.6182549303687058, "grad_norm": 0.08021007478237152, "learning_rate": 1.915806030389364e-05, "loss": 0.0177, "step": 83640 }, { "epoch": 0.6183288489400077, "grad_norm": 0.07071489095687866, "learning_rate": 1.915435066476733e-05, "loss": 0.0186, "step": 83650 }, { "epoch": 0.6184027675113095, "grad_norm": 0.10465199500322342, "learning_rate": 1.9150641025641027e-05, "loss": 0.0179, "step": 83660 }, { "epoch": 0.6184766860826114, "grad_norm": 0.09959176927804947, "learning_rate": 1.9146931386514723e-05, "loss": 0.0177, "step": 83670 }, { "epoch": 0.6185506046539132, "grad_norm": 0.0952177420258522, "learning_rate": 1.9143221747388416e-05, "loss": 0.0193, "step": 83680 }, { "epoch": 0.6186245232252151, "grad_norm": 0.08493653684854507, "learning_rate": 1.9139512108262108e-05, "loss": 0.0165, "step": 83690 }, { "epoch": 0.618698441796517, "grad_norm": 0.07825871556997299, "learning_rate": 1.91358024691358e-05, "loss": 0.0164, "step": 83700 }, { "epoch": 0.6187723603678188, "grad_norm": 0.09615447372198105, "learning_rate": 1.91320928300095e-05, "loss": 0.0186, "step": 83710 }, { "epoch": 0.6188462789391207, "grad_norm": 0.06349712610244751, "learning_rate": 1.9128383190883193e-05, "loss": 0.0145, "step": 83720 }, { "epoch": 0.6189201975104225, "grad_norm": 0.09628452360630035, "learning_rate": 1.9124673551756885e-05, "loss": 0.0161, "step": 83730 }, { "epoch": 0.6189941160817244, "grad_norm": 0.07538719475269318, "learning_rate": 1.9120963912630578e-05, "loss": 0.0181, "step": 83740 }, { "epoch": 0.6190680346530262, "grad_norm": 0.0743245929479599, "learning_rate": 1.9117254273504274e-05, "loss": 0.0189, "step": 83750 }, { "epoch": 0.6191419532243281, "grad_norm": 0.08122391253709793, "learning_rate": 1.911354463437797e-05, "loss": 0.0199, "step": 83760 }, { "epoch": 0.61921587179563, "grad_norm": 0.06733874976634979, "learning_rate": 1.9109834995251662e-05, "loss": 0.0168, "step": 83770 }, { "epoch": 0.6192897903669318, "grad_norm": 0.06824064254760742, "learning_rate": 1.910612535612536e-05, "loss": 0.0174, "step": 83780 }, { "epoch": 0.6193637089382337, "grad_norm": 0.06514638662338257, "learning_rate": 1.910241571699905e-05, "loss": 0.0176, "step": 83790 }, { "epoch": 0.6194376275095355, "grad_norm": 0.08393329381942749, "learning_rate": 1.9098706077872747e-05, "loss": 0.0172, "step": 83800 }, { "epoch": 0.6195115460808374, "grad_norm": 0.0601193904876709, "learning_rate": 1.909499643874644e-05, "loss": 0.0168, "step": 83810 }, { "epoch": 0.6195854646521392, "grad_norm": 0.09605354815721512, "learning_rate": 1.9091286799620135e-05, "loss": 0.019, "step": 83820 }, { "epoch": 0.619659383223441, "grad_norm": 0.08356858789920807, "learning_rate": 1.9087577160493828e-05, "loss": 0.0187, "step": 83830 }, { "epoch": 0.619733301794743, "grad_norm": 0.09216849505901337, "learning_rate": 1.908386752136752e-05, "loss": 0.018, "step": 83840 }, { "epoch": 0.6198072203660447, "grad_norm": 0.07293610274791718, "learning_rate": 1.9080157882241217e-05, "loss": 0.0164, "step": 83850 }, { "epoch": 0.6198811389373466, "grad_norm": 0.09722217172384262, "learning_rate": 1.9076448243114913e-05, "loss": 0.0199, "step": 83860 }, { "epoch": 0.6199550575086484, "grad_norm": 0.060842424631118774, "learning_rate": 1.9072738603988605e-05, "loss": 0.0167, "step": 83870 }, { "epoch": 0.6200289760799503, "grad_norm": 0.08243682980537415, "learning_rate": 1.9069028964862298e-05, "loss": 0.0173, "step": 83880 }, { "epoch": 0.6201028946512522, "grad_norm": 0.09843390434980392, "learning_rate": 1.9065319325735994e-05, "loss": 0.0192, "step": 83890 }, { "epoch": 0.620176813222554, "grad_norm": 0.0666450783610344, "learning_rate": 1.906160968660969e-05, "loss": 0.0184, "step": 83900 }, { "epoch": 0.6202507317938559, "grad_norm": 0.07498643547296524, "learning_rate": 1.9057900047483382e-05, "loss": 0.0173, "step": 83910 }, { "epoch": 0.6203246503651577, "grad_norm": 0.0821881964802742, "learning_rate": 1.9054190408357075e-05, "loss": 0.0172, "step": 83920 }, { "epoch": 0.6203985689364596, "grad_norm": 0.06200087442994118, "learning_rate": 1.905048076923077e-05, "loss": 0.0167, "step": 83930 }, { "epoch": 0.6204724875077614, "grad_norm": 0.08189740777015686, "learning_rate": 1.9046771130104467e-05, "loss": 0.0199, "step": 83940 }, { "epoch": 0.6205464060790633, "grad_norm": 0.08127987384796143, "learning_rate": 1.904306149097816e-05, "loss": 0.0188, "step": 83950 }, { "epoch": 0.6206203246503652, "grad_norm": 0.08441877365112305, "learning_rate": 1.9039351851851852e-05, "loss": 0.0167, "step": 83960 }, { "epoch": 0.620694243221667, "grad_norm": 0.04807879403233528, "learning_rate": 1.9035642212725548e-05, "loss": 0.0168, "step": 83970 }, { "epoch": 0.6207681617929689, "grad_norm": 0.07149063050746918, "learning_rate": 1.903193257359924e-05, "loss": 0.0181, "step": 83980 }, { "epoch": 0.6208420803642707, "grad_norm": 0.08318059891462326, "learning_rate": 1.9028222934472936e-05, "loss": 0.0182, "step": 83990 }, { "epoch": 0.6209159989355726, "grad_norm": 0.08298517763614655, "learning_rate": 1.902451329534663e-05, "loss": 0.0185, "step": 84000 }, { "epoch": 0.6209899175068744, "grad_norm": 0.06689772754907608, "learning_rate": 1.9020803656220325e-05, "loss": 0.0152, "step": 84010 }, { "epoch": 0.6210638360781763, "grad_norm": 0.1090644896030426, "learning_rate": 1.9017094017094017e-05, "loss": 0.0203, "step": 84020 }, { "epoch": 0.6211377546494782, "grad_norm": 0.09569858759641647, "learning_rate": 1.9013384377967713e-05, "loss": 0.0185, "step": 84030 }, { "epoch": 0.62121167322078, "grad_norm": 0.07898285239934921, "learning_rate": 1.9009674738841406e-05, "loss": 0.0158, "step": 84040 }, { "epoch": 0.6212855917920819, "grad_norm": 0.08022938668727875, "learning_rate": 1.9005965099715102e-05, "loss": 0.0172, "step": 84050 }, { "epoch": 0.6213595103633837, "grad_norm": 0.07045149058103561, "learning_rate": 1.9002255460588795e-05, "loss": 0.0182, "step": 84060 }, { "epoch": 0.6214334289346856, "grad_norm": 0.09920522570610046, "learning_rate": 1.8998545821462487e-05, "loss": 0.017, "step": 84070 }, { "epoch": 0.6215073475059874, "grad_norm": 0.10555408149957657, "learning_rate": 1.8994836182336183e-05, "loss": 0.0209, "step": 84080 }, { "epoch": 0.6215812660772893, "grad_norm": 0.06650474667549133, "learning_rate": 1.899112654320988e-05, "loss": 0.0176, "step": 84090 }, { "epoch": 0.6216551846485912, "grad_norm": 0.07660243660211563, "learning_rate": 1.898741690408357e-05, "loss": 0.0179, "step": 84100 }, { "epoch": 0.6217291032198929, "grad_norm": 0.07393896579742432, "learning_rate": 1.8983707264957264e-05, "loss": 0.0171, "step": 84110 }, { "epoch": 0.6218030217911948, "grad_norm": 0.09908831864595413, "learning_rate": 1.897999762583096e-05, "loss": 0.0198, "step": 84120 }, { "epoch": 0.6218769403624966, "grad_norm": 0.07033517956733704, "learning_rate": 1.8976287986704656e-05, "loss": 0.0192, "step": 84130 }, { "epoch": 0.6219508589337985, "grad_norm": 0.0860503613948822, "learning_rate": 1.897257834757835e-05, "loss": 0.0177, "step": 84140 }, { "epoch": 0.6220247775051004, "grad_norm": 0.10883115977048874, "learning_rate": 1.896886870845204e-05, "loss": 0.0158, "step": 84150 }, { "epoch": 0.6220986960764022, "grad_norm": 0.07544694095849991, "learning_rate": 1.8965159069325737e-05, "loss": 0.0195, "step": 84160 }, { "epoch": 0.6221726146477041, "grad_norm": 0.06770365685224533, "learning_rate": 1.8961449430199433e-05, "loss": 0.017, "step": 84170 }, { "epoch": 0.6222465332190059, "grad_norm": 0.09239034354686737, "learning_rate": 1.8957739791073126e-05, "loss": 0.0165, "step": 84180 }, { "epoch": 0.6223204517903078, "grad_norm": 0.0541006475687027, "learning_rate": 1.895403015194682e-05, "loss": 0.0172, "step": 84190 }, { "epoch": 0.6223943703616096, "grad_norm": 0.08181953430175781, "learning_rate": 1.8950320512820514e-05, "loss": 0.0184, "step": 84200 }, { "epoch": 0.6224682889329115, "grad_norm": 0.085248202085495, "learning_rate": 1.8946610873694207e-05, "loss": 0.0183, "step": 84210 }, { "epoch": 0.6225422075042134, "grad_norm": 0.07147057354450226, "learning_rate": 1.8942901234567903e-05, "loss": 0.0165, "step": 84220 }, { "epoch": 0.6226161260755152, "grad_norm": 0.08153613656759262, "learning_rate": 1.8939191595441596e-05, "loss": 0.0174, "step": 84230 }, { "epoch": 0.6226900446468171, "grad_norm": 0.07802841067314148, "learning_rate": 1.893548195631529e-05, "loss": 0.018, "step": 84240 }, { "epoch": 0.6227639632181189, "grad_norm": 0.08233258128166199, "learning_rate": 1.8931772317188984e-05, "loss": 0.0158, "step": 84250 }, { "epoch": 0.6228378817894208, "grad_norm": 0.08503064513206482, "learning_rate": 1.892806267806268e-05, "loss": 0.0135, "step": 84260 }, { "epoch": 0.6229118003607226, "grad_norm": 0.07782771438360214, "learning_rate": 1.8924353038936373e-05, "loss": 0.019, "step": 84270 }, { "epoch": 0.6229857189320245, "grad_norm": 0.07316979765892029, "learning_rate": 1.892064339981007e-05, "loss": 0.0154, "step": 84280 }, { "epoch": 0.6230596375033264, "grad_norm": 0.06318678706884384, "learning_rate": 1.891693376068376e-05, "loss": 0.0154, "step": 84290 }, { "epoch": 0.6231335560746282, "grad_norm": 0.09239519387483597, "learning_rate": 1.8913224121557454e-05, "loss": 0.016, "step": 84300 }, { "epoch": 0.6232074746459301, "grad_norm": 0.0915965810418129, "learning_rate": 1.890951448243115e-05, "loss": 0.016, "step": 84310 }, { "epoch": 0.6232813932172319, "grad_norm": 0.07295511662960052, "learning_rate": 1.8905804843304846e-05, "loss": 0.0182, "step": 84320 }, { "epoch": 0.6233553117885338, "grad_norm": 0.09266003966331482, "learning_rate": 1.8902095204178538e-05, "loss": 0.0161, "step": 84330 }, { "epoch": 0.6234292303598356, "grad_norm": 0.08012060821056366, "learning_rate": 1.889838556505223e-05, "loss": 0.0175, "step": 84340 }, { "epoch": 0.6235031489311375, "grad_norm": 0.10466712713241577, "learning_rate": 1.8894675925925927e-05, "loss": 0.0177, "step": 84350 }, { "epoch": 0.6235770675024394, "grad_norm": 0.08725877106189728, "learning_rate": 1.8890966286799623e-05, "loss": 0.0171, "step": 84360 }, { "epoch": 0.6236509860737411, "grad_norm": 0.13205556571483612, "learning_rate": 1.8887256647673315e-05, "loss": 0.0152, "step": 84370 }, { "epoch": 0.623724904645043, "grad_norm": 0.09634332358837128, "learning_rate": 1.8883547008547008e-05, "loss": 0.0183, "step": 84380 }, { "epoch": 0.6237988232163448, "grad_norm": 0.08334216475486755, "learning_rate": 1.8879837369420704e-05, "loss": 0.0159, "step": 84390 }, { "epoch": 0.6238727417876467, "grad_norm": 0.06158098205924034, "learning_rate": 1.88761277302944e-05, "loss": 0.0161, "step": 84400 }, { "epoch": 0.6239466603589486, "grad_norm": 0.08288898319005966, "learning_rate": 1.8872418091168092e-05, "loss": 0.0183, "step": 84410 }, { "epoch": 0.6240205789302504, "grad_norm": 0.10982749611139297, "learning_rate": 1.8868708452041785e-05, "loss": 0.019, "step": 84420 }, { "epoch": 0.6240944975015523, "grad_norm": 0.07673147320747375, "learning_rate": 1.886499881291548e-05, "loss": 0.0165, "step": 84430 }, { "epoch": 0.6241684160728541, "grad_norm": 0.0847049355506897, "learning_rate": 1.8861289173789174e-05, "loss": 0.0153, "step": 84440 }, { "epoch": 0.624242334644156, "grad_norm": 0.13826969265937805, "learning_rate": 1.885757953466287e-05, "loss": 0.0198, "step": 84450 }, { "epoch": 0.6243162532154578, "grad_norm": 0.08034597337245941, "learning_rate": 1.8853869895536562e-05, "loss": 0.0162, "step": 84460 }, { "epoch": 0.6243901717867597, "grad_norm": 0.06714905798435211, "learning_rate": 1.8850160256410258e-05, "loss": 0.0172, "step": 84470 }, { "epoch": 0.6244640903580616, "grad_norm": 0.09450197964906693, "learning_rate": 1.884645061728395e-05, "loss": 0.0189, "step": 84480 }, { "epoch": 0.6245380089293634, "grad_norm": 0.07406625896692276, "learning_rate": 1.8842740978157647e-05, "loss": 0.0159, "step": 84490 }, { "epoch": 0.6246119275006653, "grad_norm": 0.07484360784292221, "learning_rate": 1.883903133903134e-05, "loss": 0.019, "step": 84500 }, { "epoch": 0.6246858460719671, "grad_norm": 0.0986456349492073, "learning_rate": 1.8835321699905035e-05, "loss": 0.0177, "step": 84510 }, { "epoch": 0.624759764643269, "grad_norm": 0.06627759337425232, "learning_rate": 1.8831612060778728e-05, "loss": 0.0192, "step": 84520 }, { "epoch": 0.6248336832145708, "grad_norm": 0.07380781322717667, "learning_rate": 1.882790242165242e-05, "loss": 0.0183, "step": 84530 }, { "epoch": 0.6249076017858727, "grad_norm": 0.0757727399468422, "learning_rate": 1.8824192782526116e-05, "loss": 0.0166, "step": 84540 }, { "epoch": 0.6249815203571746, "grad_norm": 0.06838218867778778, "learning_rate": 1.8820483143399812e-05, "loss": 0.017, "step": 84550 }, { "epoch": 0.6250554389284764, "grad_norm": 0.06704081594944, "learning_rate": 1.8816773504273505e-05, "loss": 0.0149, "step": 84560 }, { "epoch": 0.6251293574997783, "grad_norm": 0.072157122194767, "learning_rate": 1.8813063865147197e-05, "loss": 0.0156, "step": 84570 }, { "epoch": 0.6252032760710801, "grad_norm": 0.091456338763237, "learning_rate": 1.8809354226020893e-05, "loss": 0.0188, "step": 84580 }, { "epoch": 0.625277194642382, "grad_norm": 0.047500334680080414, "learning_rate": 1.880564458689459e-05, "loss": 0.0158, "step": 84590 }, { "epoch": 0.6253511132136838, "grad_norm": 0.07546786218881607, "learning_rate": 1.8801934947768282e-05, "loss": 0.0165, "step": 84600 }, { "epoch": 0.6254250317849857, "grad_norm": 0.07692951709032059, "learning_rate": 1.8798225308641975e-05, "loss": 0.0175, "step": 84610 }, { "epoch": 0.6254989503562876, "grad_norm": 0.0885094627737999, "learning_rate": 1.879451566951567e-05, "loss": 0.0162, "step": 84620 }, { "epoch": 0.6255728689275893, "grad_norm": 0.0944632887840271, "learning_rate": 1.8790806030389366e-05, "loss": 0.0188, "step": 84630 }, { "epoch": 0.6256467874988912, "grad_norm": 0.08889977633953094, "learning_rate": 1.878709639126306e-05, "loss": 0.0188, "step": 84640 }, { "epoch": 0.625720706070193, "grad_norm": 0.07446218281984329, "learning_rate": 1.878338675213675e-05, "loss": 0.0199, "step": 84650 }, { "epoch": 0.6257946246414949, "grad_norm": 0.07708046585321426, "learning_rate": 1.8779677113010448e-05, "loss": 0.0165, "step": 84660 }, { "epoch": 0.6258685432127968, "grad_norm": 0.10142193734645844, "learning_rate": 1.877596747388414e-05, "loss": 0.0179, "step": 84670 }, { "epoch": 0.6259424617840986, "grad_norm": 0.08232563734054565, "learning_rate": 1.8772257834757836e-05, "loss": 0.0172, "step": 84680 }, { "epoch": 0.6260163803554005, "grad_norm": 0.09110886603593826, "learning_rate": 1.876854819563153e-05, "loss": 0.0168, "step": 84690 }, { "epoch": 0.6260902989267023, "grad_norm": 0.0754542201757431, "learning_rate": 1.8764838556505225e-05, "loss": 0.0178, "step": 84700 }, { "epoch": 0.6261642174980042, "grad_norm": 0.07504022866487503, "learning_rate": 1.8761128917378917e-05, "loss": 0.0182, "step": 84710 }, { "epoch": 0.626238136069306, "grad_norm": 0.053055763244628906, "learning_rate": 1.8757419278252613e-05, "loss": 0.0164, "step": 84720 }, { "epoch": 0.6263120546406079, "grad_norm": 0.10610322654247284, "learning_rate": 1.8753709639126306e-05, "loss": 0.0169, "step": 84730 }, { "epoch": 0.6263859732119098, "grad_norm": 0.08564948290586472, "learning_rate": 1.8750000000000002e-05, "loss": 0.0163, "step": 84740 }, { "epoch": 0.6264598917832116, "grad_norm": 0.062062621116638184, "learning_rate": 1.8746290360873694e-05, "loss": 0.0181, "step": 84750 }, { "epoch": 0.6265338103545135, "grad_norm": 0.08934297412633896, "learning_rate": 1.8742580721747387e-05, "loss": 0.0151, "step": 84760 }, { "epoch": 0.6266077289258153, "grad_norm": 0.057570990175008774, "learning_rate": 1.8738871082621083e-05, "loss": 0.0185, "step": 84770 }, { "epoch": 0.6266816474971172, "grad_norm": 0.07890599966049194, "learning_rate": 1.873516144349478e-05, "loss": 0.0182, "step": 84780 }, { "epoch": 0.626755566068419, "grad_norm": 0.060687899589538574, "learning_rate": 1.873145180436847e-05, "loss": 0.0192, "step": 84790 }, { "epoch": 0.6268294846397209, "grad_norm": 0.07368257641792297, "learning_rate": 1.8727742165242164e-05, "loss": 0.0166, "step": 84800 }, { "epoch": 0.6269034032110228, "grad_norm": 0.08597444742918015, "learning_rate": 1.8724032526115863e-05, "loss": 0.0184, "step": 84810 }, { "epoch": 0.6269773217823246, "grad_norm": 0.06118566542863846, "learning_rate": 1.8720322886989556e-05, "loss": 0.0176, "step": 84820 }, { "epoch": 0.6270512403536265, "grad_norm": 0.10047478973865509, "learning_rate": 1.871661324786325e-05, "loss": 0.0178, "step": 84830 }, { "epoch": 0.6271251589249283, "grad_norm": 0.07671331614255905, "learning_rate": 1.871290360873694e-05, "loss": 0.017, "step": 84840 }, { "epoch": 0.6271990774962302, "grad_norm": 0.07404090464115143, "learning_rate": 1.8709193969610637e-05, "loss": 0.018, "step": 84850 }, { "epoch": 0.627272996067532, "grad_norm": 0.06930069625377655, "learning_rate": 1.8705484330484333e-05, "loss": 0.0187, "step": 84860 }, { "epoch": 0.6273469146388339, "grad_norm": 0.08201423287391663, "learning_rate": 1.8701774691358026e-05, "loss": 0.0194, "step": 84870 }, { "epoch": 0.6274208332101358, "grad_norm": 0.06555015593767166, "learning_rate": 1.8698065052231718e-05, "loss": 0.0186, "step": 84880 }, { "epoch": 0.6274947517814375, "grad_norm": 0.07549230754375458, "learning_rate": 1.8694355413105414e-05, "loss": 0.0175, "step": 84890 }, { "epoch": 0.6275686703527394, "grad_norm": 0.09163384139537811, "learning_rate": 1.8690645773979107e-05, "loss": 0.0181, "step": 84900 }, { "epoch": 0.6276425889240412, "grad_norm": 0.08533893525600433, "learning_rate": 1.8686936134852803e-05, "loss": 0.0154, "step": 84910 }, { "epoch": 0.6277165074953431, "grad_norm": 0.07470065355300903, "learning_rate": 1.8683226495726495e-05, "loss": 0.0154, "step": 84920 }, { "epoch": 0.627790426066645, "grad_norm": 0.07613536715507507, "learning_rate": 1.867951685660019e-05, "loss": 0.0166, "step": 84930 }, { "epoch": 0.6278643446379468, "grad_norm": 0.11712261289358139, "learning_rate": 1.8675807217473884e-05, "loss": 0.019, "step": 84940 }, { "epoch": 0.6279382632092487, "grad_norm": 0.10472145676612854, "learning_rate": 1.867209757834758e-05, "loss": 0.0202, "step": 84950 }, { "epoch": 0.6280121817805505, "grad_norm": 0.05405691638588905, "learning_rate": 1.8668387939221272e-05, "loss": 0.0165, "step": 84960 }, { "epoch": 0.6280861003518524, "grad_norm": 0.06644877791404724, "learning_rate": 1.866467830009497e-05, "loss": 0.0184, "step": 84970 }, { "epoch": 0.6281600189231542, "grad_norm": 0.06299781054258347, "learning_rate": 1.866096866096866e-05, "loss": 0.0167, "step": 84980 }, { "epoch": 0.6282339374944561, "grad_norm": 0.06534602493047714, "learning_rate": 1.8657259021842353e-05, "loss": 0.016, "step": 84990 }, { "epoch": 0.628307856065758, "grad_norm": 0.09402451664209366, "learning_rate": 1.8653549382716053e-05, "loss": 0.019, "step": 85000 }, { "epoch": 0.6283817746370598, "grad_norm": 0.11132051050662994, "learning_rate": 1.8649839743589745e-05, "loss": 0.0182, "step": 85010 }, { "epoch": 0.6284556932083617, "grad_norm": 0.09433764219284058, "learning_rate": 1.8646130104463438e-05, "loss": 0.0187, "step": 85020 }, { "epoch": 0.6285296117796635, "grad_norm": 0.0806804820895195, "learning_rate": 1.864242046533713e-05, "loss": 0.0201, "step": 85030 }, { "epoch": 0.6286035303509654, "grad_norm": 0.06340008974075317, "learning_rate": 1.863871082621083e-05, "loss": 0.0145, "step": 85040 }, { "epoch": 0.6286774489222672, "grad_norm": 0.07197962701320648, "learning_rate": 1.8635001187084523e-05, "loss": 0.0148, "step": 85050 }, { "epoch": 0.6287513674935691, "grad_norm": 0.07551277428865433, "learning_rate": 1.8631291547958215e-05, "loss": 0.0167, "step": 85060 }, { "epoch": 0.628825286064871, "grad_norm": 0.06895064562559128, "learning_rate": 1.8627581908831908e-05, "loss": 0.0154, "step": 85070 }, { "epoch": 0.6288992046361728, "grad_norm": 0.07740975171327591, "learning_rate": 1.8623872269705604e-05, "loss": 0.0175, "step": 85080 }, { "epoch": 0.6289731232074747, "grad_norm": 0.07699029892683029, "learning_rate": 1.86201626305793e-05, "loss": 0.017, "step": 85090 }, { "epoch": 0.6290470417787765, "grad_norm": 0.07050425559282303, "learning_rate": 1.8616452991452992e-05, "loss": 0.0204, "step": 85100 }, { "epoch": 0.6291209603500784, "grad_norm": 0.09506210684776306, "learning_rate": 1.8612743352326685e-05, "loss": 0.0194, "step": 85110 }, { "epoch": 0.6291948789213803, "grad_norm": 0.06495136767625809, "learning_rate": 1.860903371320038e-05, "loss": 0.0184, "step": 85120 }, { "epoch": 0.629268797492682, "grad_norm": 0.08276129513978958, "learning_rate": 1.8605324074074073e-05, "loss": 0.0167, "step": 85130 }, { "epoch": 0.629342716063984, "grad_norm": 0.1007828414440155, "learning_rate": 1.860161443494777e-05, "loss": 0.0164, "step": 85140 }, { "epoch": 0.6294166346352857, "grad_norm": 0.10978119820356369, "learning_rate": 1.8597904795821465e-05, "loss": 0.0192, "step": 85150 }, { "epoch": 0.6294905532065876, "grad_norm": 0.06298724561929703, "learning_rate": 1.8594195156695158e-05, "loss": 0.0156, "step": 85160 }, { "epoch": 0.6295644717778894, "grad_norm": 0.08732854574918747, "learning_rate": 1.859048551756885e-05, "loss": 0.0163, "step": 85170 }, { "epoch": 0.6296383903491913, "grad_norm": 0.07033935934305191, "learning_rate": 1.8586775878442546e-05, "loss": 0.0162, "step": 85180 }, { "epoch": 0.6297123089204932, "grad_norm": 0.08429215103387833, "learning_rate": 1.8583066239316242e-05, "loss": 0.0168, "step": 85190 }, { "epoch": 0.629786227491795, "grad_norm": 0.08191212266683578, "learning_rate": 1.8579356600189935e-05, "loss": 0.0182, "step": 85200 }, { "epoch": 0.6298601460630969, "grad_norm": 0.07270575314760208, "learning_rate": 1.8575646961063627e-05, "loss": 0.016, "step": 85210 }, { "epoch": 0.6299340646343987, "grad_norm": 0.09649472683668137, "learning_rate": 1.857193732193732e-05, "loss": 0.0185, "step": 85220 }, { "epoch": 0.6300079832057006, "grad_norm": 0.06528923660516739, "learning_rate": 1.856822768281102e-05, "loss": 0.0189, "step": 85230 }, { "epoch": 0.6300819017770024, "grad_norm": 0.05460357666015625, "learning_rate": 1.8564518043684712e-05, "loss": 0.0165, "step": 85240 }, { "epoch": 0.6301558203483043, "grad_norm": 0.07081805169582367, "learning_rate": 1.8560808404558405e-05, "loss": 0.0174, "step": 85250 }, { "epoch": 0.6302297389196062, "grad_norm": 0.0982808768749237, "learning_rate": 1.8557098765432097e-05, "loss": 0.0172, "step": 85260 }, { "epoch": 0.630303657490908, "grad_norm": 0.11132100224494934, "learning_rate": 1.8553389126305797e-05, "loss": 0.018, "step": 85270 }, { "epoch": 0.6303775760622099, "grad_norm": 0.0746561735868454, "learning_rate": 1.854967948717949e-05, "loss": 0.0169, "step": 85280 }, { "epoch": 0.6304514946335117, "grad_norm": 0.09312082082033157, "learning_rate": 1.854596984805318e-05, "loss": 0.018, "step": 85290 }, { "epoch": 0.6305254132048136, "grad_norm": 0.0683741420507431, "learning_rate": 1.8542260208926878e-05, "loss": 0.0159, "step": 85300 }, { "epoch": 0.6305993317761154, "grad_norm": 0.09346149861812592, "learning_rate": 1.853855056980057e-05, "loss": 0.0169, "step": 85310 }, { "epoch": 0.6306732503474173, "grad_norm": 0.07769722491502762, "learning_rate": 1.8534840930674266e-05, "loss": 0.0177, "step": 85320 }, { "epoch": 0.6307471689187192, "grad_norm": 0.08260294049978256, "learning_rate": 1.853113129154796e-05, "loss": 0.0175, "step": 85330 }, { "epoch": 0.630821087490021, "grad_norm": 0.08505997806787491, "learning_rate": 1.8527421652421655e-05, "loss": 0.0173, "step": 85340 }, { "epoch": 0.6308950060613229, "grad_norm": 0.08676422387361526, "learning_rate": 1.8523712013295347e-05, "loss": 0.0191, "step": 85350 }, { "epoch": 0.6309689246326247, "grad_norm": 0.09579332917928696, "learning_rate": 1.852000237416904e-05, "loss": 0.019, "step": 85360 }, { "epoch": 0.6310428432039266, "grad_norm": 0.07286658883094788, "learning_rate": 1.8516292735042736e-05, "loss": 0.0182, "step": 85370 }, { "epoch": 0.6311167617752285, "grad_norm": 0.08508472889661789, "learning_rate": 1.8512583095916432e-05, "loss": 0.0175, "step": 85380 }, { "epoch": 0.6311906803465303, "grad_norm": 0.06830597668886185, "learning_rate": 1.8508873456790124e-05, "loss": 0.0162, "step": 85390 }, { "epoch": 0.6312645989178322, "grad_norm": 0.07824182510375977, "learning_rate": 1.8505163817663817e-05, "loss": 0.0173, "step": 85400 }, { "epoch": 0.6313385174891339, "grad_norm": 0.08850301057100296, "learning_rate": 1.8501454178537513e-05, "loss": 0.0159, "step": 85410 }, { "epoch": 0.6314124360604358, "grad_norm": 0.07097941637039185, "learning_rate": 1.849774453941121e-05, "loss": 0.0197, "step": 85420 }, { "epoch": 0.6314863546317376, "grad_norm": 0.06867203861474991, "learning_rate": 1.84940349002849e-05, "loss": 0.0186, "step": 85430 }, { "epoch": 0.6315602732030395, "grad_norm": 0.06668906658887863, "learning_rate": 1.8490325261158594e-05, "loss": 0.0167, "step": 85440 }, { "epoch": 0.6316341917743414, "grad_norm": 0.0815042108297348, "learning_rate": 1.848661562203229e-05, "loss": 0.0165, "step": 85450 }, { "epoch": 0.6317081103456432, "grad_norm": 0.07367333024740219, "learning_rate": 1.8482905982905986e-05, "loss": 0.0193, "step": 85460 }, { "epoch": 0.6317820289169451, "grad_norm": 0.05753447115421295, "learning_rate": 1.847919634377968e-05, "loss": 0.0156, "step": 85470 }, { "epoch": 0.6318559474882469, "grad_norm": 0.08272279798984528, "learning_rate": 1.847548670465337e-05, "loss": 0.0187, "step": 85480 }, { "epoch": 0.6319298660595488, "grad_norm": 0.09389559179544449, "learning_rate": 1.8471777065527067e-05, "loss": 0.0175, "step": 85490 }, { "epoch": 0.6320037846308506, "grad_norm": 0.09469499439001083, "learning_rate": 1.8468067426400763e-05, "loss": 0.0184, "step": 85500 }, { "epoch": 0.6320777032021525, "grad_norm": 0.07357271015644073, "learning_rate": 1.8464357787274456e-05, "loss": 0.0162, "step": 85510 }, { "epoch": 0.6321516217734544, "grad_norm": 0.08749333024024963, "learning_rate": 1.8460648148148148e-05, "loss": 0.0192, "step": 85520 }, { "epoch": 0.6322255403447562, "grad_norm": 0.05760166421532631, "learning_rate": 1.8456938509021844e-05, "loss": 0.0146, "step": 85530 }, { "epoch": 0.6322994589160581, "grad_norm": 0.0925261601805687, "learning_rate": 1.8453228869895537e-05, "loss": 0.0186, "step": 85540 }, { "epoch": 0.6323733774873599, "grad_norm": 0.08290409296751022, "learning_rate": 1.8449519230769233e-05, "loss": 0.0163, "step": 85550 }, { "epoch": 0.6324472960586618, "grad_norm": 0.08422588557004929, "learning_rate": 1.8445809591642925e-05, "loss": 0.0196, "step": 85560 }, { "epoch": 0.6325212146299636, "grad_norm": 0.08946088701486588, "learning_rate": 1.844209995251662e-05, "loss": 0.0159, "step": 85570 }, { "epoch": 0.6325951332012655, "grad_norm": 0.11140304058790207, "learning_rate": 1.8438390313390314e-05, "loss": 0.0181, "step": 85580 }, { "epoch": 0.6326690517725674, "grad_norm": 0.09750638157129288, "learning_rate": 1.8434680674264006e-05, "loss": 0.0178, "step": 85590 }, { "epoch": 0.6327429703438692, "grad_norm": 0.061476483941078186, "learning_rate": 1.8430971035137702e-05, "loss": 0.0188, "step": 85600 }, { "epoch": 0.6328168889151711, "grad_norm": 0.08562640100717545, "learning_rate": 1.84272613960114e-05, "loss": 0.0182, "step": 85610 }, { "epoch": 0.6328908074864729, "grad_norm": 0.06430555135011673, "learning_rate": 1.842355175688509e-05, "loss": 0.0162, "step": 85620 }, { "epoch": 0.6329647260577748, "grad_norm": 0.06210716813802719, "learning_rate": 1.8419842117758784e-05, "loss": 0.0161, "step": 85630 }, { "epoch": 0.6330386446290767, "grad_norm": 0.06714742630720139, "learning_rate": 1.841613247863248e-05, "loss": 0.0186, "step": 85640 }, { "epoch": 0.6331125632003785, "grad_norm": 0.05862513184547424, "learning_rate": 1.8412422839506175e-05, "loss": 0.0151, "step": 85650 }, { "epoch": 0.6331864817716804, "grad_norm": 0.08143387734889984, "learning_rate": 1.8408713200379868e-05, "loss": 0.0188, "step": 85660 }, { "epoch": 0.6332604003429821, "grad_norm": 0.06120765581727028, "learning_rate": 1.840500356125356e-05, "loss": 0.021, "step": 85670 }, { "epoch": 0.633334318914284, "grad_norm": 0.07627062499523163, "learning_rate": 1.8401293922127257e-05, "loss": 0.0156, "step": 85680 }, { "epoch": 0.6334082374855858, "grad_norm": 0.0795658677816391, "learning_rate": 1.8397584283000953e-05, "loss": 0.0184, "step": 85690 }, { "epoch": 0.6334821560568877, "grad_norm": 0.09216859191656113, "learning_rate": 1.8393874643874645e-05, "loss": 0.019, "step": 85700 }, { "epoch": 0.6335560746281896, "grad_norm": 0.07667369395494461, "learning_rate": 1.8390165004748338e-05, "loss": 0.0187, "step": 85710 }, { "epoch": 0.6336299931994914, "grad_norm": 0.09150813519954681, "learning_rate": 1.8386455365622034e-05, "loss": 0.0202, "step": 85720 }, { "epoch": 0.6337039117707933, "grad_norm": 0.057827726006507874, "learning_rate": 1.838274572649573e-05, "loss": 0.0166, "step": 85730 }, { "epoch": 0.6337778303420951, "grad_norm": 0.07610096037387848, "learning_rate": 1.8379036087369422e-05, "loss": 0.0172, "step": 85740 }, { "epoch": 0.633851748913397, "grad_norm": 0.06838104873895645, "learning_rate": 1.8375326448243115e-05, "loss": 0.0189, "step": 85750 }, { "epoch": 0.6339256674846988, "grad_norm": 0.0750965029001236, "learning_rate": 1.837161680911681e-05, "loss": 0.0194, "step": 85760 }, { "epoch": 0.6339995860560007, "grad_norm": 0.07158775627613068, "learning_rate": 1.8367907169990503e-05, "loss": 0.0186, "step": 85770 }, { "epoch": 0.6340735046273026, "grad_norm": 0.07273725420236588, "learning_rate": 1.83641975308642e-05, "loss": 0.0165, "step": 85780 }, { "epoch": 0.6341474231986044, "grad_norm": 0.0997527688741684, "learning_rate": 1.8360487891737892e-05, "loss": 0.0177, "step": 85790 }, { "epoch": 0.6342213417699063, "grad_norm": 0.08006743341684341, "learning_rate": 1.8356778252611588e-05, "loss": 0.0181, "step": 85800 }, { "epoch": 0.6342952603412081, "grad_norm": 0.06414107233285904, "learning_rate": 1.835306861348528e-05, "loss": 0.0174, "step": 85810 }, { "epoch": 0.63436917891251, "grad_norm": 0.0821545273065567, "learning_rate": 1.8349358974358973e-05, "loss": 0.0162, "step": 85820 }, { "epoch": 0.6344430974838118, "grad_norm": 0.08555491268634796, "learning_rate": 1.834564933523267e-05, "loss": 0.0177, "step": 85830 }, { "epoch": 0.6345170160551137, "grad_norm": 0.08455513417720795, "learning_rate": 1.8341939696106365e-05, "loss": 0.0184, "step": 85840 }, { "epoch": 0.6345909346264156, "grad_norm": 0.08851484954357147, "learning_rate": 1.8338230056980058e-05, "loss": 0.0178, "step": 85850 }, { "epoch": 0.6346648531977174, "grad_norm": 0.06817267835140228, "learning_rate": 1.833452041785375e-05, "loss": 0.0146, "step": 85860 }, { "epoch": 0.6347387717690193, "grad_norm": 0.11727976053953171, "learning_rate": 1.8330810778727446e-05, "loss": 0.0169, "step": 85870 }, { "epoch": 0.6348126903403211, "grad_norm": 0.11674083024263382, "learning_rate": 1.8327101139601142e-05, "loss": 0.0171, "step": 85880 }, { "epoch": 0.634886608911623, "grad_norm": 0.09169033169746399, "learning_rate": 1.8323391500474835e-05, "loss": 0.0173, "step": 85890 }, { "epoch": 0.6349605274829249, "grad_norm": 0.09100646525621414, "learning_rate": 1.8319681861348527e-05, "loss": 0.0202, "step": 85900 }, { "epoch": 0.6350344460542267, "grad_norm": 0.09275765717029572, "learning_rate": 1.8315972222222223e-05, "loss": 0.0176, "step": 85910 }, { "epoch": 0.6351083646255286, "grad_norm": 0.06938066333532333, "learning_rate": 1.831226258309592e-05, "loss": 0.0177, "step": 85920 }, { "epoch": 0.6351822831968303, "grad_norm": 0.0831063911318779, "learning_rate": 1.8308552943969612e-05, "loss": 0.0179, "step": 85930 }, { "epoch": 0.6352562017681322, "grad_norm": 0.06474802643060684, "learning_rate": 1.8304843304843304e-05, "loss": 0.0184, "step": 85940 }, { "epoch": 0.635330120339434, "grad_norm": 0.05899034067988396, "learning_rate": 1.8301133665717e-05, "loss": 0.0167, "step": 85950 }, { "epoch": 0.6354040389107359, "grad_norm": 0.0838722288608551, "learning_rate": 1.8297424026590696e-05, "loss": 0.0194, "step": 85960 }, { "epoch": 0.6354779574820378, "grad_norm": 0.09603888541460037, "learning_rate": 1.829371438746439e-05, "loss": 0.0193, "step": 85970 }, { "epoch": 0.6355518760533396, "grad_norm": 0.07909788191318512, "learning_rate": 1.829000474833808e-05, "loss": 0.0138, "step": 85980 }, { "epoch": 0.6356257946246415, "grad_norm": 0.08713407814502716, "learning_rate": 1.8286295109211777e-05, "loss": 0.0154, "step": 85990 }, { "epoch": 0.6356997131959433, "grad_norm": 0.06677623838186264, "learning_rate": 1.828258547008547e-05, "loss": 0.0186, "step": 86000 }, { "epoch": 0.6357736317672452, "grad_norm": 0.10572227835655212, "learning_rate": 1.8278875830959166e-05, "loss": 0.0179, "step": 86010 }, { "epoch": 0.635847550338547, "grad_norm": 0.08408637344837189, "learning_rate": 1.827516619183286e-05, "loss": 0.019, "step": 86020 }, { "epoch": 0.6359214689098489, "grad_norm": 0.05936083570122719, "learning_rate": 1.8271456552706554e-05, "loss": 0.0157, "step": 86030 }, { "epoch": 0.6359953874811508, "grad_norm": 0.086244136095047, "learning_rate": 1.8267746913580247e-05, "loss": 0.0167, "step": 86040 }, { "epoch": 0.6360693060524526, "grad_norm": 0.08920861035585403, "learning_rate": 1.826403727445394e-05, "loss": 0.0185, "step": 86050 }, { "epoch": 0.6361432246237545, "grad_norm": 0.06074165552854538, "learning_rate": 1.8260327635327636e-05, "loss": 0.0173, "step": 86060 }, { "epoch": 0.6362171431950563, "grad_norm": 0.07496129721403122, "learning_rate": 1.825661799620133e-05, "loss": 0.0182, "step": 86070 }, { "epoch": 0.6362910617663582, "grad_norm": 0.08098746836185455, "learning_rate": 1.8252908357075024e-05, "loss": 0.0195, "step": 86080 }, { "epoch": 0.63636498033766, "grad_norm": 0.06507303565740585, "learning_rate": 1.8249198717948717e-05, "loss": 0.0183, "step": 86090 }, { "epoch": 0.6364388989089619, "grad_norm": 0.0749412328004837, "learning_rate": 1.8245489078822413e-05, "loss": 0.0191, "step": 86100 }, { "epoch": 0.6365128174802638, "grad_norm": 0.0722000002861023, "learning_rate": 1.824177943969611e-05, "loss": 0.0181, "step": 86110 }, { "epoch": 0.6365867360515656, "grad_norm": 0.05548422038555145, "learning_rate": 1.82380698005698e-05, "loss": 0.0152, "step": 86120 }, { "epoch": 0.6366606546228675, "grad_norm": 0.11477868258953094, "learning_rate": 1.8234360161443494e-05, "loss": 0.0163, "step": 86130 }, { "epoch": 0.6367345731941693, "grad_norm": 0.09127828478813171, "learning_rate": 1.823065052231719e-05, "loss": 0.0168, "step": 86140 }, { "epoch": 0.6368084917654712, "grad_norm": 0.08679413050413132, "learning_rate": 1.8226940883190886e-05, "loss": 0.0177, "step": 86150 }, { "epoch": 0.6368824103367731, "grad_norm": 0.07122139632701874, "learning_rate": 1.822323124406458e-05, "loss": 0.0172, "step": 86160 }, { "epoch": 0.6369563289080749, "grad_norm": 0.11077108979225159, "learning_rate": 1.821952160493827e-05, "loss": 0.0192, "step": 86170 }, { "epoch": 0.6370302474793768, "grad_norm": 0.07095395028591156, "learning_rate": 1.8215811965811967e-05, "loss": 0.0199, "step": 86180 }, { "epoch": 0.6371041660506785, "grad_norm": 0.0770963728427887, "learning_rate": 1.8212102326685663e-05, "loss": 0.0155, "step": 86190 }, { "epoch": 0.6371780846219804, "grad_norm": 0.09591083973646164, "learning_rate": 1.8208392687559355e-05, "loss": 0.0176, "step": 86200 }, { "epoch": 0.6372520031932822, "grad_norm": 0.07716168463230133, "learning_rate": 1.8204683048433048e-05, "loss": 0.0184, "step": 86210 }, { "epoch": 0.6373259217645841, "grad_norm": 0.08891519159078598, "learning_rate": 1.8200973409306744e-05, "loss": 0.0155, "step": 86220 }, { "epoch": 0.637399840335886, "grad_norm": 0.08693523705005646, "learning_rate": 1.8197263770180437e-05, "loss": 0.0176, "step": 86230 }, { "epoch": 0.6374737589071878, "grad_norm": 0.07175737619400024, "learning_rate": 1.8193554131054133e-05, "loss": 0.0186, "step": 86240 }, { "epoch": 0.6375476774784897, "grad_norm": 0.07700374722480774, "learning_rate": 1.8189844491927825e-05, "loss": 0.0181, "step": 86250 }, { "epoch": 0.6376215960497915, "grad_norm": 0.0934210941195488, "learning_rate": 1.818613485280152e-05, "loss": 0.0193, "step": 86260 }, { "epoch": 0.6376955146210934, "grad_norm": 0.08946838229894638, "learning_rate": 1.8182425213675214e-05, "loss": 0.019, "step": 86270 }, { "epoch": 0.6377694331923952, "grad_norm": 0.0774223729968071, "learning_rate": 1.8178715574548906e-05, "loss": 0.0204, "step": 86280 }, { "epoch": 0.6378433517636971, "grad_norm": 0.06967286020517349, "learning_rate": 1.8175005935422602e-05, "loss": 0.0182, "step": 86290 }, { "epoch": 0.637917270334999, "grad_norm": 0.06664400547742844, "learning_rate": 1.8171296296296298e-05, "loss": 0.0167, "step": 86300 }, { "epoch": 0.6379911889063008, "grad_norm": 0.1289106160402298, "learning_rate": 1.816758665716999e-05, "loss": 0.0183, "step": 86310 }, { "epoch": 0.6380651074776027, "grad_norm": 0.09550661593675613, "learning_rate": 1.8163877018043683e-05, "loss": 0.0163, "step": 86320 }, { "epoch": 0.6381390260489045, "grad_norm": 0.06980642676353455, "learning_rate": 1.816016737891738e-05, "loss": 0.0174, "step": 86330 }, { "epoch": 0.6382129446202064, "grad_norm": 0.07693205773830414, "learning_rate": 1.8156457739791075e-05, "loss": 0.0193, "step": 86340 }, { "epoch": 0.6382868631915082, "grad_norm": 0.07100095599889755, "learning_rate": 1.8152748100664768e-05, "loss": 0.0168, "step": 86350 }, { "epoch": 0.6383607817628101, "grad_norm": 0.07612592726945877, "learning_rate": 1.814903846153846e-05, "loss": 0.017, "step": 86360 }, { "epoch": 0.638434700334112, "grad_norm": 0.08937795460224152, "learning_rate": 1.8145328822412156e-05, "loss": 0.0175, "step": 86370 }, { "epoch": 0.6385086189054138, "grad_norm": 0.09523028135299683, "learning_rate": 1.8141619183285852e-05, "loss": 0.021, "step": 86380 }, { "epoch": 0.6385825374767157, "grad_norm": 0.07586309313774109, "learning_rate": 1.8137909544159545e-05, "loss": 0.0167, "step": 86390 }, { "epoch": 0.6386564560480175, "grad_norm": 0.08415968716144562, "learning_rate": 1.8134199905033237e-05, "loss": 0.0179, "step": 86400 }, { "epoch": 0.6387303746193194, "grad_norm": 0.07729531824588776, "learning_rate": 1.8130490265906933e-05, "loss": 0.0175, "step": 86410 }, { "epoch": 0.6388042931906213, "grad_norm": 0.07085543125867844, "learning_rate": 1.812678062678063e-05, "loss": 0.0182, "step": 86420 }, { "epoch": 0.638878211761923, "grad_norm": 0.0866665244102478, "learning_rate": 1.8123070987654322e-05, "loss": 0.0184, "step": 86430 }, { "epoch": 0.638952130333225, "grad_norm": 0.10179586708545685, "learning_rate": 1.8119361348528015e-05, "loss": 0.016, "step": 86440 }, { "epoch": 0.6390260489045267, "grad_norm": 0.0781673863530159, "learning_rate": 1.811565170940171e-05, "loss": 0.0164, "step": 86450 }, { "epoch": 0.6390999674758286, "grad_norm": 0.0726914331316948, "learning_rate": 1.8111942070275403e-05, "loss": 0.0171, "step": 86460 }, { "epoch": 0.6391738860471304, "grad_norm": 0.06358665972948074, "learning_rate": 1.81082324311491e-05, "loss": 0.0164, "step": 86470 }, { "epoch": 0.6392478046184323, "grad_norm": 0.06945040822029114, "learning_rate": 1.810452279202279e-05, "loss": 0.0159, "step": 86480 }, { "epoch": 0.6393217231897342, "grad_norm": 0.06123916804790497, "learning_rate": 1.8100813152896488e-05, "loss": 0.0178, "step": 86490 }, { "epoch": 0.639395641761036, "grad_norm": 0.08601492643356323, "learning_rate": 1.809710351377018e-05, "loss": 0.0183, "step": 86500 }, { "epoch": 0.6394695603323379, "grad_norm": 0.0838085189461708, "learning_rate": 1.8093393874643873e-05, "loss": 0.0179, "step": 86510 }, { "epoch": 0.6395434789036397, "grad_norm": 0.05589223653078079, "learning_rate": 1.8089684235517572e-05, "loss": 0.0164, "step": 86520 }, { "epoch": 0.6396173974749416, "grad_norm": 0.08448652178049088, "learning_rate": 1.8085974596391265e-05, "loss": 0.0182, "step": 86530 }, { "epoch": 0.6396913160462434, "grad_norm": 0.09148748219013214, "learning_rate": 1.8082264957264957e-05, "loss": 0.0176, "step": 86540 }, { "epoch": 0.6397652346175453, "grad_norm": 0.057749610394239426, "learning_rate": 1.807855531813865e-05, "loss": 0.0175, "step": 86550 }, { "epoch": 0.6398391531888472, "grad_norm": 0.08635307103395462, "learning_rate": 1.807484567901235e-05, "loss": 0.0195, "step": 86560 }, { "epoch": 0.639913071760149, "grad_norm": 0.09366890043020248, "learning_rate": 1.8071136039886042e-05, "loss": 0.0183, "step": 86570 }, { "epoch": 0.6399869903314509, "grad_norm": 0.0522395595908165, "learning_rate": 1.8067426400759734e-05, "loss": 0.0181, "step": 86580 }, { "epoch": 0.6400609089027527, "grad_norm": 0.07926511019468307, "learning_rate": 1.8063716761633427e-05, "loss": 0.0169, "step": 86590 }, { "epoch": 0.6401348274740546, "grad_norm": 0.08467059582471848, "learning_rate": 1.8060007122507123e-05, "loss": 0.0206, "step": 86600 }, { "epoch": 0.6402087460453564, "grad_norm": 0.07435682415962219, "learning_rate": 1.805629748338082e-05, "loss": 0.0183, "step": 86610 }, { "epoch": 0.6402826646166583, "grad_norm": 0.11792890727519989, "learning_rate": 1.805258784425451e-05, "loss": 0.0192, "step": 86620 }, { "epoch": 0.6403565831879602, "grad_norm": 0.08234578371047974, "learning_rate": 1.8048878205128204e-05, "loss": 0.0174, "step": 86630 }, { "epoch": 0.640430501759262, "grad_norm": 0.0733381137251854, "learning_rate": 1.80451685660019e-05, "loss": 0.0182, "step": 86640 }, { "epoch": 0.6405044203305639, "grad_norm": 0.10058154910802841, "learning_rate": 1.8041458926875596e-05, "loss": 0.0176, "step": 86650 }, { "epoch": 0.6405783389018657, "grad_norm": 0.08493416011333466, "learning_rate": 1.803774928774929e-05, "loss": 0.0188, "step": 86660 }, { "epoch": 0.6406522574731676, "grad_norm": 0.07821780443191528, "learning_rate": 1.8034039648622985e-05, "loss": 0.0211, "step": 86670 }, { "epoch": 0.6407261760444695, "grad_norm": 0.09611696749925613, "learning_rate": 1.8030330009496677e-05, "loss": 0.0178, "step": 86680 }, { "epoch": 0.6408000946157713, "grad_norm": 0.0972839891910553, "learning_rate": 1.802662037037037e-05, "loss": 0.0165, "step": 86690 }, { "epoch": 0.6408740131870732, "grad_norm": 0.09013646095991135, "learning_rate": 1.8022910731244066e-05, "loss": 0.017, "step": 86700 }, { "epoch": 0.640947931758375, "grad_norm": 0.07640646398067474, "learning_rate": 1.801920109211776e-05, "loss": 0.0192, "step": 86710 }, { "epoch": 0.6410218503296768, "grad_norm": 0.08092772960662842, "learning_rate": 1.8015491452991454e-05, "loss": 0.0177, "step": 86720 }, { "epoch": 0.6410957689009786, "grad_norm": 0.0772366002202034, "learning_rate": 1.8011781813865147e-05, "loss": 0.0151, "step": 86730 }, { "epoch": 0.6411696874722805, "grad_norm": 0.09616388380527496, "learning_rate": 1.800807217473884e-05, "loss": 0.018, "step": 86740 }, { "epoch": 0.6412436060435824, "grad_norm": 0.06029176712036133, "learning_rate": 1.800436253561254e-05, "loss": 0.0172, "step": 86750 }, { "epoch": 0.6413175246148842, "grad_norm": 0.09864173829555511, "learning_rate": 1.800065289648623e-05, "loss": 0.018, "step": 86760 }, { "epoch": 0.6413914431861861, "grad_norm": 0.06823432445526123, "learning_rate": 1.7996943257359924e-05, "loss": 0.0176, "step": 86770 }, { "epoch": 0.6414653617574879, "grad_norm": 0.09102225303649902, "learning_rate": 1.7993233618233616e-05, "loss": 0.0172, "step": 86780 }, { "epoch": 0.6415392803287898, "grad_norm": 0.07310714572668076, "learning_rate": 1.7989523979107316e-05, "loss": 0.0166, "step": 86790 }, { "epoch": 0.6416131989000916, "grad_norm": 0.08708073198795319, "learning_rate": 1.798581433998101e-05, "loss": 0.0153, "step": 86800 }, { "epoch": 0.6416871174713935, "grad_norm": 0.06822753697633743, "learning_rate": 1.79821047008547e-05, "loss": 0.0164, "step": 86810 }, { "epoch": 0.6417610360426954, "grad_norm": 0.07988683879375458, "learning_rate": 1.7978395061728397e-05, "loss": 0.0164, "step": 86820 }, { "epoch": 0.6418349546139972, "grad_norm": 0.10230796784162521, "learning_rate": 1.797468542260209e-05, "loss": 0.0174, "step": 86830 }, { "epoch": 0.6419088731852991, "grad_norm": 0.07685470581054688, "learning_rate": 1.7970975783475786e-05, "loss": 0.0179, "step": 86840 }, { "epoch": 0.6419827917566009, "grad_norm": 0.07872067391872406, "learning_rate": 1.7967266144349478e-05, "loss": 0.0182, "step": 86850 }, { "epoch": 0.6420567103279028, "grad_norm": 0.07615887373685837, "learning_rate": 1.7963556505223174e-05, "loss": 0.0186, "step": 86860 }, { "epoch": 0.6421306288992046, "grad_norm": 0.07310209423303604, "learning_rate": 1.7959846866096867e-05, "loss": 0.0178, "step": 86870 }, { "epoch": 0.6422045474705065, "grad_norm": 0.07288283854722977, "learning_rate": 1.7956137226970563e-05, "loss": 0.019, "step": 86880 }, { "epoch": 0.6422784660418084, "grad_norm": 0.07359371334314346, "learning_rate": 1.7952427587844255e-05, "loss": 0.0158, "step": 86890 }, { "epoch": 0.6423523846131102, "grad_norm": 0.08522066473960876, "learning_rate": 1.794871794871795e-05, "loss": 0.0184, "step": 86900 }, { "epoch": 0.6424263031844121, "grad_norm": 0.08251197636127472, "learning_rate": 1.7945008309591644e-05, "loss": 0.0169, "step": 86910 }, { "epoch": 0.6425002217557139, "grad_norm": 0.08390085399150848, "learning_rate": 1.7941298670465336e-05, "loss": 0.0164, "step": 86920 }, { "epoch": 0.6425741403270158, "grad_norm": 0.11742323637008667, "learning_rate": 1.7937589031339032e-05, "loss": 0.0185, "step": 86930 }, { "epoch": 0.6426480588983177, "grad_norm": 0.08146241307258606, "learning_rate": 1.7933879392212728e-05, "loss": 0.0194, "step": 86940 }, { "epoch": 0.6427219774696195, "grad_norm": 0.07195167243480682, "learning_rate": 1.793016975308642e-05, "loss": 0.0186, "step": 86950 }, { "epoch": 0.6427958960409214, "grad_norm": 0.056414175778627396, "learning_rate": 1.7926460113960113e-05, "loss": 0.0154, "step": 86960 }, { "epoch": 0.6428698146122231, "grad_norm": 0.0632779523730278, "learning_rate": 1.7922750474833806e-05, "loss": 0.0162, "step": 86970 }, { "epoch": 0.642943733183525, "grad_norm": 0.0912681519985199, "learning_rate": 1.7919040835707505e-05, "loss": 0.018, "step": 86980 }, { "epoch": 0.6430176517548268, "grad_norm": 0.06888893991708755, "learning_rate": 1.7915331196581198e-05, "loss": 0.0171, "step": 86990 }, { "epoch": 0.6430915703261287, "grad_norm": 0.0987488254904747, "learning_rate": 1.791162155745489e-05, "loss": 0.018, "step": 87000 }, { "epoch": 0.6431654888974306, "grad_norm": 0.08291744440793991, "learning_rate": 1.7907911918328586e-05, "loss": 0.0179, "step": 87010 }, { "epoch": 0.6432394074687324, "grad_norm": 0.12280869483947754, "learning_rate": 1.7904202279202282e-05, "loss": 0.0174, "step": 87020 }, { "epoch": 0.6433133260400343, "grad_norm": 0.09106610715389252, "learning_rate": 1.7900492640075975e-05, "loss": 0.0163, "step": 87030 }, { "epoch": 0.6433872446113361, "grad_norm": 0.051806751638650894, "learning_rate": 1.7896783000949668e-05, "loss": 0.0151, "step": 87040 }, { "epoch": 0.643461163182638, "grad_norm": 0.08872636407613754, "learning_rate": 1.7893073361823364e-05, "loss": 0.0185, "step": 87050 }, { "epoch": 0.6435350817539398, "grad_norm": 0.058952391147613525, "learning_rate": 1.7889363722697056e-05, "loss": 0.0169, "step": 87060 }, { "epoch": 0.6436090003252417, "grad_norm": 0.09729276597499847, "learning_rate": 1.7885654083570752e-05, "loss": 0.018, "step": 87070 }, { "epoch": 0.6436829188965436, "grad_norm": 0.07857722043991089, "learning_rate": 1.7881944444444445e-05, "loss": 0.0177, "step": 87080 }, { "epoch": 0.6437568374678454, "grad_norm": 0.07660865783691406, "learning_rate": 1.787823480531814e-05, "loss": 0.0168, "step": 87090 }, { "epoch": 0.6438307560391473, "grad_norm": 0.07207610458135605, "learning_rate": 1.7874525166191833e-05, "loss": 0.0165, "step": 87100 }, { "epoch": 0.6439046746104491, "grad_norm": 0.07649290561676025, "learning_rate": 1.787081552706553e-05, "loss": 0.0166, "step": 87110 }, { "epoch": 0.643978593181751, "grad_norm": 0.10615783929824829, "learning_rate": 1.7867105887939222e-05, "loss": 0.0194, "step": 87120 }, { "epoch": 0.6440525117530529, "grad_norm": 0.07937850058078766, "learning_rate": 1.7863396248812918e-05, "loss": 0.0167, "step": 87130 }, { "epoch": 0.6441264303243547, "grad_norm": 0.07615212351083755, "learning_rate": 1.785968660968661e-05, "loss": 0.0183, "step": 87140 }, { "epoch": 0.6442003488956566, "grad_norm": 0.08921714127063751, "learning_rate": 1.7855976970560303e-05, "loss": 0.0147, "step": 87150 }, { "epoch": 0.6442742674669584, "grad_norm": 0.06732451915740967, "learning_rate": 1.7852267331434e-05, "loss": 0.0205, "step": 87160 }, { "epoch": 0.6443481860382603, "grad_norm": 0.08228149265050888, "learning_rate": 1.7848557692307695e-05, "loss": 0.0193, "step": 87170 }, { "epoch": 0.6444221046095621, "grad_norm": 0.05445276200771332, "learning_rate": 1.7844848053181387e-05, "loss": 0.0168, "step": 87180 }, { "epoch": 0.644496023180864, "grad_norm": 0.09085311740636826, "learning_rate": 1.784113841405508e-05, "loss": 0.0186, "step": 87190 }, { "epoch": 0.6445699417521659, "grad_norm": 0.07711207121610641, "learning_rate": 1.7837428774928776e-05, "loss": 0.0161, "step": 87200 }, { "epoch": 0.6446438603234677, "grad_norm": 0.06373975425958633, "learning_rate": 1.7833719135802472e-05, "loss": 0.0167, "step": 87210 }, { "epoch": 0.6447177788947696, "grad_norm": 0.07344771176576614, "learning_rate": 1.7830009496676164e-05, "loss": 0.0189, "step": 87220 }, { "epoch": 0.6447916974660713, "grad_norm": 0.08050648123025894, "learning_rate": 1.7826299857549857e-05, "loss": 0.0185, "step": 87230 }, { "epoch": 0.6448656160373732, "grad_norm": 0.07578188180923462, "learning_rate": 1.7822590218423553e-05, "loss": 0.017, "step": 87240 }, { "epoch": 0.644939534608675, "grad_norm": 0.07016967982053757, "learning_rate": 1.781888057929725e-05, "loss": 0.0181, "step": 87250 }, { "epoch": 0.6450134531799769, "grad_norm": 0.08886934816837311, "learning_rate": 1.781517094017094e-05, "loss": 0.0181, "step": 87260 }, { "epoch": 0.6450873717512788, "grad_norm": 0.09196509420871735, "learning_rate": 1.7811461301044634e-05, "loss": 0.0196, "step": 87270 }, { "epoch": 0.6451612903225806, "grad_norm": 0.059960197657346725, "learning_rate": 1.780775166191833e-05, "loss": 0.0166, "step": 87280 }, { "epoch": 0.6452352088938825, "grad_norm": 0.06782221794128418, "learning_rate": 1.7804042022792023e-05, "loss": 0.0165, "step": 87290 }, { "epoch": 0.6453091274651843, "grad_norm": 0.09103874862194061, "learning_rate": 1.780033238366572e-05, "loss": 0.0171, "step": 87300 }, { "epoch": 0.6453830460364862, "grad_norm": 0.08995518088340759, "learning_rate": 1.779662274453941e-05, "loss": 0.0151, "step": 87310 }, { "epoch": 0.645456964607788, "grad_norm": 0.09144158661365509, "learning_rate": 1.7792913105413107e-05, "loss": 0.0186, "step": 87320 }, { "epoch": 0.6455308831790899, "grad_norm": 0.09462243318557739, "learning_rate": 1.77892034662868e-05, "loss": 0.0157, "step": 87330 }, { "epoch": 0.6456048017503918, "grad_norm": 0.08410174399614334, "learning_rate": 1.7785493827160496e-05, "loss": 0.018, "step": 87340 }, { "epoch": 0.6456787203216936, "grad_norm": 0.06735754013061523, "learning_rate": 1.778178418803419e-05, "loss": 0.0177, "step": 87350 }, { "epoch": 0.6457526388929955, "grad_norm": 0.08470659703016281, "learning_rate": 1.7778074548907884e-05, "loss": 0.0195, "step": 87360 }, { "epoch": 0.6458265574642973, "grad_norm": 0.08102233707904816, "learning_rate": 1.7774364909781577e-05, "loss": 0.0164, "step": 87370 }, { "epoch": 0.6459004760355992, "grad_norm": 0.08035475015640259, "learning_rate": 1.777065527065527e-05, "loss": 0.0173, "step": 87380 }, { "epoch": 0.6459743946069011, "grad_norm": 0.07792873680591583, "learning_rate": 1.7766945631528965e-05, "loss": 0.0194, "step": 87390 }, { "epoch": 0.6460483131782029, "grad_norm": 0.09555920958518982, "learning_rate": 1.776323599240266e-05, "loss": 0.0177, "step": 87400 }, { "epoch": 0.6461222317495048, "grad_norm": 0.09256338328123093, "learning_rate": 1.7759526353276354e-05, "loss": 0.0192, "step": 87410 }, { "epoch": 0.6461961503208066, "grad_norm": 0.07088413834571838, "learning_rate": 1.7755816714150047e-05, "loss": 0.0181, "step": 87420 }, { "epoch": 0.6462700688921085, "grad_norm": 0.07728109508752823, "learning_rate": 1.7752107075023743e-05, "loss": 0.0158, "step": 87430 }, { "epoch": 0.6463439874634103, "grad_norm": 0.0669608861207962, "learning_rate": 1.774839743589744e-05, "loss": 0.0168, "step": 87440 }, { "epoch": 0.6464179060347122, "grad_norm": 0.09193000197410583, "learning_rate": 1.774468779677113e-05, "loss": 0.0168, "step": 87450 }, { "epoch": 0.6464918246060141, "grad_norm": 0.085015207529068, "learning_rate": 1.7740978157644824e-05, "loss": 0.0187, "step": 87460 }, { "epoch": 0.6465657431773159, "grad_norm": 0.08341581374406815, "learning_rate": 1.773726851851852e-05, "loss": 0.016, "step": 87470 }, { "epoch": 0.6466396617486178, "grad_norm": 0.07875963300466537, "learning_rate": 1.7733558879392216e-05, "loss": 0.0188, "step": 87480 }, { "epoch": 0.6467135803199195, "grad_norm": 0.05918658524751663, "learning_rate": 1.7729849240265908e-05, "loss": 0.0188, "step": 87490 }, { "epoch": 0.6467874988912214, "grad_norm": 0.0708702951669693, "learning_rate": 1.77261396011396e-05, "loss": 0.0179, "step": 87500 }, { "epoch": 0.6468614174625232, "grad_norm": 0.08807706832885742, "learning_rate": 1.7722429962013297e-05, "loss": 0.0177, "step": 87510 }, { "epoch": 0.6469353360338251, "grad_norm": 0.08028735220432281, "learning_rate": 1.771872032288699e-05, "loss": 0.0168, "step": 87520 }, { "epoch": 0.647009254605127, "grad_norm": 0.07263261079788208, "learning_rate": 1.7715010683760685e-05, "loss": 0.0173, "step": 87530 }, { "epoch": 0.6470831731764288, "grad_norm": 0.06898120045661926, "learning_rate": 1.7711301044634378e-05, "loss": 0.0141, "step": 87540 }, { "epoch": 0.6471570917477307, "grad_norm": 0.06692709028720856, "learning_rate": 1.7707591405508074e-05, "loss": 0.0154, "step": 87550 }, { "epoch": 0.6472310103190325, "grad_norm": 0.11443676054477692, "learning_rate": 1.7703881766381766e-05, "loss": 0.0161, "step": 87560 }, { "epoch": 0.6473049288903344, "grad_norm": 0.07005342096090317, "learning_rate": 1.7700172127255462e-05, "loss": 0.0169, "step": 87570 }, { "epoch": 0.6473788474616362, "grad_norm": 0.08362290263175964, "learning_rate": 1.7696462488129155e-05, "loss": 0.0186, "step": 87580 }, { "epoch": 0.6474527660329381, "grad_norm": 0.07407110184431076, "learning_rate": 1.769275284900285e-05, "loss": 0.017, "step": 87590 }, { "epoch": 0.64752668460424, "grad_norm": 0.07652756571769714, "learning_rate": 1.7689043209876543e-05, "loss": 0.015, "step": 87600 }, { "epoch": 0.6476006031755418, "grad_norm": 0.08048743009567261, "learning_rate": 1.7685333570750236e-05, "loss": 0.0169, "step": 87610 }, { "epoch": 0.6476745217468437, "grad_norm": 0.10383118689060211, "learning_rate": 1.7681623931623932e-05, "loss": 0.0178, "step": 87620 }, { "epoch": 0.6477484403181455, "grad_norm": 0.07576031982898712, "learning_rate": 1.7677914292497628e-05, "loss": 0.0185, "step": 87630 }, { "epoch": 0.6478223588894474, "grad_norm": 0.06110012158751488, "learning_rate": 1.767420465337132e-05, "loss": 0.0161, "step": 87640 }, { "epoch": 0.6478962774607493, "grad_norm": 0.07275137305259705, "learning_rate": 1.7670495014245013e-05, "loss": 0.0187, "step": 87650 }, { "epoch": 0.6479701960320511, "grad_norm": 0.09735322743654251, "learning_rate": 1.766678537511871e-05, "loss": 0.016, "step": 87660 }, { "epoch": 0.648044114603353, "grad_norm": 0.06891202181577682, "learning_rate": 1.7663075735992405e-05, "loss": 0.0165, "step": 87670 }, { "epoch": 0.6481180331746548, "grad_norm": 0.08524206280708313, "learning_rate": 1.7659366096866098e-05, "loss": 0.0173, "step": 87680 }, { "epoch": 0.6481919517459567, "grad_norm": 0.07716153562068939, "learning_rate": 1.765565645773979e-05, "loss": 0.0175, "step": 87690 }, { "epoch": 0.6482658703172585, "grad_norm": 0.06978312879800797, "learning_rate": 1.7651946818613486e-05, "loss": 0.0168, "step": 87700 }, { "epoch": 0.6483397888885604, "grad_norm": 0.10100270807743073, "learning_rate": 1.7648237179487182e-05, "loss": 0.018, "step": 87710 }, { "epoch": 0.6484137074598623, "grad_norm": 0.11189740151166916, "learning_rate": 1.7644527540360875e-05, "loss": 0.018, "step": 87720 }, { "epoch": 0.648487626031164, "grad_norm": 0.08508005738258362, "learning_rate": 1.7640817901234567e-05, "loss": 0.0183, "step": 87730 }, { "epoch": 0.648561544602466, "grad_norm": 0.0837252214550972, "learning_rate": 1.7637108262108263e-05, "loss": 0.0183, "step": 87740 }, { "epoch": 0.6486354631737677, "grad_norm": 0.07493939995765686, "learning_rate": 1.7633398622981956e-05, "loss": 0.0145, "step": 87750 }, { "epoch": 0.6487093817450696, "grad_norm": 0.07387962937355042, "learning_rate": 1.7629688983855652e-05, "loss": 0.0149, "step": 87760 }, { "epoch": 0.6487833003163714, "grad_norm": 0.07578963786363602, "learning_rate": 1.7625979344729344e-05, "loss": 0.0179, "step": 87770 }, { "epoch": 0.6488572188876733, "grad_norm": 0.07460996508598328, "learning_rate": 1.762226970560304e-05, "loss": 0.0179, "step": 87780 }, { "epoch": 0.6489311374589752, "grad_norm": 0.07213159650564194, "learning_rate": 1.7618560066476733e-05, "loss": 0.0181, "step": 87790 }, { "epoch": 0.649005056030277, "grad_norm": 0.10759951174259186, "learning_rate": 1.761485042735043e-05, "loss": 0.0198, "step": 87800 }, { "epoch": 0.6490789746015789, "grad_norm": 0.06241863965988159, "learning_rate": 1.761114078822412e-05, "loss": 0.0156, "step": 87810 }, { "epoch": 0.6491528931728807, "grad_norm": 0.06056777387857437, "learning_rate": 1.7607431149097817e-05, "loss": 0.0171, "step": 87820 }, { "epoch": 0.6492268117441826, "grad_norm": 0.09477005153894424, "learning_rate": 1.760372150997151e-05, "loss": 0.0194, "step": 87830 }, { "epoch": 0.6493007303154844, "grad_norm": 0.09164115786552429, "learning_rate": 1.7600011870845203e-05, "loss": 0.0161, "step": 87840 }, { "epoch": 0.6493746488867863, "grad_norm": 0.08950028568506241, "learning_rate": 1.75963022317189e-05, "loss": 0.017, "step": 87850 }, { "epoch": 0.6494485674580882, "grad_norm": 0.06165635585784912, "learning_rate": 1.7592592592592595e-05, "loss": 0.017, "step": 87860 }, { "epoch": 0.64952248602939, "grad_norm": 0.12589341402053833, "learning_rate": 1.7588882953466287e-05, "loss": 0.0167, "step": 87870 }, { "epoch": 0.6495964046006919, "grad_norm": 0.07935626804828644, "learning_rate": 1.758517331433998e-05, "loss": 0.0179, "step": 87880 }, { "epoch": 0.6496703231719937, "grad_norm": 0.08484697341918945, "learning_rate": 1.758146367521368e-05, "loss": 0.0194, "step": 87890 }, { "epoch": 0.6497442417432956, "grad_norm": 0.06872183829545975, "learning_rate": 1.757775403608737e-05, "loss": 0.0168, "step": 87900 }, { "epoch": 0.6498181603145975, "grad_norm": 0.11150965839624405, "learning_rate": 1.7574044396961064e-05, "loss": 0.0188, "step": 87910 }, { "epoch": 0.6498920788858993, "grad_norm": 0.07173824310302734, "learning_rate": 1.7570334757834757e-05, "loss": 0.0207, "step": 87920 }, { "epoch": 0.6499659974572012, "grad_norm": 0.07741278409957886, "learning_rate": 1.7566625118708453e-05, "loss": 0.0181, "step": 87930 }, { "epoch": 0.650039916028503, "grad_norm": 0.06695520877838135, "learning_rate": 1.756291547958215e-05, "loss": 0.0159, "step": 87940 }, { "epoch": 0.6501138345998049, "grad_norm": 0.08548988401889801, "learning_rate": 1.755920584045584e-05, "loss": 0.0172, "step": 87950 }, { "epoch": 0.6501877531711067, "grad_norm": 0.07158133387565613, "learning_rate": 1.7555496201329534e-05, "loss": 0.0161, "step": 87960 }, { "epoch": 0.6502616717424086, "grad_norm": 0.10227443277835846, "learning_rate": 1.755178656220323e-05, "loss": 0.0188, "step": 87970 }, { "epoch": 0.6503355903137105, "grad_norm": 0.06894666701555252, "learning_rate": 1.7548076923076922e-05, "loss": 0.0183, "step": 87980 }, { "epoch": 0.6504095088850123, "grad_norm": 0.09842050820589066, "learning_rate": 1.754436728395062e-05, "loss": 0.0192, "step": 87990 }, { "epoch": 0.6504834274563142, "grad_norm": 0.07729904353618622, "learning_rate": 1.754065764482431e-05, "loss": 0.0178, "step": 88000 }, { "epoch": 0.650557346027616, "grad_norm": 0.07297061383724213, "learning_rate": 1.7536948005698007e-05, "loss": 0.0155, "step": 88010 }, { "epoch": 0.6506312645989178, "grad_norm": 0.07612710446119308, "learning_rate": 1.75332383665717e-05, "loss": 0.0175, "step": 88020 }, { "epoch": 0.6507051831702196, "grad_norm": 0.08314087241888046, "learning_rate": 1.7529528727445396e-05, "loss": 0.0187, "step": 88030 }, { "epoch": 0.6507791017415215, "grad_norm": 0.10151941329240799, "learning_rate": 1.752581908831909e-05, "loss": 0.0167, "step": 88040 }, { "epoch": 0.6508530203128234, "grad_norm": 0.06678344309329987, "learning_rate": 1.7522109449192784e-05, "loss": 0.017, "step": 88050 }, { "epoch": 0.6509269388841252, "grad_norm": 0.06826924532651901, "learning_rate": 1.7518399810066477e-05, "loss": 0.0167, "step": 88060 }, { "epoch": 0.6510008574554271, "grad_norm": 0.06449755281209946, "learning_rate": 1.751469017094017e-05, "loss": 0.0188, "step": 88070 }, { "epoch": 0.6510747760267289, "grad_norm": 0.06460060179233551, "learning_rate": 1.751098053181387e-05, "loss": 0.0171, "step": 88080 }, { "epoch": 0.6511486945980308, "grad_norm": 0.09221690893173218, "learning_rate": 1.750727089268756e-05, "loss": 0.0193, "step": 88090 }, { "epoch": 0.6512226131693326, "grad_norm": 0.11470767110586166, "learning_rate": 1.7503561253561254e-05, "loss": 0.0175, "step": 88100 }, { "epoch": 0.6512965317406345, "grad_norm": 0.09464261680841446, "learning_rate": 1.7499851614434946e-05, "loss": 0.0165, "step": 88110 }, { "epoch": 0.6513704503119364, "grad_norm": 0.09214161336421967, "learning_rate": 1.7496141975308646e-05, "loss": 0.0175, "step": 88120 }, { "epoch": 0.6514443688832382, "grad_norm": 0.07291626185178757, "learning_rate": 1.7492432336182338e-05, "loss": 0.0187, "step": 88130 }, { "epoch": 0.6515182874545401, "grad_norm": 0.1113857626914978, "learning_rate": 1.748872269705603e-05, "loss": 0.0203, "step": 88140 }, { "epoch": 0.6515922060258419, "grad_norm": 0.07568159699440002, "learning_rate": 1.7485013057929723e-05, "loss": 0.0208, "step": 88150 }, { "epoch": 0.6516661245971438, "grad_norm": 0.09512155503034592, "learning_rate": 1.748130341880342e-05, "loss": 0.0172, "step": 88160 }, { "epoch": 0.6517400431684457, "grad_norm": 0.08339069038629532, "learning_rate": 1.7477593779677115e-05, "loss": 0.0192, "step": 88170 }, { "epoch": 0.6518139617397475, "grad_norm": 0.09774073213338852, "learning_rate": 1.7473884140550808e-05, "loss": 0.0193, "step": 88180 }, { "epoch": 0.6518878803110494, "grad_norm": 0.08510401099920273, "learning_rate": 1.74701745014245e-05, "loss": 0.0195, "step": 88190 }, { "epoch": 0.6519617988823512, "grad_norm": 0.07104596495628357, "learning_rate": 1.7466464862298196e-05, "loss": 0.0165, "step": 88200 }, { "epoch": 0.6520357174536531, "grad_norm": 0.06436054408550262, "learning_rate": 1.746275522317189e-05, "loss": 0.0178, "step": 88210 }, { "epoch": 0.6521096360249549, "grad_norm": 0.05952128395438194, "learning_rate": 1.7459045584045585e-05, "loss": 0.0171, "step": 88220 }, { "epoch": 0.6521835545962568, "grad_norm": 0.0875726044178009, "learning_rate": 1.745533594491928e-05, "loss": 0.0181, "step": 88230 }, { "epoch": 0.6522574731675587, "grad_norm": 0.07102656364440918, "learning_rate": 1.7451626305792974e-05, "loss": 0.0184, "step": 88240 }, { "epoch": 0.6523313917388605, "grad_norm": 0.07627592235803604, "learning_rate": 1.7447916666666666e-05, "loss": 0.0201, "step": 88250 }, { "epoch": 0.6524053103101624, "grad_norm": 0.07031363993883133, "learning_rate": 1.7444207027540362e-05, "loss": 0.0169, "step": 88260 }, { "epoch": 0.6524792288814641, "grad_norm": 0.06708788871765137, "learning_rate": 1.7440497388414058e-05, "loss": 0.0196, "step": 88270 }, { "epoch": 0.652553147452766, "grad_norm": 0.08406656980514526, "learning_rate": 1.743678774928775e-05, "loss": 0.019, "step": 88280 }, { "epoch": 0.6526270660240678, "grad_norm": 0.06256931275129318, "learning_rate": 1.7433078110161443e-05, "loss": 0.0157, "step": 88290 }, { "epoch": 0.6527009845953697, "grad_norm": 0.06588439643383026, "learning_rate": 1.7429368471035136e-05, "loss": 0.017, "step": 88300 }, { "epoch": 0.6527749031666716, "grad_norm": 0.09346655011177063, "learning_rate": 1.7425658831908835e-05, "loss": 0.0163, "step": 88310 }, { "epoch": 0.6528488217379734, "grad_norm": 0.07473476231098175, "learning_rate": 1.7421949192782528e-05, "loss": 0.0168, "step": 88320 }, { "epoch": 0.6529227403092753, "grad_norm": 0.06457147747278214, "learning_rate": 1.741823955365622e-05, "loss": 0.0191, "step": 88330 }, { "epoch": 0.6529966588805771, "grad_norm": 0.06038924679160118, "learning_rate": 1.7414529914529913e-05, "loss": 0.016, "step": 88340 }, { "epoch": 0.653070577451879, "grad_norm": 0.07191398739814758, "learning_rate": 1.7410820275403612e-05, "loss": 0.0176, "step": 88350 }, { "epoch": 0.6531444960231808, "grad_norm": 0.0851953998208046, "learning_rate": 1.7407110636277305e-05, "loss": 0.0174, "step": 88360 }, { "epoch": 0.6532184145944827, "grad_norm": 0.06611715257167816, "learning_rate": 1.7403400997150997e-05, "loss": 0.0159, "step": 88370 }, { "epoch": 0.6532923331657846, "grad_norm": 0.07906629145145416, "learning_rate": 1.7399691358024693e-05, "loss": 0.017, "step": 88380 }, { "epoch": 0.6533662517370864, "grad_norm": 0.07506294548511505, "learning_rate": 1.7395981718898386e-05, "loss": 0.017, "step": 88390 }, { "epoch": 0.6534401703083883, "grad_norm": 0.07016431540250778, "learning_rate": 1.7392272079772082e-05, "loss": 0.0187, "step": 88400 }, { "epoch": 0.6535140888796901, "grad_norm": 0.08150769025087357, "learning_rate": 1.7388562440645774e-05, "loss": 0.0178, "step": 88410 }, { "epoch": 0.653588007450992, "grad_norm": 0.08036355674266815, "learning_rate": 1.738485280151947e-05, "loss": 0.0191, "step": 88420 }, { "epoch": 0.6536619260222939, "grad_norm": 0.07620055973529816, "learning_rate": 1.7381143162393163e-05, "loss": 0.016, "step": 88430 }, { "epoch": 0.6537358445935957, "grad_norm": 0.08634445816278458, "learning_rate": 1.7377433523266856e-05, "loss": 0.0166, "step": 88440 }, { "epoch": 0.6538097631648976, "grad_norm": 0.09410831332206726, "learning_rate": 1.737372388414055e-05, "loss": 0.02, "step": 88450 }, { "epoch": 0.6538836817361994, "grad_norm": 0.08769334107637405, "learning_rate": 1.7370014245014248e-05, "loss": 0.0154, "step": 88460 }, { "epoch": 0.6539576003075013, "grad_norm": 0.05638042092323303, "learning_rate": 1.736630460588794e-05, "loss": 0.0162, "step": 88470 }, { "epoch": 0.6540315188788031, "grad_norm": 0.08551321923732758, "learning_rate": 1.7362594966761633e-05, "loss": 0.0212, "step": 88480 }, { "epoch": 0.654105437450105, "grad_norm": 0.06933954358100891, "learning_rate": 1.735888532763533e-05, "loss": 0.0204, "step": 88490 }, { "epoch": 0.6541793560214069, "grad_norm": 0.07857784628868103, "learning_rate": 1.7355175688509025e-05, "loss": 0.021, "step": 88500 }, { "epoch": 0.6542532745927087, "grad_norm": 0.06191938742995262, "learning_rate": 1.7351466049382717e-05, "loss": 0.017, "step": 88510 }, { "epoch": 0.6543271931640106, "grad_norm": 0.07220283150672913, "learning_rate": 1.734775641025641e-05, "loss": 0.0154, "step": 88520 }, { "epoch": 0.6544011117353123, "grad_norm": 0.08090227842330933, "learning_rate": 1.7344046771130106e-05, "loss": 0.0175, "step": 88530 }, { "epoch": 0.6544750303066142, "grad_norm": 0.06821764260530472, "learning_rate": 1.7340337132003802e-05, "loss": 0.0153, "step": 88540 }, { "epoch": 0.654548948877916, "grad_norm": 0.07408381253480911, "learning_rate": 1.7336627492877494e-05, "loss": 0.0161, "step": 88550 }, { "epoch": 0.6546228674492179, "grad_norm": 0.09872277826070786, "learning_rate": 1.7332917853751187e-05, "loss": 0.0143, "step": 88560 }, { "epoch": 0.6546967860205198, "grad_norm": 0.09294930100440979, "learning_rate": 1.7329208214624883e-05, "loss": 0.0184, "step": 88570 }, { "epoch": 0.6547707045918216, "grad_norm": 0.0852140411734581, "learning_rate": 1.732549857549858e-05, "loss": 0.0169, "step": 88580 }, { "epoch": 0.6548446231631235, "grad_norm": 0.06658681482076645, "learning_rate": 1.732178893637227e-05, "loss": 0.0189, "step": 88590 }, { "epoch": 0.6549185417344253, "grad_norm": 0.06592123210430145, "learning_rate": 1.7318079297245964e-05, "loss": 0.0124, "step": 88600 }, { "epoch": 0.6549924603057272, "grad_norm": 0.06855089962482452, "learning_rate": 1.731436965811966e-05, "loss": 0.018, "step": 88610 }, { "epoch": 0.655066378877029, "grad_norm": 0.0831160768866539, "learning_rate": 1.7310660018993353e-05, "loss": 0.0157, "step": 88620 }, { "epoch": 0.6551402974483309, "grad_norm": 0.09856075793504715, "learning_rate": 1.730695037986705e-05, "loss": 0.0197, "step": 88630 }, { "epoch": 0.6552142160196328, "grad_norm": 0.1038886234164238, "learning_rate": 1.730324074074074e-05, "loss": 0.0182, "step": 88640 }, { "epoch": 0.6552881345909346, "grad_norm": 0.08120853453874588, "learning_rate": 1.7299531101614437e-05, "loss": 0.0183, "step": 88650 }, { "epoch": 0.6553620531622365, "grad_norm": 0.09115610271692276, "learning_rate": 1.729582146248813e-05, "loss": 0.0169, "step": 88660 }, { "epoch": 0.6554359717335383, "grad_norm": 0.10014880448579788, "learning_rate": 1.7292111823361822e-05, "loss": 0.0209, "step": 88670 }, { "epoch": 0.6555098903048402, "grad_norm": 0.06310135871171951, "learning_rate": 1.7288402184235518e-05, "loss": 0.0195, "step": 88680 }, { "epoch": 0.6555838088761421, "grad_norm": 0.08188183605670929, "learning_rate": 1.7284692545109214e-05, "loss": 0.0172, "step": 88690 }, { "epoch": 0.6556577274474439, "grad_norm": 0.0723811686038971, "learning_rate": 1.7280982905982907e-05, "loss": 0.0152, "step": 88700 }, { "epoch": 0.6557316460187458, "grad_norm": 0.07690294831991196, "learning_rate": 1.72772732668566e-05, "loss": 0.0162, "step": 88710 }, { "epoch": 0.6558055645900476, "grad_norm": 0.07310546934604645, "learning_rate": 1.7273563627730295e-05, "loss": 0.0186, "step": 88720 }, { "epoch": 0.6558794831613495, "grad_norm": 0.07603324204683304, "learning_rate": 1.726985398860399e-05, "loss": 0.016, "step": 88730 }, { "epoch": 0.6559534017326513, "grad_norm": 0.07487497478723526, "learning_rate": 1.7266144349477684e-05, "loss": 0.0177, "step": 88740 }, { "epoch": 0.6560273203039532, "grad_norm": 0.06836133450269699, "learning_rate": 1.7262434710351376e-05, "loss": 0.0152, "step": 88750 }, { "epoch": 0.6561012388752551, "grad_norm": 0.0726594552397728, "learning_rate": 1.7258725071225072e-05, "loss": 0.0186, "step": 88760 }, { "epoch": 0.6561751574465569, "grad_norm": 0.0918281301856041, "learning_rate": 1.725501543209877e-05, "loss": 0.0181, "step": 88770 }, { "epoch": 0.6562490760178588, "grad_norm": 0.0876714214682579, "learning_rate": 1.725130579297246e-05, "loss": 0.0188, "step": 88780 }, { "epoch": 0.6563229945891605, "grad_norm": 0.08103278279304504, "learning_rate": 1.7247596153846153e-05, "loss": 0.0168, "step": 88790 }, { "epoch": 0.6563969131604624, "grad_norm": 0.06613600999116898, "learning_rate": 1.724388651471985e-05, "loss": 0.0185, "step": 88800 }, { "epoch": 0.6564708317317642, "grad_norm": 0.08758364617824554, "learning_rate": 1.7240176875593545e-05, "loss": 0.0154, "step": 88810 }, { "epoch": 0.6565447503030661, "grad_norm": 0.09205630421638489, "learning_rate": 1.7236467236467238e-05, "loss": 0.0175, "step": 88820 }, { "epoch": 0.656618668874368, "grad_norm": 0.06685183942317963, "learning_rate": 1.723275759734093e-05, "loss": 0.0167, "step": 88830 }, { "epoch": 0.6566925874456698, "grad_norm": 0.07834669202566147, "learning_rate": 1.7229047958214627e-05, "loss": 0.0175, "step": 88840 }, { "epoch": 0.6567665060169717, "grad_norm": 0.07539256662130356, "learning_rate": 1.722533831908832e-05, "loss": 0.0163, "step": 88850 }, { "epoch": 0.6568404245882735, "grad_norm": 0.10026615858078003, "learning_rate": 1.7221628679962015e-05, "loss": 0.0177, "step": 88860 }, { "epoch": 0.6569143431595754, "grad_norm": 0.06142430007457733, "learning_rate": 1.7217919040835708e-05, "loss": 0.0154, "step": 88870 }, { "epoch": 0.6569882617308773, "grad_norm": 0.07194249331951141, "learning_rate": 1.7214209401709404e-05, "loss": 0.0156, "step": 88880 }, { "epoch": 0.6570621803021791, "grad_norm": 0.09324514865875244, "learning_rate": 1.7210499762583096e-05, "loss": 0.0173, "step": 88890 }, { "epoch": 0.657136098873481, "grad_norm": 0.07574540376663208, "learning_rate": 1.720679012345679e-05, "loss": 0.0194, "step": 88900 }, { "epoch": 0.6572100174447828, "grad_norm": 0.08077821880578995, "learning_rate": 1.7203080484330485e-05, "loss": 0.0184, "step": 88910 }, { "epoch": 0.6572839360160847, "grad_norm": 0.1079104095697403, "learning_rate": 1.719937084520418e-05, "loss": 0.0178, "step": 88920 }, { "epoch": 0.6573578545873865, "grad_norm": 0.09008293598890305, "learning_rate": 1.7195661206077873e-05, "loss": 0.0169, "step": 88930 }, { "epoch": 0.6574317731586884, "grad_norm": 0.04996965825557709, "learning_rate": 1.7191951566951566e-05, "loss": 0.0163, "step": 88940 }, { "epoch": 0.6575056917299903, "grad_norm": 0.06016021594405174, "learning_rate": 1.7188241927825262e-05, "loss": 0.0193, "step": 88950 }, { "epoch": 0.6575796103012921, "grad_norm": 0.08215005695819855, "learning_rate": 1.7184532288698958e-05, "loss": 0.0188, "step": 88960 }, { "epoch": 0.657653528872594, "grad_norm": 0.08095736801624298, "learning_rate": 1.718082264957265e-05, "loss": 0.021, "step": 88970 }, { "epoch": 0.6577274474438958, "grad_norm": 0.09990512579679489, "learning_rate": 1.7177113010446343e-05, "loss": 0.0174, "step": 88980 }, { "epoch": 0.6578013660151977, "grad_norm": 0.09206452965736389, "learning_rate": 1.717340337132004e-05, "loss": 0.0178, "step": 88990 }, { "epoch": 0.6578752845864995, "grad_norm": 0.06057227775454521, "learning_rate": 1.7169693732193735e-05, "loss": 0.0191, "step": 89000 }, { "epoch": 0.6579492031578014, "grad_norm": 0.08504120260477066, "learning_rate": 1.7165984093067427e-05, "loss": 0.0171, "step": 89010 }, { "epoch": 0.6580231217291033, "grad_norm": 0.06572899222373962, "learning_rate": 1.716227445394112e-05, "loss": 0.0182, "step": 89020 }, { "epoch": 0.6580970403004051, "grad_norm": 0.08160002529621124, "learning_rate": 1.7158564814814816e-05, "loss": 0.0151, "step": 89030 }, { "epoch": 0.658170958871707, "grad_norm": 0.09511744230985641, "learning_rate": 1.7154855175688512e-05, "loss": 0.0148, "step": 89040 }, { "epoch": 0.6582448774430087, "grad_norm": 0.07572203129529953, "learning_rate": 1.7151145536562205e-05, "loss": 0.0176, "step": 89050 }, { "epoch": 0.6583187960143106, "grad_norm": 0.05752973258495331, "learning_rate": 1.7147435897435897e-05, "loss": 0.0144, "step": 89060 }, { "epoch": 0.6583927145856124, "grad_norm": 0.06448008865118027, "learning_rate": 1.7143726258309593e-05, "loss": 0.017, "step": 89070 }, { "epoch": 0.6584666331569143, "grad_norm": 0.09872671216726303, "learning_rate": 1.7140016619183286e-05, "loss": 0.018, "step": 89080 }, { "epoch": 0.6585405517282162, "grad_norm": 0.11234050989151001, "learning_rate": 1.713630698005698e-05, "loss": 0.0195, "step": 89090 }, { "epoch": 0.658614470299518, "grad_norm": 0.06337179988622665, "learning_rate": 1.7132597340930674e-05, "loss": 0.0189, "step": 89100 }, { "epoch": 0.6586883888708199, "grad_norm": 0.06142707169055939, "learning_rate": 1.712888770180437e-05, "loss": 0.0181, "step": 89110 }, { "epoch": 0.6587623074421217, "grad_norm": 0.0669117197394371, "learning_rate": 1.7125178062678063e-05, "loss": 0.0163, "step": 89120 }, { "epoch": 0.6588362260134236, "grad_norm": 0.16983623802661896, "learning_rate": 1.7121468423551755e-05, "loss": 0.0177, "step": 89130 }, { "epoch": 0.6589101445847255, "grad_norm": 0.06876539438962936, "learning_rate": 1.711775878442545e-05, "loss": 0.0169, "step": 89140 }, { "epoch": 0.6589840631560273, "grad_norm": 0.09889322519302368, "learning_rate": 1.7114049145299147e-05, "loss": 0.0187, "step": 89150 }, { "epoch": 0.6590579817273292, "grad_norm": 0.07979824393987656, "learning_rate": 1.711033950617284e-05, "loss": 0.0179, "step": 89160 }, { "epoch": 0.659131900298631, "grad_norm": 0.06535978615283966, "learning_rate": 1.7106629867046532e-05, "loss": 0.0188, "step": 89170 }, { "epoch": 0.6592058188699329, "grad_norm": 0.07772579044103622, "learning_rate": 1.710292022792023e-05, "loss": 0.0176, "step": 89180 }, { "epoch": 0.6592797374412347, "grad_norm": 0.09148101508617401, "learning_rate": 1.7099210588793924e-05, "loss": 0.0184, "step": 89190 }, { "epoch": 0.6593536560125366, "grad_norm": 0.07457172125577927, "learning_rate": 1.7095500949667617e-05, "loss": 0.0173, "step": 89200 }, { "epoch": 0.6594275745838385, "grad_norm": 0.07437124103307724, "learning_rate": 1.709179131054131e-05, "loss": 0.0177, "step": 89210 }, { "epoch": 0.6595014931551403, "grad_norm": 0.05599772930145264, "learning_rate": 1.7088081671415006e-05, "loss": 0.0168, "step": 89220 }, { "epoch": 0.6595754117264422, "grad_norm": 0.07221709191799164, "learning_rate": 1.70843720322887e-05, "loss": 0.0156, "step": 89230 }, { "epoch": 0.659649330297744, "grad_norm": 0.08130336552858353, "learning_rate": 1.7080662393162394e-05, "loss": 0.0163, "step": 89240 }, { "epoch": 0.6597232488690459, "grad_norm": 0.0671689435839653, "learning_rate": 1.7076952754036087e-05, "loss": 0.0154, "step": 89250 }, { "epoch": 0.6597971674403477, "grad_norm": 0.09063195437192917, "learning_rate": 1.7073243114909783e-05, "loss": 0.0168, "step": 89260 }, { "epoch": 0.6598710860116496, "grad_norm": 0.07398603856563568, "learning_rate": 1.706953347578348e-05, "loss": 0.016, "step": 89270 }, { "epoch": 0.6599450045829515, "grad_norm": 0.07265040278434753, "learning_rate": 1.706582383665717e-05, "loss": 0.0187, "step": 89280 }, { "epoch": 0.6600189231542533, "grad_norm": 0.12028414756059647, "learning_rate": 1.7062114197530864e-05, "loss": 0.0163, "step": 89290 }, { "epoch": 0.6600928417255552, "grad_norm": 0.11012061685323715, "learning_rate": 1.705840455840456e-05, "loss": 0.018, "step": 89300 }, { "epoch": 0.660166760296857, "grad_norm": 0.08662872016429901, "learning_rate": 1.7054694919278252e-05, "loss": 0.0194, "step": 89310 }, { "epoch": 0.6602406788681588, "grad_norm": 0.0670333206653595, "learning_rate": 1.7050985280151948e-05, "loss": 0.0163, "step": 89320 }, { "epoch": 0.6603145974394606, "grad_norm": 0.07254930585622787, "learning_rate": 1.704727564102564e-05, "loss": 0.0162, "step": 89330 }, { "epoch": 0.6603885160107625, "grad_norm": 0.07330934703350067, "learning_rate": 1.7043566001899337e-05, "loss": 0.0189, "step": 89340 }, { "epoch": 0.6604624345820644, "grad_norm": 0.06475830078125, "learning_rate": 1.703985636277303e-05, "loss": 0.0157, "step": 89350 }, { "epoch": 0.6605363531533662, "grad_norm": 0.07823119312524796, "learning_rate": 1.7036146723646722e-05, "loss": 0.0161, "step": 89360 }, { "epoch": 0.6606102717246681, "grad_norm": 0.06793460249900818, "learning_rate": 1.7032437084520418e-05, "loss": 0.0162, "step": 89370 }, { "epoch": 0.6606841902959699, "grad_norm": 0.06623463332653046, "learning_rate": 1.7028727445394114e-05, "loss": 0.0159, "step": 89380 }, { "epoch": 0.6607581088672718, "grad_norm": 0.08912117034196854, "learning_rate": 1.7025017806267806e-05, "loss": 0.0181, "step": 89390 }, { "epoch": 0.6608320274385737, "grad_norm": 0.07295441627502441, "learning_rate": 1.70213081671415e-05, "loss": 0.0204, "step": 89400 }, { "epoch": 0.6609059460098755, "grad_norm": 0.11057400703430176, "learning_rate": 1.70175985280152e-05, "loss": 0.0185, "step": 89410 }, { "epoch": 0.6609798645811774, "grad_norm": 0.09350541979074478, "learning_rate": 1.701388888888889e-05, "loss": 0.0183, "step": 89420 }, { "epoch": 0.6610537831524792, "grad_norm": 0.060925353318452835, "learning_rate": 1.7010179249762584e-05, "loss": 0.0172, "step": 89430 }, { "epoch": 0.6611277017237811, "grad_norm": 0.09891802072525024, "learning_rate": 1.7006469610636276e-05, "loss": 0.016, "step": 89440 }, { "epoch": 0.6612016202950829, "grad_norm": 0.06763199716806412, "learning_rate": 1.7002759971509972e-05, "loss": 0.0161, "step": 89450 }, { "epoch": 0.6612755388663848, "grad_norm": 0.09075762331485748, "learning_rate": 1.6999050332383668e-05, "loss": 0.0184, "step": 89460 }, { "epoch": 0.6613494574376867, "grad_norm": 0.0649956688284874, "learning_rate": 1.699534069325736e-05, "loss": 0.0185, "step": 89470 }, { "epoch": 0.6614233760089885, "grad_norm": 0.06586815416812897, "learning_rate": 1.6991631054131053e-05, "loss": 0.018, "step": 89480 }, { "epoch": 0.6614972945802904, "grad_norm": 0.06681115925312042, "learning_rate": 1.698792141500475e-05, "loss": 0.0207, "step": 89490 }, { "epoch": 0.6615712131515922, "grad_norm": 0.06798037886619568, "learning_rate": 1.6984211775878445e-05, "loss": 0.0166, "step": 89500 }, { "epoch": 0.6616451317228941, "grad_norm": 0.08689411729574203, "learning_rate": 1.6980502136752138e-05, "loss": 0.0172, "step": 89510 }, { "epoch": 0.6617190502941959, "grad_norm": 0.08138342946767807, "learning_rate": 1.697679249762583e-05, "loss": 0.0192, "step": 89520 }, { "epoch": 0.6617929688654978, "grad_norm": 0.07708865404129028, "learning_rate": 1.6973082858499526e-05, "loss": 0.0166, "step": 89530 }, { "epoch": 0.6618668874367997, "grad_norm": 0.07389518618583679, "learning_rate": 1.696937321937322e-05, "loss": 0.0167, "step": 89540 }, { "epoch": 0.6619408060081015, "grad_norm": 0.08059065043926239, "learning_rate": 1.6965663580246915e-05, "loss": 0.0161, "step": 89550 }, { "epoch": 0.6620147245794034, "grad_norm": 0.07389674335718155, "learning_rate": 1.6961953941120607e-05, "loss": 0.0156, "step": 89560 }, { "epoch": 0.6620886431507051, "grad_norm": 0.09503727406263351, "learning_rate": 1.6958244301994303e-05, "loss": 0.0178, "step": 89570 }, { "epoch": 0.662162561722007, "grad_norm": 0.08040928840637207, "learning_rate": 1.6954534662867996e-05, "loss": 0.0178, "step": 89580 }, { "epoch": 0.6622364802933088, "grad_norm": 0.06714902818202972, "learning_rate": 1.695082502374169e-05, "loss": 0.0191, "step": 89590 }, { "epoch": 0.6623103988646107, "grad_norm": 0.1026182547211647, "learning_rate": 1.6947115384615388e-05, "loss": 0.0182, "step": 89600 }, { "epoch": 0.6623843174359126, "grad_norm": 0.06692285090684891, "learning_rate": 1.694340574548908e-05, "loss": 0.017, "step": 89610 }, { "epoch": 0.6624582360072144, "grad_norm": 0.07225298136472702, "learning_rate": 1.6939696106362773e-05, "loss": 0.0176, "step": 89620 }, { "epoch": 0.6625321545785163, "grad_norm": 0.08241157233715057, "learning_rate": 1.6935986467236466e-05, "loss": 0.0189, "step": 89630 }, { "epoch": 0.6626060731498181, "grad_norm": 0.08315838873386383, "learning_rate": 1.6932276828110165e-05, "loss": 0.0169, "step": 89640 }, { "epoch": 0.66267999172112, "grad_norm": 0.06351329386234283, "learning_rate": 1.6928567188983858e-05, "loss": 0.0166, "step": 89650 }, { "epoch": 0.6627539102924219, "grad_norm": 0.10287221521139145, "learning_rate": 1.692485754985755e-05, "loss": 0.0217, "step": 89660 }, { "epoch": 0.6628278288637237, "grad_norm": 0.07420855015516281, "learning_rate": 1.6921147910731243e-05, "loss": 0.0168, "step": 89670 }, { "epoch": 0.6629017474350256, "grad_norm": 0.08003699034452438, "learning_rate": 1.691743827160494e-05, "loss": 0.0211, "step": 89680 }, { "epoch": 0.6629756660063274, "grad_norm": 0.07801926136016846, "learning_rate": 1.6913728632478635e-05, "loss": 0.0172, "step": 89690 }, { "epoch": 0.6630495845776293, "grad_norm": 0.08778087049722672, "learning_rate": 1.6910018993352327e-05, "loss": 0.0198, "step": 89700 }, { "epoch": 0.6631235031489311, "grad_norm": 0.06600422412157059, "learning_rate": 1.690630935422602e-05, "loss": 0.0176, "step": 89710 }, { "epoch": 0.663197421720233, "grad_norm": 0.06339359283447266, "learning_rate": 1.6902599715099716e-05, "loss": 0.0189, "step": 89720 }, { "epoch": 0.6632713402915349, "grad_norm": 0.05996141955256462, "learning_rate": 1.6898890075973412e-05, "loss": 0.0158, "step": 89730 }, { "epoch": 0.6633452588628367, "grad_norm": 0.06987646967172623, "learning_rate": 1.6895180436847104e-05, "loss": 0.0165, "step": 89740 }, { "epoch": 0.6634191774341386, "grad_norm": 0.06630247831344604, "learning_rate": 1.68914707977208e-05, "loss": 0.0195, "step": 89750 }, { "epoch": 0.6634930960054404, "grad_norm": 0.09023629128932953, "learning_rate": 1.6887761158594493e-05, "loss": 0.0172, "step": 89760 }, { "epoch": 0.6635670145767423, "grad_norm": 0.09470485895872116, "learning_rate": 1.6884051519468185e-05, "loss": 0.0195, "step": 89770 }, { "epoch": 0.6636409331480441, "grad_norm": 0.09433825314044952, "learning_rate": 1.688034188034188e-05, "loss": 0.0189, "step": 89780 }, { "epoch": 0.663714851719346, "grad_norm": 0.09324505925178528, "learning_rate": 1.6876632241215577e-05, "loss": 0.0194, "step": 89790 }, { "epoch": 0.6637887702906479, "grad_norm": 0.11419341713190079, "learning_rate": 1.687292260208927e-05, "loss": 0.0184, "step": 89800 }, { "epoch": 0.6638626888619497, "grad_norm": 0.09134076535701752, "learning_rate": 1.6869212962962963e-05, "loss": 0.0179, "step": 89810 }, { "epoch": 0.6639366074332516, "grad_norm": 0.060227178037166595, "learning_rate": 1.6865503323836655e-05, "loss": 0.0164, "step": 89820 }, { "epoch": 0.6640105260045533, "grad_norm": 0.09734909236431122, "learning_rate": 1.6861793684710354e-05, "loss": 0.0167, "step": 89830 }, { "epoch": 0.6640844445758552, "grad_norm": 0.08146527409553528, "learning_rate": 1.6858084045584047e-05, "loss": 0.018, "step": 89840 }, { "epoch": 0.664158363147157, "grad_norm": 0.0977560356259346, "learning_rate": 1.685437440645774e-05, "loss": 0.0179, "step": 89850 }, { "epoch": 0.6642322817184589, "grad_norm": 0.08954362571239471, "learning_rate": 1.6850664767331432e-05, "loss": 0.0166, "step": 89860 }, { "epoch": 0.6643062002897608, "grad_norm": 0.06876461952924728, "learning_rate": 1.684695512820513e-05, "loss": 0.0161, "step": 89870 }, { "epoch": 0.6643801188610626, "grad_norm": 0.07776942104101181, "learning_rate": 1.6843245489078824e-05, "loss": 0.0167, "step": 89880 }, { "epoch": 0.6644540374323645, "grad_norm": 0.08065933734178543, "learning_rate": 1.6839535849952517e-05, "loss": 0.0182, "step": 89890 }, { "epoch": 0.6645279560036663, "grad_norm": 0.08351798355579376, "learning_rate": 1.6835826210826213e-05, "loss": 0.0187, "step": 89900 }, { "epoch": 0.6646018745749682, "grad_norm": 0.08597084134817123, "learning_rate": 1.6832116571699905e-05, "loss": 0.018, "step": 89910 }, { "epoch": 0.6646757931462701, "grad_norm": 0.07811547815799713, "learning_rate": 1.68284069325736e-05, "loss": 0.0176, "step": 89920 }, { "epoch": 0.6647497117175719, "grad_norm": 0.06780789792537689, "learning_rate": 1.6824697293447294e-05, "loss": 0.0158, "step": 89930 }, { "epoch": 0.6648236302888738, "grad_norm": 0.06562267243862152, "learning_rate": 1.682098765432099e-05, "loss": 0.0176, "step": 89940 }, { "epoch": 0.6648975488601756, "grad_norm": 0.0878724604845047, "learning_rate": 1.6817278015194682e-05, "loss": 0.0154, "step": 89950 }, { "epoch": 0.6649714674314775, "grad_norm": 0.07386815547943115, "learning_rate": 1.681356837606838e-05, "loss": 0.0149, "step": 89960 }, { "epoch": 0.6650453860027793, "grad_norm": 0.07271129637956619, "learning_rate": 1.680985873694207e-05, "loss": 0.0174, "step": 89970 }, { "epoch": 0.6651193045740812, "grad_norm": 0.07908832281827927, "learning_rate": 1.6806149097815767e-05, "loss": 0.0176, "step": 89980 }, { "epoch": 0.6651932231453831, "grad_norm": 0.08835571259260178, "learning_rate": 1.680243945868946e-05, "loss": 0.0179, "step": 89990 }, { "epoch": 0.6652671417166849, "grad_norm": 0.08931245654821396, "learning_rate": 1.6798729819563152e-05, "loss": 0.0159, "step": 90000 }, { "epoch": 0.6652671417166849, "eval_f1": 0.6309572429107293, "eval_loss": 0.017208395525813103, "eval_precision": 0.5059339765875664, "eval_recall": 0.8380512764814848, "eval_runtime": 2920.6198, "eval_samples_per_second": 185.281, "eval_steps_per_second": 2.895, "step": 90000 }, { "epoch": 0.6653410602879868, "grad_norm": 0.05561404302716255, "learning_rate": 1.6795020180436848e-05, "loss": 0.0182, "step": 90010 }, { "epoch": 0.6654149788592886, "grad_norm": 0.09877961128950119, "learning_rate": 1.6791310541310544e-05, "loss": 0.0183, "step": 90020 }, { "epoch": 0.6654888974305905, "grad_norm": 0.06422388553619385, "learning_rate": 1.6787600902184237e-05, "loss": 0.018, "step": 90030 }, { "epoch": 0.6655628160018923, "grad_norm": 0.05520254746079445, "learning_rate": 1.678389126305793e-05, "loss": 0.0171, "step": 90040 }, { "epoch": 0.6656367345731942, "grad_norm": 0.05561777949333191, "learning_rate": 1.6780181623931625e-05, "loss": 0.0167, "step": 90050 }, { "epoch": 0.6657106531444961, "grad_norm": 0.07283684611320496, "learning_rate": 1.677647198480532e-05, "loss": 0.0146, "step": 90060 }, { "epoch": 0.6657845717157979, "grad_norm": 0.07278977334499359, "learning_rate": 1.6772762345679014e-05, "loss": 0.0159, "step": 90070 }, { "epoch": 0.6658584902870998, "grad_norm": 0.08145174384117126, "learning_rate": 1.6769052706552706e-05, "loss": 0.0173, "step": 90080 }, { "epoch": 0.6659324088584015, "grad_norm": 0.05579023063182831, "learning_rate": 1.6765343067426402e-05, "loss": 0.0176, "step": 90090 }, { "epoch": 0.6660063274297034, "grad_norm": 0.07451515644788742, "learning_rate": 1.6761633428300098e-05, "loss": 0.0165, "step": 90100 }, { "epoch": 0.6660802460010052, "grad_norm": 0.08948320895433426, "learning_rate": 1.675792378917379e-05, "loss": 0.0189, "step": 90110 }, { "epoch": 0.6661541645723071, "grad_norm": 0.07860399782657623, "learning_rate": 1.6754214150047483e-05, "loss": 0.0193, "step": 90120 }, { "epoch": 0.666228083143609, "grad_norm": 0.0718916580080986, "learning_rate": 1.675050451092118e-05, "loss": 0.0175, "step": 90130 }, { "epoch": 0.6663020017149108, "grad_norm": 0.10223414748907089, "learning_rate": 1.6746794871794872e-05, "loss": 0.0196, "step": 90140 }, { "epoch": 0.6663759202862127, "grad_norm": 0.07415630668401718, "learning_rate": 1.6743085232668568e-05, "loss": 0.0173, "step": 90150 }, { "epoch": 0.6664498388575145, "grad_norm": 0.10686575621366501, "learning_rate": 1.673937559354226e-05, "loss": 0.0184, "step": 90160 }, { "epoch": 0.6665237574288164, "grad_norm": 0.062122609466314316, "learning_rate": 1.6735665954415956e-05, "loss": 0.0208, "step": 90170 }, { "epoch": 0.6665976760001183, "grad_norm": 0.1056041568517685, "learning_rate": 1.673195631528965e-05, "loss": 0.0154, "step": 90180 }, { "epoch": 0.6666715945714201, "grad_norm": 0.06856260448694229, "learning_rate": 1.6728246676163345e-05, "loss": 0.0182, "step": 90190 }, { "epoch": 0.666745513142722, "grad_norm": 0.09874051809310913, "learning_rate": 1.6724537037037037e-05, "loss": 0.0192, "step": 90200 }, { "epoch": 0.6668194317140238, "grad_norm": 0.09938324987888336, "learning_rate": 1.6720827397910733e-05, "loss": 0.0186, "step": 90210 }, { "epoch": 0.6668933502853257, "grad_norm": 0.0918031856417656, "learning_rate": 1.6717117758784426e-05, "loss": 0.0164, "step": 90220 }, { "epoch": 0.6669672688566275, "grad_norm": 0.06843070685863495, "learning_rate": 1.671340811965812e-05, "loss": 0.0187, "step": 90230 }, { "epoch": 0.6670411874279294, "grad_norm": 0.07426135241985321, "learning_rate": 1.6709698480531815e-05, "loss": 0.019, "step": 90240 }, { "epoch": 0.6671151059992313, "grad_norm": 0.07184404879808426, "learning_rate": 1.670598884140551e-05, "loss": 0.0192, "step": 90250 }, { "epoch": 0.6671890245705331, "grad_norm": 0.108861044049263, "learning_rate": 1.6702279202279203e-05, "loss": 0.0181, "step": 90260 }, { "epoch": 0.667262943141835, "grad_norm": 0.0755942165851593, "learning_rate": 1.6698569563152896e-05, "loss": 0.0168, "step": 90270 }, { "epoch": 0.6673368617131368, "grad_norm": 0.06772216409444809, "learning_rate": 1.669485992402659e-05, "loss": 0.0179, "step": 90280 }, { "epoch": 0.6674107802844387, "grad_norm": 0.07250899821519852, "learning_rate": 1.6691150284900288e-05, "loss": 0.0152, "step": 90290 }, { "epoch": 0.6674846988557405, "grad_norm": 0.07351907342672348, "learning_rate": 1.668744064577398e-05, "loss": 0.0174, "step": 90300 }, { "epoch": 0.6675586174270424, "grad_norm": 0.07086682319641113, "learning_rate": 1.6683731006647673e-05, "loss": 0.0184, "step": 90310 }, { "epoch": 0.6676325359983443, "grad_norm": 0.0855761393904686, "learning_rate": 1.668002136752137e-05, "loss": 0.0165, "step": 90320 }, { "epoch": 0.6677064545696461, "grad_norm": 0.0903254970908165, "learning_rate": 1.6676311728395065e-05, "loss": 0.0187, "step": 90330 }, { "epoch": 0.667780373140948, "grad_norm": 0.10437069088220596, "learning_rate": 1.6672602089268757e-05, "loss": 0.0174, "step": 90340 }, { "epoch": 0.6678542917122497, "grad_norm": 0.07991361618041992, "learning_rate": 1.666889245014245e-05, "loss": 0.0177, "step": 90350 }, { "epoch": 0.6679282102835516, "grad_norm": 0.06454506516456604, "learning_rate": 1.6665182811016146e-05, "loss": 0.0136, "step": 90360 }, { "epoch": 0.6680021288548534, "grad_norm": 0.09109412133693695, "learning_rate": 1.666147317188984e-05, "loss": 0.0169, "step": 90370 }, { "epoch": 0.6680760474261553, "grad_norm": 0.06350691616535187, "learning_rate": 1.6657763532763534e-05, "loss": 0.015, "step": 90380 }, { "epoch": 0.6681499659974572, "grad_norm": 0.10788552463054657, "learning_rate": 1.6654053893637227e-05, "loss": 0.0197, "step": 90390 }, { "epoch": 0.668223884568759, "grad_norm": 0.054137811064720154, "learning_rate": 1.6650344254510923e-05, "loss": 0.0161, "step": 90400 }, { "epoch": 0.6682978031400609, "grad_norm": 0.06528239697217941, "learning_rate": 1.6646634615384616e-05, "loss": 0.0167, "step": 90410 }, { "epoch": 0.6683717217113627, "grad_norm": 0.06540937721729279, "learning_rate": 1.664292497625831e-05, "loss": 0.0164, "step": 90420 }, { "epoch": 0.6684456402826646, "grad_norm": 0.07134325057268143, "learning_rate": 1.6639215337132004e-05, "loss": 0.0174, "step": 90430 }, { "epoch": 0.6685195588539665, "grad_norm": 0.08838541060686111, "learning_rate": 1.66355056980057e-05, "loss": 0.0186, "step": 90440 }, { "epoch": 0.6685934774252683, "grad_norm": 0.08562029153108597, "learning_rate": 1.6631796058879393e-05, "loss": 0.0225, "step": 90450 }, { "epoch": 0.6686673959965702, "grad_norm": 0.10071739554405212, "learning_rate": 1.6628086419753085e-05, "loss": 0.0167, "step": 90460 }, { "epoch": 0.668741314567872, "grad_norm": 0.08474022895097733, "learning_rate": 1.662437678062678e-05, "loss": 0.0204, "step": 90470 }, { "epoch": 0.6688152331391739, "grad_norm": 0.08979560434818268, "learning_rate": 1.6620667141500477e-05, "loss": 0.0154, "step": 90480 }, { "epoch": 0.6688891517104757, "grad_norm": 0.09872204810380936, "learning_rate": 1.661695750237417e-05, "loss": 0.0184, "step": 90490 }, { "epoch": 0.6689630702817776, "grad_norm": 0.07118275761604309, "learning_rate": 1.6613247863247862e-05, "loss": 0.0177, "step": 90500 }, { "epoch": 0.6690369888530795, "grad_norm": 0.08609623461961746, "learning_rate": 1.6609538224121558e-05, "loss": 0.0182, "step": 90510 }, { "epoch": 0.6691109074243813, "grad_norm": 0.08788052946329117, "learning_rate": 1.6605828584995254e-05, "loss": 0.016, "step": 90520 }, { "epoch": 0.6691848259956832, "grad_norm": 0.09005333483219147, "learning_rate": 1.6602118945868947e-05, "loss": 0.0175, "step": 90530 }, { "epoch": 0.669258744566985, "grad_norm": 0.0722627267241478, "learning_rate": 1.659840930674264e-05, "loss": 0.0189, "step": 90540 }, { "epoch": 0.6693326631382869, "grad_norm": 0.07813578844070435, "learning_rate": 1.6594699667616335e-05, "loss": 0.0187, "step": 90550 }, { "epoch": 0.6694065817095887, "grad_norm": 0.056393325328826904, "learning_rate": 1.659099002849003e-05, "loss": 0.017, "step": 90560 }, { "epoch": 0.6694805002808906, "grad_norm": 0.06716451793909073, "learning_rate": 1.6587280389363724e-05, "loss": 0.0176, "step": 90570 }, { "epoch": 0.6695544188521925, "grad_norm": 0.09170413762331009, "learning_rate": 1.6583570750237416e-05, "loss": 0.0167, "step": 90580 }, { "epoch": 0.6696283374234943, "grad_norm": 0.09170734137296677, "learning_rate": 1.6579861111111112e-05, "loss": 0.0188, "step": 90590 }, { "epoch": 0.6697022559947962, "grad_norm": 0.09092351794242859, "learning_rate": 1.6576151471984805e-05, "loss": 0.0199, "step": 90600 }, { "epoch": 0.669776174566098, "grad_norm": 0.09902980178594589, "learning_rate": 1.65724418328585e-05, "loss": 0.018, "step": 90610 }, { "epoch": 0.6698500931373998, "grad_norm": 0.09110864251852036, "learning_rate": 1.6568732193732194e-05, "loss": 0.0185, "step": 90620 }, { "epoch": 0.6699240117087016, "grad_norm": 0.06778454780578613, "learning_rate": 1.656502255460589e-05, "loss": 0.0186, "step": 90630 }, { "epoch": 0.6699979302800035, "grad_norm": 0.08745856583118439, "learning_rate": 1.6561312915479582e-05, "loss": 0.0171, "step": 90640 }, { "epoch": 0.6700718488513054, "grad_norm": 0.06933335214853287, "learning_rate": 1.6557603276353278e-05, "loss": 0.0164, "step": 90650 }, { "epoch": 0.6701457674226072, "grad_norm": 0.05806926637887955, "learning_rate": 1.655389363722697e-05, "loss": 0.0163, "step": 90660 }, { "epoch": 0.6702196859939091, "grad_norm": 0.08060045540332794, "learning_rate": 1.6550183998100667e-05, "loss": 0.0158, "step": 90670 }, { "epoch": 0.6702936045652109, "grad_norm": 0.0801333636045456, "learning_rate": 1.654647435897436e-05, "loss": 0.0202, "step": 90680 }, { "epoch": 0.6703675231365128, "grad_norm": 0.10312236845493317, "learning_rate": 1.6542764719848052e-05, "loss": 0.0189, "step": 90690 }, { "epoch": 0.6704414417078147, "grad_norm": 0.08690429478883743, "learning_rate": 1.6539055080721748e-05, "loss": 0.0176, "step": 90700 }, { "epoch": 0.6705153602791165, "grad_norm": 0.15349721908569336, "learning_rate": 1.6535345441595444e-05, "loss": 0.0178, "step": 90710 }, { "epoch": 0.6705892788504184, "grad_norm": 0.09240947663784027, "learning_rate": 1.6531635802469136e-05, "loss": 0.0172, "step": 90720 }, { "epoch": 0.6706631974217202, "grad_norm": 0.08479215949773788, "learning_rate": 1.652792616334283e-05, "loss": 0.0167, "step": 90730 }, { "epoch": 0.6707371159930221, "grad_norm": 0.08831705152988434, "learning_rate": 1.6524216524216525e-05, "loss": 0.0176, "step": 90740 }, { "epoch": 0.6708110345643239, "grad_norm": 0.11717703193426132, "learning_rate": 1.652050688509022e-05, "loss": 0.0183, "step": 90750 }, { "epoch": 0.6708849531356258, "grad_norm": 0.07897736132144928, "learning_rate": 1.6516797245963913e-05, "loss": 0.0168, "step": 90760 }, { "epoch": 0.6709588717069277, "grad_norm": 0.07475557178258896, "learning_rate": 1.6513087606837606e-05, "loss": 0.0159, "step": 90770 }, { "epoch": 0.6710327902782295, "grad_norm": 0.07179439067840576, "learning_rate": 1.6509377967711302e-05, "loss": 0.0165, "step": 90780 }, { "epoch": 0.6711067088495314, "grad_norm": 0.08902493119239807, "learning_rate": 1.6505668328584998e-05, "loss": 0.0178, "step": 90790 }, { "epoch": 0.6711806274208332, "grad_norm": 0.08185980468988419, "learning_rate": 1.650195868945869e-05, "loss": 0.0164, "step": 90800 }, { "epoch": 0.6712545459921351, "grad_norm": 0.05548747256398201, "learning_rate": 1.6498249050332383e-05, "loss": 0.0185, "step": 90810 }, { "epoch": 0.6713284645634369, "grad_norm": 0.06674123555421829, "learning_rate": 1.649453941120608e-05, "loss": 0.0197, "step": 90820 }, { "epoch": 0.6714023831347388, "grad_norm": 0.07334823161363602, "learning_rate": 1.649082977207977e-05, "loss": 0.0177, "step": 90830 }, { "epoch": 0.6714763017060407, "grad_norm": 0.08820454031229019, "learning_rate": 1.6487120132953468e-05, "loss": 0.0173, "step": 90840 }, { "epoch": 0.6715502202773425, "grad_norm": 0.07256550341844559, "learning_rate": 1.648341049382716e-05, "loss": 0.0159, "step": 90850 }, { "epoch": 0.6716241388486444, "grad_norm": 0.07709689438343048, "learning_rate": 1.6479700854700856e-05, "loss": 0.0189, "step": 90860 }, { "epoch": 0.6716980574199461, "grad_norm": 0.06899577379226685, "learning_rate": 1.647599121557455e-05, "loss": 0.0208, "step": 90870 }, { "epoch": 0.671771975991248, "grad_norm": 0.08083607256412506, "learning_rate": 1.6472281576448245e-05, "loss": 0.0167, "step": 90880 }, { "epoch": 0.67184589456255, "grad_norm": 0.08641605824232101, "learning_rate": 1.6468571937321937e-05, "loss": 0.0148, "step": 90890 }, { "epoch": 0.6719198131338517, "grad_norm": 0.07134467363357544, "learning_rate": 1.6464862298195633e-05, "loss": 0.0169, "step": 90900 }, { "epoch": 0.6719937317051536, "grad_norm": 0.09091978520154953, "learning_rate": 1.6461152659069326e-05, "loss": 0.0185, "step": 90910 }, { "epoch": 0.6720676502764554, "grad_norm": 0.07118461281061172, "learning_rate": 1.645744301994302e-05, "loss": 0.016, "step": 90920 }, { "epoch": 0.6721415688477573, "grad_norm": 0.08685034513473511, "learning_rate": 1.6453733380816714e-05, "loss": 0.0192, "step": 90930 }, { "epoch": 0.6722154874190591, "grad_norm": 0.0918172299861908, "learning_rate": 1.645002374169041e-05, "loss": 0.0165, "step": 90940 }, { "epoch": 0.672289405990361, "grad_norm": 0.09770748764276505, "learning_rate": 1.6446314102564103e-05, "loss": 0.0172, "step": 90950 }, { "epoch": 0.6723633245616629, "grad_norm": 0.11164256185293198, "learning_rate": 1.6442604463437795e-05, "loss": 0.0168, "step": 90960 }, { "epoch": 0.6724372431329647, "grad_norm": 0.06833399832248688, "learning_rate": 1.643889482431149e-05, "loss": 0.0161, "step": 90970 }, { "epoch": 0.6725111617042666, "grad_norm": 0.0737537294626236, "learning_rate": 1.6435185185185187e-05, "loss": 0.0168, "step": 90980 }, { "epoch": 0.6725850802755684, "grad_norm": 0.08121098577976227, "learning_rate": 1.643147554605888e-05, "loss": 0.0177, "step": 90990 }, { "epoch": 0.6726589988468703, "grad_norm": 0.06961645931005478, "learning_rate": 1.6427765906932573e-05, "loss": 0.016, "step": 91000 }, { "epoch": 0.6727329174181721, "grad_norm": 0.08413616567850113, "learning_rate": 1.642405626780627e-05, "loss": 0.0165, "step": 91010 }, { "epoch": 0.672806835989474, "grad_norm": 0.09649928659200668, "learning_rate": 1.6420346628679964e-05, "loss": 0.0163, "step": 91020 }, { "epoch": 0.6728807545607759, "grad_norm": 0.10375601798295975, "learning_rate": 1.6416636989553657e-05, "loss": 0.0187, "step": 91030 }, { "epoch": 0.6729546731320777, "grad_norm": 0.07088519632816315, "learning_rate": 1.641292735042735e-05, "loss": 0.0178, "step": 91040 }, { "epoch": 0.6730285917033796, "grad_norm": 0.06176723167300224, "learning_rate": 1.6409217711301046e-05, "loss": 0.015, "step": 91050 }, { "epoch": 0.6731025102746814, "grad_norm": 0.09454366564750671, "learning_rate": 1.6405508072174738e-05, "loss": 0.0176, "step": 91060 }, { "epoch": 0.6731764288459833, "grad_norm": 0.0832739919424057, "learning_rate": 1.6401798433048434e-05, "loss": 0.0179, "step": 91070 }, { "epoch": 0.6732503474172851, "grad_norm": 0.09681088477373123, "learning_rate": 1.6398088793922127e-05, "loss": 0.0189, "step": 91080 }, { "epoch": 0.673324265988587, "grad_norm": 0.08091330528259277, "learning_rate": 1.6394379154795823e-05, "loss": 0.016, "step": 91090 }, { "epoch": 0.6733981845598889, "grad_norm": 0.08992233872413635, "learning_rate": 1.6390669515669515e-05, "loss": 0.0201, "step": 91100 }, { "epoch": 0.6734721031311907, "grad_norm": 0.07696627080440521, "learning_rate": 1.638695987654321e-05, "loss": 0.0157, "step": 91110 }, { "epoch": 0.6735460217024926, "grad_norm": 0.06534789502620697, "learning_rate": 1.6383250237416907e-05, "loss": 0.0186, "step": 91120 }, { "epoch": 0.6736199402737943, "grad_norm": 0.07140354067087173, "learning_rate": 1.63795405982906e-05, "loss": 0.0189, "step": 91130 }, { "epoch": 0.6736938588450962, "grad_norm": 0.07498431950807571, "learning_rate": 1.6375830959164292e-05, "loss": 0.0175, "step": 91140 }, { "epoch": 0.6737677774163982, "grad_norm": 0.08109982311725616, "learning_rate": 1.6372121320037985e-05, "loss": 0.0174, "step": 91150 }, { "epoch": 0.6738416959876999, "grad_norm": 0.06725709140300751, "learning_rate": 1.6368411680911684e-05, "loss": 0.0175, "step": 91160 }, { "epoch": 0.6739156145590018, "grad_norm": 0.07145275175571442, "learning_rate": 1.6364702041785377e-05, "loss": 0.0175, "step": 91170 }, { "epoch": 0.6739895331303036, "grad_norm": 0.07161834836006165, "learning_rate": 1.636099240265907e-05, "loss": 0.0154, "step": 91180 }, { "epoch": 0.6740634517016055, "grad_norm": 0.07694154977798462, "learning_rate": 1.6357282763532762e-05, "loss": 0.0155, "step": 91190 }, { "epoch": 0.6741373702729073, "grad_norm": 0.06621759384870529, "learning_rate": 1.6353573124406458e-05, "loss": 0.021, "step": 91200 }, { "epoch": 0.6742112888442092, "grad_norm": 0.0970006138086319, "learning_rate": 1.6349863485280154e-05, "loss": 0.017, "step": 91210 }, { "epoch": 0.6742852074155111, "grad_norm": 0.0957852154970169, "learning_rate": 1.6346153846153847e-05, "loss": 0.0187, "step": 91220 }, { "epoch": 0.6743591259868129, "grad_norm": 0.08307493478059769, "learning_rate": 1.634244420702754e-05, "loss": 0.0161, "step": 91230 }, { "epoch": 0.6744330445581148, "grad_norm": 0.09416956454515457, "learning_rate": 1.6338734567901235e-05, "loss": 0.0173, "step": 91240 }, { "epoch": 0.6745069631294166, "grad_norm": 0.07699141651391983, "learning_rate": 1.633502492877493e-05, "loss": 0.0159, "step": 91250 }, { "epoch": 0.6745808817007185, "grad_norm": 0.06724029034376144, "learning_rate": 1.6331315289648624e-05, "loss": 0.0156, "step": 91260 }, { "epoch": 0.6746548002720203, "grad_norm": 0.10115319490432739, "learning_rate": 1.632760565052232e-05, "loss": 0.0174, "step": 91270 }, { "epoch": 0.6747287188433222, "grad_norm": 0.0618862546980381, "learning_rate": 1.6323896011396012e-05, "loss": 0.0172, "step": 91280 }, { "epoch": 0.6748026374146241, "grad_norm": 0.101004958152771, "learning_rate": 1.6320186372269705e-05, "loss": 0.0189, "step": 91290 }, { "epoch": 0.6748765559859259, "grad_norm": 0.07224094867706299, "learning_rate": 1.63164767331434e-05, "loss": 0.0175, "step": 91300 }, { "epoch": 0.6749504745572278, "grad_norm": 0.07197592407464981, "learning_rate": 1.6312767094017097e-05, "loss": 0.0171, "step": 91310 }, { "epoch": 0.6750243931285296, "grad_norm": 0.06368234008550644, "learning_rate": 1.630905745489079e-05, "loss": 0.0166, "step": 91320 }, { "epoch": 0.6750983116998315, "grad_norm": 0.0943496897816658, "learning_rate": 1.6305347815764482e-05, "loss": 0.017, "step": 91330 }, { "epoch": 0.6751722302711333, "grad_norm": 0.07670585066080093, "learning_rate": 1.6301638176638178e-05, "loss": 0.0196, "step": 91340 }, { "epoch": 0.6752461488424352, "grad_norm": 0.09279531240463257, "learning_rate": 1.6297928537511874e-05, "loss": 0.0177, "step": 91350 }, { "epoch": 0.6753200674137371, "grad_norm": 0.06604079902172089, "learning_rate": 1.6294218898385566e-05, "loss": 0.0148, "step": 91360 }, { "epoch": 0.6753939859850389, "grad_norm": 0.081661157310009, "learning_rate": 1.629050925925926e-05, "loss": 0.0165, "step": 91370 }, { "epoch": 0.6754679045563408, "grad_norm": 0.09706810116767883, "learning_rate": 1.628679962013295e-05, "loss": 0.0156, "step": 91380 }, { "epoch": 0.6755418231276425, "grad_norm": 0.07296976447105408, "learning_rate": 1.628308998100665e-05, "loss": 0.0163, "step": 91390 }, { "epoch": 0.6756157416989444, "grad_norm": 0.07825100421905518, "learning_rate": 1.6279380341880343e-05, "loss": 0.0184, "step": 91400 }, { "epoch": 0.6756896602702464, "grad_norm": 0.07666713744401932, "learning_rate": 1.6275670702754036e-05, "loss": 0.0177, "step": 91410 }, { "epoch": 0.6757635788415481, "grad_norm": 0.11629067361354828, "learning_rate": 1.6271961063627732e-05, "loss": 0.0185, "step": 91420 }, { "epoch": 0.67583749741285, "grad_norm": 0.07979996502399445, "learning_rate": 1.6268251424501425e-05, "loss": 0.0181, "step": 91430 }, { "epoch": 0.6759114159841518, "grad_norm": 0.08092159777879715, "learning_rate": 1.626454178537512e-05, "loss": 0.0169, "step": 91440 }, { "epoch": 0.6759853345554537, "grad_norm": 0.09439874440431595, "learning_rate": 1.6260832146248813e-05, "loss": 0.0178, "step": 91450 }, { "epoch": 0.6760592531267555, "grad_norm": 0.11879925429821014, "learning_rate": 1.625712250712251e-05, "loss": 0.0167, "step": 91460 }, { "epoch": 0.6761331716980574, "grad_norm": 0.06729891896247864, "learning_rate": 1.62534128679962e-05, "loss": 0.0175, "step": 91470 }, { "epoch": 0.6762070902693593, "grad_norm": 0.08897735923528671, "learning_rate": 1.6249703228869898e-05, "loss": 0.0183, "step": 91480 }, { "epoch": 0.6762810088406611, "grad_norm": 0.08789429813623428, "learning_rate": 1.624599358974359e-05, "loss": 0.0173, "step": 91490 }, { "epoch": 0.676354927411963, "grad_norm": 0.09152679145336151, "learning_rate": 1.6242283950617286e-05, "loss": 0.0167, "step": 91500 }, { "epoch": 0.6764288459832648, "grad_norm": 0.07893484830856323, "learning_rate": 1.623857431149098e-05, "loss": 0.016, "step": 91510 }, { "epoch": 0.6765027645545667, "grad_norm": 0.081581711769104, "learning_rate": 1.623486467236467e-05, "loss": 0.016, "step": 91520 }, { "epoch": 0.6765766831258685, "grad_norm": 0.060920149087905884, "learning_rate": 1.6231155033238367e-05, "loss": 0.0164, "step": 91530 }, { "epoch": 0.6766506016971704, "grad_norm": 0.0741836279630661, "learning_rate": 1.6227445394112063e-05, "loss": 0.0172, "step": 91540 }, { "epoch": 0.6767245202684723, "grad_norm": 0.05645517259836197, "learning_rate": 1.6223735754985756e-05, "loss": 0.0191, "step": 91550 }, { "epoch": 0.6767984388397741, "grad_norm": 0.0640142410993576, "learning_rate": 1.622002611585945e-05, "loss": 0.0183, "step": 91560 }, { "epoch": 0.676872357411076, "grad_norm": 0.10702574998140335, "learning_rate": 1.6216316476733144e-05, "loss": 0.0157, "step": 91570 }, { "epoch": 0.6769462759823778, "grad_norm": 0.0870826467871666, "learning_rate": 1.621260683760684e-05, "loss": 0.018, "step": 91580 }, { "epoch": 0.6770201945536797, "grad_norm": 0.10151351988315582, "learning_rate": 1.6208897198480533e-05, "loss": 0.0185, "step": 91590 }, { "epoch": 0.6770941131249815, "grad_norm": 0.0760154202580452, "learning_rate": 1.6205187559354226e-05, "loss": 0.0168, "step": 91600 }, { "epoch": 0.6771680316962834, "grad_norm": 0.0814782902598381, "learning_rate": 1.620147792022792e-05, "loss": 0.0188, "step": 91610 }, { "epoch": 0.6772419502675853, "grad_norm": 0.07616132497787476, "learning_rate": 1.6197768281101617e-05, "loss": 0.0184, "step": 91620 }, { "epoch": 0.6773158688388871, "grad_norm": 0.07322046905755997, "learning_rate": 1.619405864197531e-05, "loss": 0.0176, "step": 91630 }, { "epoch": 0.677389787410189, "grad_norm": 0.06372839957475662, "learning_rate": 1.6190349002849003e-05, "loss": 0.0169, "step": 91640 }, { "epoch": 0.6774637059814907, "grad_norm": 0.07176537066698074, "learning_rate": 1.61866393637227e-05, "loss": 0.0158, "step": 91650 }, { "epoch": 0.6775376245527926, "grad_norm": 0.0934654176235199, "learning_rate": 1.618292972459639e-05, "loss": 0.0175, "step": 91660 }, { "epoch": 0.6776115431240946, "grad_norm": 0.08245817571878433, "learning_rate": 1.6179220085470087e-05, "loss": 0.0174, "step": 91670 }, { "epoch": 0.6776854616953963, "grad_norm": 0.08012942224740982, "learning_rate": 1.617551044634378e-05, "loss": 0.0169, "step": 91680 }, { "epoch": 0.6777593802666982, "grad_norm": 0.08259119093418121, "learning_rate": 1.6171800807217476e-05, "loss": 0.0192, "step": 91690 }, { "epoch": 0.677833298838, "grad_norm": 0.07471147179603577, "learning_rate": 1.6168091168091168e-05, "loss": 0.0193, "step": 91700 }, { "epoch": 0.6779072174093019, "grad_norm": 0.0738404244184494, "learning_rate": 1.6164381528964864e-05, "loss": 0.0162, "step": 91710 }, { "epoch": 0.6779811359806037, "grad_norm": 0.07564840465784073, "learning_rate": 1.6160671889838557e-05, "loss": 0.017, "step": 91720 }, { "epoch": 0.6780550545519056, "grad_norm": 0.11400596797466278, "learning_rate": 1.6156962250712253e-05, "loss": 0.0164, "step": 91730 }, { "epoch": 0.6781289731232075, "grad_norm": 0.09355101734399796, "learning_rate": 1.6153252611585945e-05, "loss": 0.0194, "step": 91740 }, { "epoch": 0.6782028916945093, "grad_norm": 0.08981883525848389, "learning_rate": 1.6149542972459638e-05, "loss": 0.0178, "step": 91750 }, { "epoch": 0.6782768102658112, "grad_norm": 0.08405116200447083, "learning_rate": 1.6145833333333334e-05, "loss": 0.0173, "step": 91760 }, { "epoch": 0.678350728837113, "grad_norm": 0.11737560480833054, "learning_rate": 1.614212369420703e-05, "loss": 0.0195, "step": 91770 }, { "epoch": 0.6784246474084149, "grad_norm": 0.08872861415147781, "learning_rate": 1.6138414055080722e-05, "loss": 0.0184, "step": 91780 }, { "epoch": 0.6784985659797167, "grad_norm": 0.05598621815443039, "learning_rate": 1.6134704415954415e-05, "loss": 0.016, "step": 91790 }, { "epoch": 0.6785724845510186, "grad_norm": 0.0666181743144989, "learning_rate": 1.613099477682811e-05, "loss": 0.0166, "step": 91800 }, { "epoch": 0.6786464031223205, "grad_norm": 0.09435441344976425, "learning_rate": 1.6127285137701807e-05, "loss": 0.0173, "step": 91810 }, { "epoch": 0.6787203216936223, "grad_norm": 0.07333523780107498, "learning_rate": 1.61235754985755e-05, "loss": 0.0169, "step": 91820 }, { "epoch": 0.6787942402649242, "grad_norm": 0.06826969981193542, "learning_rate": 1.6119865859449192e-05, "loss": 0.0169, "step": 91830 }, { "epoch": 0.678868158836226, "grad_norm": 0.09807952493429184, "learning_rate": 1.6116156220322888e-05, "loss": 0.0176, "step": 91840 }, { "epoch": 0.6789420774075279, "grad_norm": 0.08572237193584442, "learning_rate": 1.6112446581196584e-05, "loss": 0.0191, "step": 91850 }, { "epoch": 0.6790159959788297, "grad_norm": 0.08711138367652893, "learning_rate": 1.6108736942070277e-05, "loss": 0.0152, "step": 91860 }, { "epoch": 0.6790899145501316, "grad_norm": 0.07314954698085785, "learning_rate": 1.610502730294397e-05, "loss": 0.0161, "step": 91870 }, { "epoch": 0.6791638331214335, "grad_norm": 0.06449910253286362, "learning_rate": 1.6101317663817665e-05, "loss": 0.0171, "step": 91880 }, { "epoch": 0.6792377516927353, "grad_norm": 0.06305267661809921, "learning_rate": 1.6097608024691358e-05, "loss": 0.0179, "step": 91890 }, { "epoch": 0.6793116702640372, "grad_norm": 0.062146369367837906, "learning_rate": 1.6093898385565054e-05, "loss": 0.0168, "step": 91900 }, { "epoch": 0.679385588835339, "grad_norm": 0.08478434383869171, "learning_rate": 1.6090188746438746e-05, "loss": 0.0177, "step": 91910 }, { "epoch": 0.6794595074066409, "grad_norm": 0.07532593607902527, "learning_rate": 1.6086479107312442e-05, "loss": 0.0183, "step": 91920 }, { "epoch": 0.6795334259779428, "grad_norm": 0.08118089288473129, "learning_rate": 1.6082769468186135e-05, "loss": 0.0188, "step": 91930 }, { "epoch": 0.6796073445492445, "grad_norm": 0.09082608669996262, "learning_rate": 1.607905982905983e-05, "loss": 0.0203, "step": 91940 }, { "epoch": 0.6796812631205464, "grad_norm": 0.06602920591831207, "learning_rate": 1.6075350189933523e-05, "loss": 0.0187, "step": 91950 }, { "epoch": 0.6797551816918482, "grad_norm": 0.08158482611179352, "learning_rate": 1.607164055080722e-05, "loss": 0.0187, "step": 91960 }, { "epoch": 0.6798291002631501, "grad_norm": 0.05968308076262474, "learning_rate": 1.6067930911680912e-05, "loss": 0.0159, "step": 91970 }, { "epoch": 0.6799030188344519, "grad_norm": 0.08257415890693665, "learning_rate": 1.6064221272554605e-05, "loss": 0.0176, "step": 91980 }, { "epoch": 0.6799769374057538, "grad_norm": 0.10404511541128159, "learning_rate": 1.60605116334283e-05, "loss": 0.017, "step": 91990 }, { "epoch": 0.6800508559770557, "grad_norm": 0.09237322211265564, "learning_rate": 1.6056801994301996e-05, "loss": 0.0194, "step": 92000 }, { "epoch": 0.6801247745483575, "grad_norm": 0.06430641561746597, "learning_rate": 1.605309235517569e-05, "loss": 0.0186, "step": 92010 }, { "epoch": 0.6801986931196594, "grad_norm": 0.056055862456560135, "learning_rate": 1.604938271604938e-05, "loss": 0.0173, "step": 92020 }, { "epoch": 0.6802726116909612, "grad_norm": 0.08425690978765488, "learning_rate": 1.6045673076923078e-05, "loss": 0.0183, "step": 92030 }, { "epoch": 0.6803465302622631, "grad_norm": 0.0908007025718689, "learning_rate": 1.6041963437796774e-05, "loss": 0.0177, "step": 92040 }, { "epoch": 0.6804204488335649, "grad_norm": 0.06910260766744614, "learning_rate": 1.6038253798670466e-05, "loss": 0.0207, "step": 92050 }, { "epoch": 0.6804943674048668, "grad_norm": 0.08825428038835526, "learning_rate": 1.603454415954416e-05, "loss": 0.0185, "step": 92060 }, { "epoch": 0.6805682859761687, "grad_norm": 0.07975617051124573, "learning_rate": 1.6030834520417855e-05, "loss": 0.0193, "step": 92070 }, { "epoch": 0.6806422045474705, "grad_norm": 0.08019765466451645, "learning_rate": 1.602712488129155e-05, "loss": 0.0178, "step": 92080 }, { "epoch": 0.6807161231187724, "grad_norm": 0.07777857780456543, "learning_rate": 1.6023415242165243e-05, "loss": 0.0172, "step": 92090 }, { "epoch": 0.6807900416900742, "grad_norm": 0.09756932407617569, "learning_rate": 1.6019705603038936e-05, "loss": 0.018, "step": 92100 }, { "epoch": 0.6808639602613761, "grad_norm": 0.08036884665489197, "learning_rate": 1.6015995963912632e-05, "loss": 0.019, "step": 92110 }, { "epoch": 0.6809378788326779, "grad_norm": 0.08309336006641388, "learning_rate": 1.6012286324786324e-05, "loss": 0.0171, "step": 92120 }, { "epoch": 0.6810117974039798, "grad_norm": 0.08888134360313416, "learning_rate": 1.600857668566002e-05, "loss": 0.02, "step": 92130 }, { "epoch": 0.6810857159752817, "grad_norm": 0.07211754471063614, "learning_rate": 1.6004867046533713e-05, "loss": 0.0192, "step": 92140 }, { "epoch": 0.6811596345465835, "grad_norm": 0.0735514760017395, "learning_rate": 1.600115740740741e-05, "loss": 0.0181, "step": 92150 }, { "epoch": 0.6812335531178854, "grad_norm": 0.06698044389486313, "learning_rate": 1.59974477682811e-05, "loss": 0.0166, "step": 92160 }, { "epoch": 0.6813074716891871, "grad_norm": 0.09301523864269257, "learning_rate": 1.5993738129154797e-05, "loss": 0.0176, "step": 92170 }, { "epoch": 0.681381390260489, "grad_norm": 0.09156746417284012, "learning_rate": 1.599002849002849e-05, "loss": 0.0188, "step": 92180 }, { "epoch": 0.681455308831791, "grad_norm": 0.06802339851856232, "learning_rate": 1.5986318850902186e-05, "loss": 0.0178, "step": 92190 }, { "epoch": 0.6815292274030927, "grad_norm": 0.1307523250579834, "learning_rate": 1.598260921177588e-05, "loss": 0.0166, "step": 92200 }, { "epoch": 0.6816031459743946, "grad_norm": 0.08905123174190521, "learning_rate": 1.597889957264957e-05, "loss": 0.0174, "step": 92210 }, { "epoch": 0.6816770645456964, "grad_norm": 0.0922866016626358, "learning_rate": 1.5975189933523267e-05, "loss": 0.0183, "step": 92220 }, { "epoch": 0.6817509831169983, "grad_norm": 0.0921129509806633, "learning_rate": 1.5971480294396963e-05, "loss": 0.0164, "step": 92230 }, { "epoch": 0.6818249016883001, "grad_norm": 0.06170952692627907, "learning_rate": 1.5967770655270656e-05, "loss": 0.0169, "step": 92240 }, { "epoch": 0.681898820259602, "grad_norm": 0.08036050945520401, "learning_rate": 1.5964061016144348e-05, "loss": 0.0174, "step": 92250 }, { "epoch": 0.6819727388309039, "grad_norm": 0.10198846459388733, "learning_rate": 1.5960351377018044e-05, "loss": 0.0185, "step": 92260 }, { "epoch": 0.6820466574022057, "grad_norm": 0.05803276598453522, "learning_rate": 1.595664173789174e-05, "loss": 0.0162, "step": 92270 }, { "epoch": 0.6821205759735076, "grad_norm": 0.06204792112112045, "learning_rate": 1.5952932098765433e-05, "loss": 0.0175, "step": 92280 }, { "epoch": 0.6821944945448094, "grad_norm": 0.07870964705944061, "learning_rate": 1.5949222459639125e-05, "loss": 0.0189, "step": 92290 }, { "epoch": 0.6822684131161113, "grad_norm": 0.08516126126050949, "learning_rate": 1.594551282051282e-05, "loss": 0.0178, "step": 92300 }, { "epoch": 0.6823423316874131, "grad_norm": 0.07499513030052185, "learning_rate": 1.5941803181386517e-05, "loss": 0.0165, "step": 92310 }, { "epoch": 0.682416250258715, "grad_norm": 0.07255727052688599, "learning_rate": 1.593809354226021e-05, "loss": 0.0186, "step": 92320 }, { "epoch": 0.6824901688300169, "grad_norm": 0.056045882403850555, "learning_rate": 1.5934383903133902e-05, "loss": 0.0164, "step": 92330 }, { "epoch": 0.6825640874013187, "grad_norm": 0.056728675961494446, "learning_rate": 1.59306742640076e-05, "loss": 0.0159, "step": 92340 }, { "epoch": 0.6826380059726206, "grad_norm": 0.08712863177061081, "learning_rate": 1.592696462488129e-05, "loss": 0.0168, "step": 92350 }, { "epoch": 0.6827119245439224, "grad_norm": 0.08192974328994751, "learning_rate": 1.5923254985754987e-05, "loss": 0.0179, "step": 92360 }, { "epoch": 0.6827858431152243, "grad_norm": 0.07430005073547363, "learning_rate": 1.591954534662868e-05, "loss": 0.018, "step": 92370 }, { "epoch": 0.6828597616865261, "grad_norm": 0.0707472711801529, "learning_rate": 1.5915835707502375e-05, "loss": 0.0191, "step": 92380 }, { "epoch": 0.682933680257828, "grad_norm": 0.08295336365699768, "learning_rate": 1.5912126068376068e-05, "loss": 0.0181, "step": 92390 }, { "epoch": 0.6830075988291299, "grad_norm": 0.0547831691801548, "learning_rate": 1.5908416429249764e-05, "loss": 0.0167, "step": 92400 }, { "epoch": 0.6830815174004317, "grad_norm": 0.0616627112030983, "learning_rate": 1.5904706790123457e-05, "loss": 0.0166, "step": 92410 }, { "epoch": 0.6831554359717336, "grad_norm": 0.07605061680078506, "learning_rate": 1.5900997150997153e-05, "loss": 0.0174, "step": 92420 }, { "epoch": 0.6832293545430353, "grad_norm": 0.07115055620670319, "learning_rate": 1.5897287511870845e-05, "loss": 0.0147, "step": 92430 }, { "epoch": 0.6833032731143373, "grad_norm": 0.10412179678678513, "learning_rate": 1.5893577872744538e-05, "loss": 0.0189, "step": 92440 }, { "epoch": 0.6833771916856392, "grad_norm": 0.067973293364048, "learning_rate": 1.5889868233618234e-05, "loss": 0.0169, "step": 92450 }, { "epoch": 0.6834511102569409, "grad_norm": 0.07125255465507507, "learning_rate": 1.588615859449193e-05, "loss": 0.0196, "step": 92460 }, { "epoch": 0.6835250288282428, "grad_norm": 0.06105915084481239, "learning_rate": 1.5882448955365622e-05, "loss": 0.0171, "step": 92470 }, { "epoch": 0.6835989473995446, "grad_norm": 0.07705443352460861, "learning_rate": 1.5878739316239315e-05, "loss": 0.0161, "step": 92480 }, { "epoch": 0.6836728659708465, "grad_norm": 0.07152712345123291, "learning_rate": 1.5875029677113014e-05, "loss": 0.0178, "step": 92490 }, { "epoch": 0.6837467845421483, "grad_norm": 0.06911912560462952, "learning_rate": 1.5871320037986707e-05, "loss": 0.02, "step": 92500 }, { "epoch": 0.6838207031134502, "grad_norm": 0.06743113696575165, "learning_rate": 1.58676103988604e-05, "loss": 0.0148, "step": 92510 }, { "epoch": 0.6838946216847521, "grad_norm": 0.0807114690542221, "learning_rate": 1.5863900759734092e-05, "loss": 0.0175, "step": 92520 }, { "epoch": 0.6839685402560539, "grad_norm": 0.10657806694507599, "learning_rate": 1.5860191120607788e-05, "loss": 0.0179, "step": 92530 }, { "epoch": 0.6840424588273558, "grad_norm": 0.08332397043704987, "learning_rate": 1.5856481481481484e-05, "loss": 0.0176, "step": 92540 }, { "epoch": 0.6841163773986576, "grad_norm": 0.06702392548322678, "learning_rate": 1.5852771842355176e-05, "loss": 0.0158, "step": 92550 }, { "epoch": 0.6841902959699595, "grad_norm": 0.07311967760324478, "learning_rate": 1.584906220322887e-05, "loss": 0.0177, "step": 92560 }, { "epoch": 0.6842642145412613, "grad_norm": 0.06532112509012222, "learning_rate": 1.5845352564102565e-05, "loss": 0.0166, "step": 92570 }, { "epoch": 0.6843381331125632, "grad_norm": 0.097222238779068, "learning_rate": 1.5841642924976257e-05, "loss": 0.0193, "step": 92580 }, { "epoch": 0.6844120516838651, "grad_norm": 0.08110792189836502, "learning_rate": 1.5837933285849953e-05, "loss": 0.0163, "step": 92590 }, { "epoch": 0.6844859702551669, "grad_norm": 0.06945564597845078, "learning_rate": 1.5834223646723646e-05, "loss": 0.0168, "step": 92600 }, { "epoch": 0.6845598888264688, "grad_norm": 0.10886241495609283, "learning_rate": 1.5830514007597342e-05, "loss": 0.0201, "step": 92610 }, { "epoch": 0.6846338073977706, "grad_norm": 0.06769303977489471, "learning_rate": 1.5826804368471035e-05, "loss": 0.016, "step": 92620 }, { "epoch": 0.6847077259690725, "grad_norm": 0.05759311467409134, "learning_rate": 1.582309472934473e-05, "loss": 0.016, "step": 92630 }, { "epoch": 0.6847816445403744, "grad_norm": 0.07953110337257385, "learning_rate": 1.5819385090218427e-05, "loss": 0.0184, "step": 92640 }, { "epoch": 0.6848555631116762, "grad_norm": 0.07198061048984528, "learning_rate": 1.581567545109212e-05, "loss": 0.0177, "step": 92650 }, { "epoch": 0.6849294816829781, "grad_norm": 0.054443176835775375, "learning_rate": 1.581196581196581e-05, "loss": 0.0163, "step": 92660 }, { "epoch": 0.6850034002542799, "grad_norm": 0.1029653251171112, "learning_rate": 1.5808256172839504e-05, "loss": 0.0183, "step": 92670 }, { "epoch": 0.6850773188255818, "grad_norm": 0.07635175436735153, "learning_rate": 1.5804546533713204e-05, "loss": 0.0149, "step": 92680 }, { "epoch": 0.6851512373968836, "grad_norm": 0.07192494720220566, "learning_rate": 1.5800836894586896e-05, "loss": 0.0137, "step": 92690 }, { "epoch": 0.6852251559681855, "grad_norm": 0.08006956428289413, "learning_rate": 1.579712725546059e-05, "loss": 0.017, "step": 92700 }, { "epoch": 0.6852990745394874, "grad_norm": 0.07870300859212875, "learning_rate": 1.579341761633428e-05, "loss": 0.0166, "step": 92710 }, { "epoch": 0.6853729931107891, "grad_norm": 0.057502858340740204, "learning_rate": 1.578970797720798e-05, "loss": 0.0156, "step": 92720 }, { "epoch": 0.685446911682091, "grad_norm": 0.06075780466198921, "learning_rate": 1.5785998338081673e-05, "loss": 0.0175, "step": 92730 }, { "epoch": 0.6855208302533928, "grad_norm": 0.08338859677314758, "learning_rate": 1.5782288698955366e-05, "loss": 0.02, "step": 92740 }, { "epoch": 0.6855947488246947, "grad_norm": 0.08002070337533951, "learning_rate": 1.577857905982906e-05, "loss": 0.0183, "step": 92750 }, { "epoch": 0.6856686673959965, "grad_norm": 0.05922900512814522, "learning_rate": 1.5774869420702754e-05, "loss": 0.0167, "step": 92760 }, { "epoch": 0.6857425859672984, "grad_norm": 0.054509907960891724, "learning_rate": 1.577115978157645e-05, "loss": 0.0164, "step": 92770 }, { "epoch": 0.6858165045386003, "grad_norm": 0.07607532292604446, "learning_rate": 1.5767450142450143e-05, "loss": 0.015, "step": 92780 }, { "epoch": 0.6858904231099021, "grad_norm": 0.0719480961561203, "learning_rate": 1.5763740503323836e-05, "loss": 0.0206, "step": 92790 }, { "epoch": 0.685964341681204, "grad_norm": 0.0783941000699997, "learning_rate": 1.576003086419753e-05, "loss": 0.0189, "step": 92800 }, { "epoch": 0.6860382602525058, "grad_norm": 0.0678236335515976, "learning_rate": 1.5756321225071227e-05, "loss": 0.0161, "step": 92810 }, { "epoch": 0.6861121788238077, "grad_norm": 0.0772695392370224, "learning_rate": 1.575261158594492e-05, "loss": 0.0175, "step": 92820 }, { "epoch": 0.6861860973951095, "grad_norm": 0.09057267755270004, "learning_rate": 1.5748901946818616e-05, "loss": 0.0151, "step": 92830 }, { "epoch": 0.6862600159664114, "grad_norm": 0.08303174376487732, "learning_rate": 1.574519230769231e-05, "loss": 0.0185, "step": 92840 }, { "epoch": 0.6863339345377133, "grad_norm": 0.08072768896818161, "learning_rate": 1.5741482668566e-05, "loss": 0.0182, "step": 92850 }, { "epoch": 0.6864078531090151, "grad_norm": 0.08461995422840118, "learning_rate": 1.5737773029439697e-05, "loss": 0.0184, "step": 92860 }, { "epoch": 0.686481771680317, "grad_norm": 0.07058220356702805, "learning_rate": 1.5734063390313393e-05, "loss": 0.0145, "step": 92870 }, { "epoch": 0.6865556902516188, "grad_norm": 0.07958796620368958, "learning_rate": 1.5730353751187086e-05, "loss": 0.0169, "step": 92880 }, { "epoch": 0.6866296088229207, "grad_norm": 0.1059829518198967, "learning_rate": 1.5726644112060778e-05, "loss": 0.0179, "step": 92890 }, { "epoch": 0.6867035273942226, "grad_norm": 0.06931605190038681, "learning_rate": 1.572293447293447e-05, "loss": 0.0166, "step": 92900 }, { "epoch": 0.6867774459655244, "grad_norm": 0.06818471103906631, "learning_rate": 1.571922483380817e-05, "loss": 0.0158, "step": 92910 }, { "epoch": 0.6868513645368263, "grad_norm": 0.05438080057501793, "learning_rate": 1.5715515194681863e-05, "loss": 0.0157, "step": 92920 }, { "epoch": 0.6869252831081281, "grad_norm": 0.07555864006280899, "learning_rate": 1.5711805555555555e-05, "loss": 0.0171, "step": 92930 }, { "epoch": 0.68699920167943, "grad_norm": 0.06466842442750931, "learning_rate": 1.5708095916429248e-05, "loss": 0.018, "step": 92940 }, { "epoch": 0.6870731202507318, "grad_norm": 0.06618928164243698, "learning_rate": 1.5704386277302947e-05, "loss": 0.0167, "step": 92950 }, { "epoch": 0.6871470388220337, "grad_norm": 0.09534517675638199, "learning_rate": 1.570067663817664e-05, "loss": 0.0164, "step": 92960 }, { "epoch": 0.6872209573933356, "grad_norm": 0.08508911728858948, "learning_rate": 1.5696966999050332e-05, "loss": 0.0178, "step": 92970 }, { "epoch": 0.6872948759646373, "grad_norm": 0.09765244275331497, "learning_rate": 1.569325735992403e-05, "loss": 0.0188, "step": 92980 }, { "epoch": 0.6873687945359392, "grad_norm": 0.09178492426872253, "learning_rate": 1.568954772079772e-05, "loss": 0.0173, "step": 92990 }, { "epoch": 0.687442713107241, "grad_norm": 0.08355975896120071, "learning_rate": 1.5685838081671417e-05, "loss": 0.0174, "step": 93000 }, { "epoch": 0.6875166316785429, "grad_norm": 0.07787944376468658, "learning_rate": 1.568212844254511e-05, "loss": 0.0153, "step": 93010 }, { "epoch": 0.6875905502498447, "grad_norm": 0.06931786239147186, "learning_rate": 1.5678418803418806e-05, "loss": 0.0188, "step": 93020 }, { "epoch": 0.6876644688211466, "grad_norm": 0.07735756039619446, "learning_rate": 1.5674709164292498e-05, "loss": 0.0196, "step": 93030 }, { "epoch": 0.6877383873924485, "grad_norm": 0.07819321006536484, "learning_rate": 1.5670999525166194e-05, "loss": 0.0171, "step": 93040 }, { "epoch": 0.6878123059637503, "grad_norm": 0.09739043563604355, "learning_rate": 1.5667289886039887e-05, "loss": 0.0177, "step": 93050 }, { "epoch": 0.6878862245350522, "grad_norm": 0.06608149409294128, "learning_rate": 1.5663580246913583e-05, "loss": 0.016, "step": 93060 }, { "epoch": 0.687960143106354, "grad_norm": 0.08745332807302475, "learning_rate": 1.5659870607787275e-05, "loss": 0.0169, "step": 93070 }, { "epoch": 0.6880340616776559, "grad_norm": 0.07087000459432602, "learning_rate": 1.5656160968660968e-05, "loss": 0.016, "step": 93080 }, { "epoch": 0.6881079802489577, "grad_norm": 0.08703728020191193, "learning_rate": 1.5652451329534664e-05, "loss": 0.0154, "step": 93090 }, { "epoch": 0.6881818988202596, "grad_norm": 0.09080562740564346, "learning_rate": 1.564874169040836e-05, "loss": 0.0189, "step": 93100 }, { "epoch": 0.6882558173915615, "grad_norm": 0.09614677727222443, "learning_rate": 1.5645032051282052e-05, "loss": 0.0192, "step": 93110 }, { "epoch": 0.6883297359628633, "grad_norm": 0.06185782700777054, "learning_rate": 1.5641322412155745e-05, "loss": 0.017, "step": 93120 }, { "epoch": 0.6884036545341652, "grad_norm": 0.0741971805691719, "learning_rate": 1.563761277302944e-05, "loss": 0.0175, "step": 93130 }, { "epoch": 0.688477573105467, "grad_norm": 0.0744548961520195, "learning_rate": 1.5633903133903137e-05, "loss": 0.0176, "step": 93140 }, { "epoch": 0.6885514916767689, "grad_norm": 0.07029058784246445, "learning_rate": 1.563019349477683e-05, "loss": 0.0177, "step": 93150 }, { "epoch": 0.6886254102480708, "grad_norm": 0.05117655545473099, "learning_rate": 1.5626483855650522e-05, "loss": 0.0166, "step": 93160 }, { "epoch": 0.6886993288193726, "grad_norm": 0.07720599323511124, "learning_rate": 1.5622774216524218e-05, "loss": 0.02, "step": 93170 }, { "epoch": 0.6887732473906745, "grad_norm": 0.0722067803144455, "learning_rate": 1.5619064577397914e-05, "loss": 0.0179, "step": 93180 }, { "epoch": 0.6888471659619763, "grad_norm": 0.06938211619853973, "learning_rate": 1.5615354938271606e-05, "loss": 0.0162, "step": 93190 }, { "epoch": 0.6889210845332782, "grad_norm": 0.07468469440937042, "learning_rate": 1.56116452991453e-05, "loss": 0.0177, "step": 93200 }, { "epoch": 0.68899500310458, "grad_norm": 0.05706126615405083, "learning_rate": 1.5607935660018995e-05, "loss": 0.0172, "step": 93210 }, { "epoch": 0.6890689216758819, "grad_norm": 0.0980379581451416, "learning_rate": 1.5604226020892688e-05, "loss": 0.019, "step": 93220 }, { "epoch": 0.6891428402471838, "grad_norm": 0.09685836732387543, "learning_rate": 1.5600516381766384e-05, "loss": 0.0173, "step": 93230 }, { "epoch": 0.6892167588184855, "grad_norm": 0.0751485526561737, "learning_rate": 1.5596806742640076e-05, "loss": 0.0171, "step": 93240 }, { "epoch": 0.6892906773897874, "grad_norm": 0.10697372257709503, "learning_rate": 1.5593097103513772e-05, "loss": 0.0173, "step": 93250 }, { "epoch": 0.6893645959610892, "grad_norm": 0.09985894709825516, "learning_rate": 1.5589387464387465e-05, "loss": 0.0184, "step": 93260 }, { "epoch": 0.6894385145323911, "grad_norm": 0.07273690402507782, "learning_rate": 1.558567782526116e-05, "loss": 0.0197, "step": 93270 }, { "epoch": 0.6895124331036929, "grad_norm": 0.059736963361501694, "learning_rate": 1.5581968186134853e-05, "loss": 0.018, "step": 93280 }, { "epoch": 0.6895863516749948, "grad_norm": 0.05851113796234131, "learning_rate": 1.557825854700855e-05, "loss": 0.0174, "step": 93290 }, { "epoch": 0.6896602702462967, "grad_norm": 0.06368900835514069, "learning_rate": 1.5574548907882242e-05, "loss": 0.017, "step": 93300 }, { "epoch": 0.6897341888175985, "grad_norm": 0.077921062707901, "learning_rate": 1.5570839268755934e-05, "loss": 0.0187, "step": 93310 }, { "epoch": 0.6898081073889004, "grad_norm": 0.07463159412145615, "learning_rate": 1.556712962962963e-05, "loss": 0.0193, "step": 93320 }, { "epoch": 0.6898820259602022, "grad_norm": 0.09554771333932877, "learning_rate": 1.5563419990503326e-05, "loss": 0.0191, "step": 93330 }, { "epoch": 0.6899559445315041, "grad_norm": 0.09546215832233429, "learning_rate": 1.555971035137702e-05, "loss": 0.0188, "step": 93340 }, { "epoch": 0.6900298631028059, "grad_norm": 0.08112092316150665, "learning_rate": 1.555600071225071e-05, "loss": 0.018, "step": 93350 }, { "epoch": 0.6901037816741078, "grad_norm": 0.07517974823713303, "learning_rate": 1.5552291073124407e-05, "loss": 0.0143, "step": 93360 }, { "epoch": 0.6901777002454097, "grad_norm": 0.08749409019947052, "learning_rate": 1.5548581433998103e-05, "loss": 0.0191, "step": 93370 }, { "epoch": 0.6902516188167115, "grad_norm": 0.05953217297792435, "learning_rate": 1.5544871794871796e-05, "loss": 0.0176, "step": 93380 }, { "epoch": 0.6903255373880134, "grad_norm": 0.07596410810947418, "learning_rate": 1.554116215574549e-05, "loss": 0.0164, "step": 93390 }, { "epoch": 0.6903994559593152, "grad_norm": 0.07445093989372253, "learning_rate": 1.5537452516619184e-05, "loss": 0.0173, "step": 93400 }, { "epoch": 0.6904733745306171, "grad_norm": 0.08547616004943848, "learning_rate": 1.553374287749288e-05, "loss": 0.0167, "step": 93410 }, { "epoch": 0.690547293101919, "grad_norm": 0.0664495974779129, "learning_rate": 1.5530033238366573e-05, "loss": 0.0194, "step": 93420 }, { "epoch": 0.6906212116732208, "grad_norm": 0.07082750648260117, "learning_rate": 1.5526323599240266e-05, "loss": 0.0164, "step": 93430 }, { "epoch": 0.6906951302445227, "grad_norm": 0.06069180741906166, "learning_rate": 1.552261396011396e-05, "loss": 0.0158, "step": 93440 }, { "epoch": 0.6907690488158245, "grad_norm": 0.10092563927173615, "learning_rate": 1.5518904320987654e-05, "loss": 0.0187, "step": 93450 }, { "epoch": 0.6908429673871264, "grad_norm": 0.04582209512591362, "learning_rate": 1.551519468186135e-05, "loss": 0.0161, "step": 93460 }, { "epoch": 0.6909168859584282, "grad_norm": 0.0675414577126503, "learning_rate": 1.5511485042735043e-05, "loss": 0.0178, "step": 93470 }, { "epoch": 0.69099080452973, "grad_norm": 0.09676419943571091, "learning_rate": 1.550777540360874e-05, "loss": 0.0178, "step": 93480 }, { "epoch": 0.691064723101032, "grad_norm": 0.10699211061000824, "learning_rate": 1.550406576448243e-05, "loss": 0.0191, "step": 93490 }, { "epoch": 0.6911386416723337, "grad_norm": 0.06596608459949493, "learning_rate": 1.5500356125356127e-05, "loss": 0.0165, "step": 93500 }, { "epoch": 0.6912125602436356, "grad_norm": 0.07985042035579681, "learning_rate": 1.549664648622982e-05, "loss": 0.0182, "step": 93510 }, { "epoch": 0.6912864788149374, "grad_norm": 0.0778883844614029, "learning_rate": 1.5492936847103516e-05, "loss": 0.0154, "step": 93520 }, { "epoch": 0.6913603973862393, "grad_norm": 0.07879475504159927, "learning_rate": 1.548922720797721e-05, "loss": 0.0172, "step": 93530 }, { "epoch": 0.6914343159575411, "grad_norm": 0.07020098716020584, "learning_rate": 1.54855175688509e-05, "loss": 0.0182, "step": 93540 }, { "epoch": 0.691508234528843, "grad_norm": 0.0860968828201294, "learning_rate": 1.5481807929724597e-05, "loss": 0.0186, "step": 93550 }, { "epoch": 0.6915821531001449, "grad_norm": 0.06118820235133171, "learning_rate": 1.5478098290598293e-05, "loss": 0.0167, "step": 93560 }, { "epoch": 0.6916560716714467, "grad_norm": 0.09667540341615677, "learning_rate": 1.5474388651471985e-05, "loss": 0.0173, "step": 93570 }, { "epoch": 0.6917299902427486, "grad_norm": 0.05607501044869423, "learning_rate": 1.5470679012345678e-05, "loss": 0.0154, "step": 93580 }, { "epoch": 0.6918039088140504, "grad_norm": 0.05989857390522957, "learning_rate": 1.5466969373219374e-05, "loss": 0.0174, "step": 93590 }, { "epoch": 0.6918778273853523, "grad_norm": 0.21198701858520508, "learning_rate": 1.546325973409307e-05, "loss": 0.0159, "step": 93600 }, { "epoch": 0.6919517459566541, "grad_norm": 0.05558236688375473, "learning_rate": 1.5459550094966763e-05, "loss": 0.0144, "step": 93610 }, { "epoch": 0.692025664527956, "grad_norm": 0.08093948662281036, "learning_rate": 1.5455840455840455e-05, "loss": 0.0157, "step": 93620 }, { "epoch": 0.6920995830992579, "grad_norm": 0.09775768965482712, "learning_rate": 1.545213081671415e-05, "loss": 0.02, "step": 93630 }, { "epoch": 0.6921735016705597, "grad_norm": 0.07269429415464401, "learning_rate": 1.5448421177587847e-05, "loss": 0.0176, "step": 93640 }, { "epoch": 0.6922474202418616, "grad_norm": 0.06962566077709198, "learning_rate": 1.544471153846154e-05, "loss": 0.016, "step": 93650 }, { "epoch": 0.6923213388131634, "grad_norm": 0.0953865498304367, "learning_rate": 1.5441001899335232e-05, "loss": 0.0181, "step": 93660 }, { "epoch": 0.6923952573844653, "grad_norm": 0.0926433652639389, "learning_rate": 1.5437292260208928e-05, "loss": 0.0184, "step": 93670 }, { "epoch": 0.6924691759557672, "grad_norm": 0.04853018373250961, "learning_rate": 1.543358262108262e-05, "loss": 0.014, "step": 93680 }, { "epoch": 0.692543094527069, "grad_norm": 0.07597115635871887, "learning_rate": 1.5429872981956317e-05, "loss": 0.0163, "step": 93690 }, { "epoch": 0.6926170130983709, "grad_norm": 0.10934596508741379, "learning_rate": 1.542616334283001e-05, "loss": 0.017, "step": 93700 }, { "epoch": 0.6926909316696727, "grad_norm": 0.06172508746385574, "learning_rate": 1.5422453703703705e-05, "loss": 0.0138, "step": 93710 }, { "epoch": 0.6927648502409746, "grad_norm": 0.08862422406673431, "learning_rate": 1.5418744064577398e-05, "loss": 0.0208, "step": 93720 }, { "epoch": 0.6928387688122764, "grad_norm": 0.09241653978824615, "learning_rate": 1.5415034425451094e-05, "loss": 0.0205, "step": 93730 }, { "epoch": 0.6929126873835783, "grad_norm": 0.05826287344098091, "learning_rate": 1.5411324786324786e-05, "loss": 0.0154, "step": 93740 }, { "epoch": 0.6929866059548802, "grad_norm": 0.06904604285955429, "learning_rate": 1.5407615147198482e-05, "loss": 0.0168, "step": 93750 }, { "epoch": 0.6930605245261819, "grad_norm": 0.07445508241653442, "learning_rate": 1.5403905508072175e-05, "loss": 0.0162, "step": 93760 }, { "epoch": 0.6931344430974838, "grad_norm": 0.07311367243528366, "learning_rate": 1.5400195868945868e-05, "loss": 0.0177, "step": 93770 }, { "epoch": 0.6932083616687856, "grad_norm": 0.09041839092969894, "learning_rate": 1.5396486229819563e-05, "loss": 0.0197, "step": 93780 }, { "epoch": 0.6932822802400875, "grad_norm": 0.07511784881353378, "learning_rate": 1.539277659069326e-05, "loss": 0.0164, "step": 93790 }, { "epoch": 0.6933561988113893, "grad_norm": 0.0799839198589325, "learning_rate": 1.5389066951566952e-05, "loss": 0.0195, "step": 93800 }, { "epoch": 0.6934301173826912, "grad_norm": 0.09590111672878265, "learning_rate": 1.5385357312440645e-05, "loss": 0.0188, "step": 93810 }, { "epoch": 0.6935040359539931, "grad_norm": 0.08044479787349701, "learning_rate": 1.538164767331434e-05, "loss": 0.0175, "step": 93820 }, { "epoch": 0.6935779545252949, "grad_norm": 0.08378466963768005, "learning_rate": 1.5377938034188037e-05, "loss": 0.0184, "step": 93830 }, { "epoch": 0.6936518730965968, "grad_norm": 0.08196622878313065, "learning_rate": 1.537422839506173e-05, "loss": 0.0189, "step": 93840 }, { "epoch": 0.6937257916678986, "grad_norm": 0.05602654069662094, "learning_rate": 1.537051875593542e-05, "loss": 0.0169, "step": 93850 }, { "epoch": 0.6937997102392005, "grad_norm": 0.07777991145849228, "learning_rate": 1.5366809116809118e-05, "loss": 0.0187, "step": 93860 }, { "epoch": 0.6938736288105023, "grad_norm": 0.12807385623455048, "learning_rate": 1.5363099477682814e-05, "loss": 0.0204, "step": 93870 }, { "epoch": 0.6939475473818042, "grad_norm": 0.08109208196401596, "learning_rate": 1.5359389838556506e-05, "loss": 0.0173, "step": 93880 }, { "epoch": 0.6940214659531061, "grad_norm": 0.06914292275905609, "learning_rate": 1.53556801994302e-05, "loss": 0.0159, "step": 93890 }, { "epoch": 0.6940953845244079, "grad_norm": 0.08813401311635971, "learning_rate": 1.5351970560303895e-05, "loss": 0.0179, "step": 93900 }, { "epoch": 0.6941693030957098, "grad_norm": 0.07378672063350677, "learning_rate": 1.5348260921177587e-05, "loss": 0.017, "step": 93910 }, { "epoch": 0.6942432216670116, "grad_norm": 0.08109115809202194, "learning_rate": 1.5344551282051283e-05, "loss": 0.018, "step": 93920 }, { "epoch": 0.6943171402383135, "grad_norm": 0.08002543449401855, "learning_rate": 1.5340841642924976e-05, "loss": 0.0181, "step": 93930 }, { "epoch": 0.6943910588096154, "grad_norm": 0.07397976517677307, "learning_rate": 1.5337132003798672e-05, "loss": 0.0171, "step": 93940 }, { "epoch": 0.6944649773809172, "grad_norm": 0.05872524902224541, "learning_rate": 1.5333422364672364e-05, "loss": 0.0155, "step": 93950 }, { "epoch": 0.6945388959522191, "grad_norm": 0.14280560612678528, "learning_rate": 1.532971272554606e-05, "loss": 0.0196, "step": 93960 }, { "epoch": 0.6946128145235209, "grad_norm": 0.06711677461862564, "learning_rate": 1.5326003086419753e-05, "loss": 0.0163, "step": 93970 }, { "epoch": 0.6946867330948228, "grad_norm": 0.09471774846315384, "learning_rate": 1.532229344729345e-05, "loss": 0.0169, "step": 93980 }, { "epoch": 0.6947606516661246, "grad_norm": 0.06166717782616615, "learning_rate": 1.531858380816714e-05, "loss": 0.0169, "step": 93990 }, { "epoch": 0.6948345702374265, "grad_norm": 0.06437874585390091, "learning_rate": 1.5314874169040834e-05, "loss": 0.0183, "step": 94000 }, { "epoch": 0.6949084888087284, "grad_norm": 0.07692473381757736, "learning_rate": 1.5311164529914533e-05, "loss": 0.0169, "step": 94010 }, { "epoch": 0.6949824073800301, "grad_norm": 0.11510708928108215, "learning_rate": 1.5307454890788226e-05, "loss": 0.0153, "step": 94020 }, { "epoch": 0.695056325951332, "grad_norm": 0.10411643981933594, "learning_rate": 1.530374525166192e-05, "loss": 0.0216, "step": 94030 }, { "epoch": 0.6951302445226338, "grad_norm": 0.0748017430305481, "learning_rate": 1.530003561253561e-05, "loss": 0.0182, "step": 94040 }, { "epoch": 0.6952041630939357, "grad_norm": 0.07279026508331299, "learning_rate": 1.5296325973409307e-05, "loss": 0.0195, "step": 94050 }, { "epoch": 0.6952780816652375, "grad_norm": 0.07129613310098648, "learning_rate": 1.5292616334283003e-05, "loss": 0.0163, "step": 94060 }, { "epoch": 0.6953520002365394, "grad_norm": 0.05188415199518204, "learning_rate": 1.5288906695156696e-05, "loss": 0.0156, "step": 94070 }, { "epoch": 0.6954259188078413, "grad_norm": 0.056653719395399094, "learning_rate": 1.5285197056030388e-05, "loss": 0.0162, "step": 94080 }, { "epoch": 0.6954998373791431, "grad_norm": 0.06849221140146255, "learning_rate": 1.5281487416904084e-05, "loss": 0.02, "step": 94090 }, { "epoch": 0.695573755950445, "grad_norm": 0.08342500030994415, "learning_rate": 1.527777777777778e-05, "loss": 0.0185, "step": 94100 }, { "epoch": 0.6956476745217468, "grad_norm": 0.07203114032745361, "learning_rate": 1.5274068138651473e-05, "loss": 0.0166, "step": 94110 }, { "epoch": 0.6957215930930487, "grad_norm": 0.06841064244508743, "learning_rate": 1.5270358499525165e-05, "loss": 0.0178, "step": 94120 }, { "epoch": 0.6957955116643505, "grad_norm": 0.07839217036962509, "learning_rate": 1.526664886039886e-05, "loss": 0.0157, "step": 94130 }, { "epoch": 0.6958694302356524, "grad_norm": 0.0921502634882927, "learning_rate": 1.5262939221272554e-05, "loss": 0.0173, "step": 94140 }, { "epoch": 0.6959433488069543, "grad_norm": 0.07723719626665115, "learning_rate": 1.525922958214625e-05, "loss": 0.019, "step": 94150 }, { "epoch": 0.6960172673782561, "grad_norm": 0.07200010120868683, "learning_rate": 1.5255519943019944e-05, "loss": 0.0166, "step": 94160 }, { "epoch": 0.696091185949558, "grad_norm": 0.06452837586402893, "learning_rate": 1.5251810303893638e-05, "loss": 0.0162, "step": 94170 }, { "epoch": 0.6961651045208598, "grad_norm": 0.06620864570140839, "learning_rate": 1.5248100664767331e-05, "loss": 0.0177, "step": 94180 }, { "epoch": 0.6962390230921617, "grad_norm": 0.12665487825870514, "learning_rate": 1.5244391025641027e-05, "loss": 0.0195, "step": 94190 }, { "epoch": 0.6963129416634636, "grad_norm": 0.08196282386779785, "learning_rate": 1.5240681386514721e-05, "loss": 0.0181, "step": 94200 }, { "epoch": 0.6963868602347654, "grad_norm": 0.0876227468252182, "learning_rate": 1.5236971747388416e-05, "loss": 0.0189, "step": 94210 }, { "epoch": 0.6964607788060673, "grad_norm": 0.07745127379894257, "learning_rate": 1.5233262108262108e-05, "loss": 0.0168, "step": 94220 }, { "epoch": 0.6965346973773691, "grad_norm": 0.05537101626396179, "learning_rate": 1.5229552469135802e-05, "loss": 0.0172, "step": 94230 }, { "epoch": 0.696608615948671, "grad_norm": 0.09687453508377075, "learning_rate": 1.5225842830009498e-05, "loss": 0.0176, "step": 94240 }, { "epoch": 0.6966825345199728, "grad_norm": 0.08245055377483368, "learning_rate": 1.5222133190883193e-05, "loss": 0.0171, "step": 94250 }, { "epoch": 0.6967564530912747, "grad_norm": 0.07196832448244095, "learning_rate": 1.5218423551756885e-05, "loss": 0.0171, "step": 94260 }, { "epoch": 0.6968303716625766, "grad_norm": 0.08605038374662399, "learning_rate": 1.521471391263058e-05, "loss": 0.0174, "step": 94270 }, { "epoch": 0.6969042902338783, "grad_norm": 0.15191549062728882, "learning_rate": 1.5211004273504274e-05, "loss": 0.0201, "step": 94280 }, { "epoch": 0.6969782088051802, "grad_norm": 0.08578886091709137, "learning_rate": 1.520729463437797e-05, "loss": 0.0175, "step": 94290 }, { "epoch": 0.697052127376482, "grad_norm": 0.0731656476855278, "learning_rate": 1.5203584995251662e-05, "loss": 0.0163, "step": 94300 }, { "epoch": 0.6971260459477839, "grad_norm": 0.09269023686647415, "learning_rate": 1.5199875356125357e-05, "loss": 0.0186, "step": 94310 }, { "epoch": 0.6971999645190857, "grad_norm": 0.08424928039312363, "learning_rate": 1.519616571699905e-05, "loss": 0.0179, "step": 94320 }, { "epoch": 0.6972738830903876, "grad_norm": 0.06722105294466019, "learning_rate": 1.5192456077872747e-05, "loss": 0.0177, "step": 94330 }, { "epoch": 0.6973478016616895, "grad_norm": 0.06906666606664658, "learning_rate": 1.518874643874644e-05, "loss": 0.0169, "step": 94340 }, { "epoch": 0.6974217202329913, "grad_norm": 0.07479233294725418, "learning_rate": 1.5185036799620134e-05, "loss": 0.017, "step": 94350 }, { "epoch": 0.6974956388042932, "grad_norm": 0.08008567243814468, "learning_rate": 1.5181327160493828e-05, "loss": 0.0194, "step": 94360 }, { "epoch": 0.697569557375595, "grad_norm": 0.08511345088481903, "learning_rate": 1.517761752136752e-05, "loss": 0.0173, "step": 94370 }, { "epoch": 0.6976434759468969, "grad_norm": 0.07214191555976868, "learning_rate": 1.5173907882241216e-05, "loss": 0.0153, "step": 94380 }, { "epoch": 0.6977173945181987, "grad_norm": 0.09548451006412506, "learning_rate": 1.517019824311491e-05, "loss": 0.0199, "step": 94390 }, { "epoch": 0.6977913130895006, "grad_norm": 0.11812444776296616, "learning_rate": 1.5166488603988605e-05, "loss": 0.0155, "step": 94400 }, { "epoch": 0.6978652316608025, "grad_norm": 0.06574191153049469, "learning_rate": 1.5162778964862298e-05, "loss": 0.0165, "step": 94410 }, { "epoch": 0.6979391502321043, "grad_norm": 0.07519920915365219, "learning_rate": 1.5159069325735995e-05, "loss": 0.0178, "step": 94420 }, { "epoch": 0.6980130688034062, "grad_norm": 0.0758148655295372, "learning_rate": 1.5155359686609688e-05, "loss": 0.0179, "step": 94430 }, { "epoch": 0.698086987374708, "grad_norm": 0.09478045254945755, "learning_rate": 1.5151650047483382e-05, "loss": 0.0193, "step": 94440 }, { "epoch": 0.6981609059460099, "grad_norm": 0.07561103254556656, "learning_rate": 1.5147940408357075e-05, "loss": 0.015, "step": 94450 }, { "epoch": 0.6982348245173118, "grad_norm": 0.08076457679271698, "learning_rate": 1.5144230769230769e-05, "loss": 0.0161, "step": 94460 }, { "epoch": 0.6983087430886136, "grad_norm": 0.07646799832582474, "learning_rate": 1.5140521130104465e-05, "loss": 0.0185, "step": 94470 }, { "epoch": 0.6983826616599155, "grad_norm": 0.07224825024604797, "learning_rate": 1.513681149097816e-05, "loss": 0.0182, "step": 94480 }, { "epoch": 0.6984565802312173, "grad_norm": 0.09400869160890579, "learning_rate": 1.5133101851851852e-05, "loss": 0.0185, "step": 94490 }, { "epoch": 0.6985304988025192, "grad_norm": 0.08666172623634338, "learning_rate": 1.5129392212725546e-05, "loss": 0.0158, "step": 94500 }, { "epoch": 0.698604417373821, "grad_norm": 0.0649208202958107, "learning_rate": 1.512568257359924e-05, "loss": 0.0184, "step": 94510 }, { "epoch": 0.6986783359451229, "grad_norm": 0.07728701084852219, "learning_rate": 1.5121972934472936e-05, "loss": 0.0188, "step": 94520 }, { "epoch": 0.6987522545164248, "grad_norm": 0.09042263776063919, "learning_rate": 1.5118263295346629e-05, "loss": 0.0165, "step": 94530 }, { "epoch": 0.6988261730877265, "grad_norm": 0.0655745267868042, "learning_rate": 1.5114553656220323e-05, "loss": 0.0181, "step": 94540 }, { "epoch": 0.6989000916590284, "grad_norm": 0.0579184852540493, "learning_rate": 1.5110844017094017e-05, "loss": 0.0143, "step": 94550 }, { "epoch": 0.6989740102303302, "grad_norm": 0.07687050104141235, "learning_rate": 1.5107134377967713e-05, "loss": 0.0193, "step": 94560 }, { "epoch": 0.6990479288016321, "grad_norm": 0.06710931658744812, "learning_rate": 1.5103424738841408e-05, "loss": 0.0162, "step": 94570 }, { "epoch": 0.6991218473729339, "grad_norm": 0.07855264842510223, "learning_rate": 1.50997150997151e-05, "loss": 0.0186, "step": 94580 }, { "epoch": 0.6991957659442358, "grad_norm": 0.08692429959774017, "learning_rate": 1.5096005460588794e-05, "loss": 0.0177, "step": 94590 }, { "epoch": 0.6992696845155377, "grad_norm": 0.09486404061317444, "learning_rate": 1.5092295821462487e-05, "loss": 0.0145, "step": 94600 }, { "epoch": 0.6993436030868395, "grad_norm": 0.07637245953083038, "learning_rate": 1.5088586182336185e-05, "loss": 0.0168, "step": 94610 }, { "epoch": 0.6994175216581414, "grad_norm": 0.08055321127176285, "learning_rate": 1.5084876543209877e-05, "loss": 0.0167, "step": 94620 }, { "epoch": 0.6994914402294432, "grad_norm": 0.06514108926057816, "learning_rate": 1.5081166904083572e-05, "loss": 0.0174, "step": 94630 }, { "epoch": 0.6995653588007451, "grad_norm": 0.08226980268955231, "learning_rate": 1.5077457264957264e-05, "loss": 0.0176, "step": 94640 }, { "epoch": 0.699639277372047, "grad_norm": 0.0900607705116272, "learning_rate": 1.5073747625830962e-05, "loss": 0.018, "step": 94650 }, { "epoch": 0.6997131959433488, "grad_norm": 0.059451647102832794, "learning_rate": 1.5070037986704654e-05, "loss": 0.0184, "step": 94660 }, { "epoch": 0.6997871145146507, "grad_norm": 0.0902848169207573, "learning_rate": 1.5066328347578349e-05, "loss": 0.0193, "step": 94670 }, { "epoch": 0.6998610330859525, "grad_norm": 0.06665327399969101, "learning_rate": 1.5062618708452041e-05, "loss": 0.02, "step": 94680 }, { "epoch": 0.6999349516572544, "grad_norm": 0.08193585276603699, "learning_rate": 1.5058909069325736e-05, "loss": 0.0169, "step": 94690 }, { "epoch": 0.7000088702285562, "grad_norm": 0.09058967977762222, "learning_rate": 1.5055199430199431e-05, "loss": 0.0156, "step": 94700 }, { "epoch": 0.7000827887998581, "grad_norm": 0.05407675355672836, "learning_rate": 1.5051489791073126e-05, "loss": 0.0171, "step": 94710 }, { "epoch": 0.70015670737116, "grad_norm": 0.08992829918861389, "learning_rate": 1.504778015194682e-05, "loss": 0.0173, "step": 94720 }, { "epoch": 0.7002306259424618, "grad_norm": 0.08389969915151596, "learning_rate": 1.5044070512820513e-05, "loss": 0.0191, "step": 94730 }, { "epoch": 0.7003045445137637, "grad_norm": 0.08148416876792908, "learning_rate": 1.5040360873694207e-05, "loss": 0.0202, "step": 94740 }, { "epoch": 0.7003784630850655, "grad_norm": 0.06622910499572754, "learning_rate": 1.5036651234567903e-05, "loss": 0.0184, "step": 94750 }, { "epoch": 0.7004523816563674, "grad_norm": 0.29766276478767395, "learning_rate": 1.5032941595441597e-05, "loss": 0.0181, "step": 94760 }, { "epoch": 0.7005263002276692, "grad_norm": 0.08713862299919128, "learning_rate": 1.502923195631529e-05, "loss": 0.0188, "step": 94770 }, { "epoch": 0.700600218798971, "grad_norm": 0.08461463451385498, "learning_rate": 1.5025522317188984e-05, "loss": 0.0183, "step": 94780 }, { "epoch": 0.700674137370273, "grad_norm": 0.06767699867486954, "learning_rate": 1.502181267806268e-05, "loss": 0.0147, "step": 94790 }, { "epoch": 0.7007480559415747, "grad_norm": 0.09660223871469498, "learning_rate": 1.5018103038936374e-05, "loss": 0.016, "step": 94800 }, { "epoch": 0.7008219745128766, "grad_norm": 0.08179505914449692, "learning_rate": 1.5014393399810067e-05, "loss": 0.0173, "step": 94810 }, { "epoch": 0.7008958930841784, "grad_norm": 0.1166461706161499, "learning_rate": 1.5010683760683761e-05, "loss": 0.0196, "step": 94820 }, { "epoch": 0.7009698116554803, "grad_norm": 0.14224278926849365, "learning_rate": 1.5006974121557454e-05, "loss": 0.019, "step": 94830 }, { "epoch": 0.7010437302267821, "grad_norm": 0.05852314084768295, "learning_rate": 1.5003264482431151e-05, "loss": 0.0145, "step": 94840 }, { "epoch": 0.701117648798084, "grad_norm": 0.06998267769813538, "learning_rate": 1.4999554843304844e-05, "loss": 0.019, "step": 94850 }, { "epoch": 0.7011915673693859, "grad_norm": 0.07684922963380814, "learning_rate": 1.4995845204178538e-05, "loss": 0.0169, "step": 94860 }, { "epoch": 0.7012654859406877, "grad_norm": 0.08372074365615845, "learning_rate": 1.4992135565052232e-05, "loss": 0.0173, "step": 94870 }, { "epoch": 0.7013394045119896, "grad_norm": 0.07502642273902893, "learning_rate": 1.4988425925925928e-05, "loss": 0.0167, "step": 94880 }, { "epoch": 0.7014133230832914, "grad_norm": 0.07391461730003357, "learning_rate": 1.4984716286799621e-05, "loss": 0.0163, "step": 94890 }, { "epoch": 0.7014872416545933, "grad_norm": 0.07972786575555801, "learning_rate": 1.4981006647673315e-05, "loss": 0.0189, "step": 94900 }, { "epoch": 0.7015611602258952, "grad_norm": 0.05219242349267006, "learning_rate": 1.497729700854701e-05, "loss": 0.0152, "step": 94910 }, { "epoch": 0.701635078797197, "grad_norm": 0.07841724902391434, "learning_rate": 1.4973587369420702e-05, "loss": 0.016, "step": 94920 }, { "epoch": 0.7017089973684989, "grad_norm": 0.097462959587574, "learning_rate": 1.4969877730294398e-05, "loss": 0.016, "step": 94930 }, { "epoch": 0.7017829159398007, "grad_norm": 0.07454212754964828, "learning_rate": 1.4966168091168092e-05, "loss": 0.0145, "step": 94940 }, { "epoch": 0.7018568345111026, "grad_norm": 0.07699607312679291, "learning_rate": 1.4962458452041787e-05, "loss": 0.0201, "step": 94950 }, { "epoch": 0.7019307530824044, "grad_norm": 0.07101486623287201, "learning_rate": 1.495874881291548e-05, "loss": 0.0164, "step": 94960 }, { "epoch": 0.7020046716537063, "grad_norm": 0.07824986428022385, "learning_rate": 1.4955039173789173e-05, "loss": 0.0177, "step": 94970 }, { "epoch": 0.7020785902250082, "grad_norm": 0.08556605130434036, "learning_rate": 1.495132953466287e-05, "loss": 0.0178, "step": 94980 }, { "epoch": 0.70215250879631, "grad_norm": 0.10653487592935562, "learning_rate": 1.4947619895536564e-05, "loss": 0.0197, "step": 94990 }, { "epoch": 0.7022264273676119, "grad_norm": 0.08999094367027283, "learning_rate": 1.4943910256410256e-05, "loss": 0.0155, "step": 95000 }, { "epoch": 0.7023003459389137, "grad_norm": 0.08729143440723419, "learning_rate": 1.494020061728395e-05, "loss": 0.0179, "step": 95010 }, { "epoch": 0.7023742645102156, "grad_norm": 0.07924710214138031, "learning_rate": 1.4936490978157647e-05, "loss": 0.0206, "step": 95020 }, { "epoch": 0.7024481830815174, "grad_norm": 0.07349123060703278, "learning_rate": 1.493278133903134e-05, "loss": 0.0185, "step": 95030 }, { "epoch": 0.7025221016528193, "grad_norm": 0.08930125087499619, "learning_rate": 1.4929071699905033e-05, "loss": 0.0153, "step": 95040 }, { "epoch": 0.7025960202241212, "grad_norm": 0.06959878653287888, "learning_rate": 1.4925362060778728e-05, "loss": 0.0183, "step": 95050 }, { "epoch": 0.7026699387954229, "grad_norm": 0.06968756020069122, "learning_rate": 1.4921652421652422e-05, "loss": 0.0179, "step": 95060 }, { "epoch": 0.7027438573667248, "grad_norm": 0.07830172032117844, "learning_rate": 1.4917942782526118e-05, "loss": 0.0153, "step": 95070 }, { "epoch": 0.7028177759380266, "grad_norm": 0.11358001828193665, "learning_rate": 1.491423314339981e-05, "loss": 0.0166, "step": 95080 }, { "epoch": 0.7028916945093285, "grad_norm": 0.0876796543598175, "learning_rate": 1.4910523504273505e-05, "loss": 0.0185, "step": 95090 }, { "epoch": 0.7029656130806303, "grad_norm": 0.0739654153585434, "learning_rate": 1.4906813865147199e-05, "loss": 0.0168, "step": 95100 }, { "epoch": 0.7030395316519322, "grad_norm": 0.06776399165391922, "learning_rate": 1.4903104226020895e-05, "loss": 0.0155, "step": 95110 }, { "epoch": 0.7031134502232341, "grad_norm": 0.08771048486232758, "learning_rate": 1.4899394586894588e-05, "loss": 0.0163, "step": 95120 }, { "epoch": 0.7031873687945359, "grad_norm": 0.07175265997648239, "learning_rate": 1.4895684947768282e-05, "loss": 0.0205, "step": 95130 }, { "epoch": 0.7032612873658378, "grad_norm": 0.08065624535083771, "learning_rate": 1.4891975308641976e-05, "loss": 0.019, "step": 95140 }, { "epoch": 0.7033352059371396, "grad_norm": 0.08298975974321365, "learning_rate": 1.4888265669515669e-05, "loss": 0.016, "step": 95150 }, { "epoch": 0.7034091245084415, "grad_norm": 0.0918532982468605, "learning_rate": 1.4884556030389366e-05, "loss": 0.0179, "step": 95160 }, { "epoch": 0.7034830430797434, "grad_norm": 0.09895238280296326, "learning_rate": 1.4880846391263059e-05, "loss": 0.0182, "step": 95170 }, { "epoch": 0.7035569616510452, "grad_norm": 0.06671979278326035, "learning_rate": 1.4877136752136753e-05, "loss": 0.0182, "step": 95180 }, { "epoch": 0.7036308802223471, "grad_norm": 0.08994031697511673, "learning_rate": 1.4873427113010446e-05, "loss": 0.0178, "step": 95190 }, { "epoch": 0.7037047987936489, "grad_norm": 0.11199507862329483, "learning_rate": 1.486971747388414e-05, "loss": 0.0201, "step": 95200 }, { "epoch": 0.7037787173649508, "grad_norm": 0.09525155276060104, "learning_rate": 1.4866007834757836e-05, "loss": 0.0172, "step": 95210 }, { "epoch": 0.7038526359362526, "grad_norm": 0.06155973672866821, "learning_rate": 1.486229819563153e-05, "loss": 0.0168, "step": 95220 }, { "epoch": 0.7039265545075545, "grad_norm": 0.07256151735782623, "learning_rate": 1.4858588556505223e-05, "loss": 0.0168, "step": 95230 }, { "epoch": 0.7040004730788564, "grad_norm": 0.06860183924436569, "learning_rate": 1.4854878917378917e-05, "loss": 0.0194, "step": 95240 }, { "epoch": 0.7040743916501582, "grad_norm": 0.085331492125988, "learning_rate": 1.4851169278252613e-05, "loss": 0.0177, "step": 95250 }, { "epoch": 0.7041483102214601, "grad_norm": 0.08582129329442978, "learning_rate": 1.4847459639126307e-05, "loss": 0.0152, "step": 95260 }, { "epoch": 0.7042222287927619, "grad_norm": 0.07819168269634247, "learning_rate": 1.484375e-05, "loss": 0.0161, "step": 95270 }, { "epoch": 0.7042961473640638, "grad_norm": 0.07004784792661667, "learning_rate": 1.4840040360873694e-05, "loss": 0.0194, "step": 95280 }, { "epoch": 0.7043700659353656, "grad_norm": 0.09493345767259598, "learning_rate": 1.4836330721747389e-05, "loss": 0.0189, "step": 95290 }, { "epoch": 0.7044439845066675, "grad_norm": 0.06889653950929642, "learning_rate": 1.4832621082621084e-05, "loss": 0.0178, "step": 95300 }, { "epoch": 0.7045179030779694, "grad_norm": 0.0887618362903595, "learning_rate": 1.4828911443494777e-05, "loss": 0.0185, "step": 95310 }, { "epoch": 0.7045918216492711, "grad_norm": 0.071207694709301, "learning_rate": 1.4825201804368471e-05, "loss": 0.0178, "step": 95320 }, { "epoch": 0.704665740220573, "grad_norm": 0.09956490993499756, "learning_rate": 1.4821492165242166e-05, "loss": 0.0156, "step": 95330 }, { "epoch": 0.7047396587918748, "grad_norm": 0.09358590841293335, "learning_rate": 1.4817782526115862e-05, "loss": 0.0174, "step": 95340 }, { "epoch": 0.7048135773631767, "grad_norm": 0.07501141726970673, "learning_rate": 1.4814072886989556e-05, "loss": 0.0161, "step": 95350 }, { "epoch": 0.7048874959344785, "grad_norm": 0.04576000198721886, "learning_rate": 1.4810363247863248e-05, "loss": 0.0171, "step": 95360 }, { "epoch": 0.7049614145057804, "grad_norm": 0.06188805401325226, "learning_rate": 1.4806653608736943e-05, "loss": 0.0149, "step": 95370 }, { "epoch": 0.7050353330770823, "grad_norm": 0.08435463160276413, "learning_rate": 1.4802943969610635e-05, "loss": 0.0181, "step": 95380 }, { "epoch": 0.7051092516483841, "grad_norm": 0.10611864924430847, "learning_rate": 1.4799234330484333e-05, "loss": 0.0191, "step": 95390 }, { "epoch": 0.705183170219686, "grad_norm": 0.08419112861156464, "learning_rate": 1.4795524691358026e-05, "loss": 0.0175, "step": 95400 }, { "epoch": 0.7052570887909878, "grad_norm": 0.09311195462942123, "learning_rate": 1.479181505223172e-05, "loss": 0.0174, "step": 95410 }, { "epoch": 0.7053310073622897, "grad_norm": 0.0805216059088707, "learning_rate": 1.4788105413105412e-05, "loss": 0.018, "step": 95420 }, { "epoch": 0.7054049259335916, "grad_norm": 0.07350783795118332, "learning_rate": 1.4784395773979107e-05, "loss": 0.0182, "step": 95430 }, { "epoch": 0.7054788445048934, "grad_norm": 0.07063649594783783, "learning_rate": 1.4780686134852803e-05, "loss": 0.0176, "step": 95440 }, { "epoch": 0.7055527630761953, "grad_norm": 0.06820762902498245, "learning_rate": 1.4776976495726497e-05, "loss": 0.0176, "step": 95450 }, { "epoch": 0.7056266816474971, "grad_norm": 0.07071997225284576, "learning_rate": 1.477326685660019e-05, "loss": 0.0172, "step": 95460 }, { "epoch": 0.705700600218799, "grad_norm": 0.0830804631114006, "learning_rate": 1.4769557217473884e-05, "loss": 0.0161, "step": 95470 }, { "epoch": 0.7057745187901008, "grad_norm": 0.09889290481805801, "learning_rate": 1.476584757834758e-05, "loss": 0.0187, "step": 95480 }, { "epoch": 0.7058484373614027, "grad_norm": 0.08065763860940933, "learning_rate": 1.4762137939221274e-05, "loss": 0.0172, "step": 95490 }, { "epoch": 0.7059223559327046, "grad_norm": 0.06085206940770149, "learning_rate": 1.4758428300094968e-05, "loss": 0.0151, "step": 95500 }, { "epoch": 0.7059962745040064, "grad_norm": 0.07296063005924225, "learning_rate": 1.475471866096866e-05, "loss": 0.0191, "step": 95510 }, { "epoch": 0.7060701930753083, "grad_norm": 0.06595694273710251, "learning_rate": 1.4751009021842355e-05, "loss": 0.0188, "step": 95520 }, { "epoch": 0.7061441116466101, "grad_norm": 0.09198103100061417, "learning_rate": 1.4747299382716051e-05, "loss": 0.0151, "step": 95530 }, { "epoch": 0.706218030217912, "grad_norm": 0.054292045533657074, "learning_rate": 1.4743589743589745e-05, "loss": 0.0175, "step": 95540 }, { "epoch": 0.7062919487892138, "grad_norm": 0.08294742554426193, "learning_rate": 1.4739880104463438e-05, "loss": 0.0178, "step": 95550 }, { "epoch": 0.7063658673605157, "grad_norm": 0.07073783874511719, "learning_rate": 1.4736170465337132e-05, "loss": 0.0181, "step": 95560 }, { "epoch": 0.7064397859318176, "grad_norm": 0.07501115649938583, "learning_rate": 1.4732460826210828e-05, "loss": 0.0155, "step": 95570 }, { "epoch": 0.7065137045031193, "grad_norm": 0.07367327064275742, "learning_rate": 1.4728751187084522e-05, "loss": 0.0175, "step": 95580 }, { "epoch": 0.7065876230744212, "grad_norm": 0.08878722041845322, "learning_rate": 1.4725041547958215e-05, "loss": 0.0184, "step": 95590 }, { "epoch": 0.706661541645723, "grad_norm": 0.07486210018396378, "learning_rate": 1.472133190883191e-05, "loss": 0.0172, "step": 95600 }, { "epoch": 0.7067354602170249, "grad_norm": 0.0776633620262146, "learning_rate": 1.4717622269705602e-05, "loss": 0.0181, "step": 95610 }, { "epoch": 0.7068093787883267, "grad_norm": 0.08359479904174805, "learning_rate": 1.47139126305793e-05, "loss": 0.0156, "step": 95620 }, { "epoch": 0.7068832973596286, "grad_norm": 0.10812906175851822, "learning_rate": 1.4710202991452992e-05, "loss": 0.0197, "step": 95630 }, { "epoch": 0.7069572159309305, "grad_norm": 0.07726120203733444, "learning_rate": 1.4706493352326686e-05, "loss": 0.0178, "step": 95640 }, { "epoch": 0.7070311345022323, "grad_norm": 0.0754457488656044, "learning_rate": 1.470278371320038e-05, "loss": 0.0184, "step": 95650 }, { "epoch": 0.7071050530735342, "grad_norm": 0.11202505975961685, "learning_rate": 1.4699074074074073e-05, "loss": 0.0209, "step": 95660 }, { "epoch": 0.707178971644836, "grad_norm": 0.0659521147608757, "learning_rate": 1.469536443494777e-05, "loss": 0.0176, "step": 95670 }, { "epoch": 0.7072528902161379, "grad_norm": 0.05878068879246712, "learning_rate": 1.4691654795821463e-05, "loss": 0.0138, "step": 95680 }, { "epoch": 0.7073268087874398, "grad_norm": 0.09369215369224548, "learning_rate": 1.4687945156695158e-05, "loss": 0.0211, "step": 95690 }, { "epoch": 0.7074007273587416, "grad_norm": 0.09781701862812042, "learning_rate": 1.468423551756885e-05, "loss": 0.0199, "step": 95700 }, { "epoch": 0.7074746459300435, "grad_norm": 0.067824587225914, "learning_rate": 1.4680525878442546e-05, "loss": 0.0175, "step": 95710 }, { "epoch": 0.7075485645013453, "grad_norm": 0.06842079013586044, "learning_rate": 1.467681623931624e-05, "loss": 0.0183, "step": 95720 }, { "epoch": 0.7076224830726472, "grad_norm": 0.09361150115728378, "learning_rate": 1.4673106600189935e-05, "loss": 0.0174, "step": 95730 }, { "epoch": 0.707696401643949, "grad_norm": 0.0677383691072464, "learning_rate": 1.4669396961063627e-05, "loss": 0.0177, "step": 95740 }, { "epoch": 0.7077703202152509, "grad_norm": 0.05748044326901436, "learning_rate": 1.4665687321937322e-05, "loss": 0.018, "step": 95750 }, { "epoch": 0.7078442387865528, "grad_norm": 0.09779369831085205, "learning_rate": 1.4661977682811018e-05, "loss": 0.0185, "step": 95760 }, { "epoch": 0.7079181573578546, "grad_norm": 0.08810590952634811, "learning_rate": 1.4658268043684712e-05, "loss": 0.0192, "step": 95770 }, { "epoch": 0.7079920759291565, "grad_norm": 0.11388625204563141, "learning_rate": 1.4654558404558405e-05, "loss": 0.0197, "step": 95780 }, { "epoch": 0.7080659945004583, "grad_norm": 0.10341662913560867, "learning_rate": 1.4650848765432099e-05, "loss": 0.021, "step": 95790 }, { "epoch": 0.7081399130717602, "grad_norm": 0.1025422066450119, "learning_rate": 1.4647139126305795e-05, "loss": 0.0179, "step": 95800 }, { "epoch": 0.708213831643062, "grad_norm": 0.07308562099933624, "learning_rate": 1.4643429487179489e-05, "loss": 0.0166, "step": 95810 }, { "epoch": 0.7082877502143639, "grad_norm": 0.09482390433549881, "learning_rate": 1.4639719848053182e-05, "loss": 0.0141, "step": 95820 }, { "epoch": 0.7083616687856658, "grad_norm": 0.07176923751831055, "learning_rate": 1.4636010208926876e-05, "loss": 0.0182, "step": 95830 }, { "epoch": 0.7084355873569675, "grad_norm": 0.08835030347108841, "learning_rate": 1.463230056980057e-05, "loss": 0.0179, "step": 95840 }, { "epoch": 0.7085095059282694, "grad_norm": 0.07899758964776993, "learning_rate": 1.4628590930674266e-05, "loss": 0.017, "step": 95850 }, { "epoch": 0.7085834244995712, "grad_norm": 0.07400030642747879, "learning_rate": 1.4624881291547959e-05, "loss": 0.0173, "step": 95860 }, { "epoch": 0.7086573430708731, "grad_norm": 0.07127097994089127, "learning_rate": 1.4621171652421653e-05, "loss": 0.018, "step": 95870 }, { "epoch": 0.7087312616421749, "grad_norm": 0.13907699286937714, "learning_rate": 1.4617462013295347e-05, "loss": 0.0206, "step": 95880 }, { "epoch": 0.7088051802134768, "grad_norm": 0.07832183688879013, "learning_rate": 1.461375237416904e-05, "loss": 0.0166, "step": 95890 }, { "epoch": 0.7088790987847787, "grad_norm": 0.06786512583494186, "learning_rate": 1.4610042735042736e-05, "loss": 0.0158, "step": 95900 }, { "epoch": 0.7089530173560805, "grad_norm": 0.08504045754671097, "learning_rate": 1.460633309591643e-05, "loss": 0.0173, "step": 95910 }, { "epoch": 0.7090269359273824, "grad_norm": 0.06429767608642578, "learning_rate": 1.4602623456790124e-05, "loss": 0.0162, "step": 95920 }, { "epoch": 0.7091008544986842, "grad_norm": 0.12872451543807983, "learning_rate": 1.4598913817663817e-05, "loss": 0.0183, "step": 95930 }, { "epoch": 0.7091747730699861, "grad_norm": 0.07814126461744308, "learning_rate": 1.4595204178537515e-05, "loss": 0.0165, "step": 95940 }, { "epoch": 0.709248691641288, "grad_norm": 0.07006963342428207, "learning_rate": 1.4591494539411207e-05, "loss": 0.0152, "step": 95950 }, { "epoch": 0.7093226102125898, "grad_norm": 0.061831723898649216, "learning_rate": 1.4587784900284901e-05, "loss": 0.0163, "step": 95960 }, { "epoch": 0.7093965287838917, "grad_norm": 0.1168350800871849, "learning_rate": 1.4584075261158594e-05, "loss": 0.0185, "step": 95970 }, { "epoch": 0.7094704473551935, "grad_norm": 0.07854729890823364, "learning_rate": 1.4580365622032288e-05, "loss": 0.0172, "step": 95980 }, { "epoch": 0.7095443659264954, "grad_norm": 0.07101375609636307, "learning_rate": 1.4576655982905984e-05, "loss": 0.0188, "step": 95990 }, { "epoch": 0.7096182844977972, "grad_norm": 0.07454732060432434, "learning_rate": 1.4572946343779679e-05, "loss": 0.0178, "step": 96000 }, { "epoch": 0.7096922030690991, "grad_norm": 0.055436041206121445, "learning_rate": 1.4569236704653371e-05, "loss": 0.017, "step": 96010 }, { "epoch": 0.709766121640401, "grad_norm": 0.08497800678014755, "learning_rate": 1.4565527065527065e-05, "loss": 0.0165, "step": 96020 }, { "epoch": 0.7098400402117028, "grad_norm": 0.08286574482917786, "learning_rate": 1.4561817426400761e-05, "loss": 0.0162, "step": 96030 }, { "epoch": 0.7099139587830047, "grad_norm": 0.07201328873634338, "learning_rate": 1.4558107787274456e-05, "loss": 0.0163, "step": 96040 }, { "epoch": 0.7099878773543065, "grad_norm": 0.07630084455013275, "learning_rate": 1.4554398148148148e-05, "loss": 0.0162, "step": 96050 }, { "epoch": 0.7100617959256084, "grad_norm": 0.06601142138242722, "learning_rate": 1.4550688509021842e-05, "loss": 0.0179, "step": 96060 }, { "epoch": 0.7101357144969102, "grad_norm": 0.07810642570257187, "learning_rate": 1.4546978869895537e-05, "loss": 0.0198, "step": 96070 }, { "epoch": 0.710209633068212, "grad_norm": 0.09582499414682388, "learning_rate": 1.4543269230769233e-05, "loss": 0.0167, "step": 96080 }, { "epoch": 0.710283551639514, "grad_norm": 0.09497883915901184, "learning_rate": 1.4539559591642927e-05, "loss": 0.0189, "step": 96090 }, { "epoch": 0.7103574702108157, "grad_norm": 0.06800592690706253, "learning_rate": 1.453584995251662e-05, "loss": 0.0179, "step": 96100 }, { "epoch": 0.7104313887821176, "grad_norm": 0.07267574220895767, "learning_rate": 1.4532140313390314e-05, "loss": 0.0178, "step": 96110 }, { "epoch": 0.7105053073534194, "grad_norm": 0.0896543338894844, "learning_rate": 1.4528430674264006e-05, "loss": 0.0189, "step": 96120 }, { "epoch": 0.7105792259247213, "grad_norm": 0.07230860739946365, "learning_rate": 1.4524721035137704e-05, "loss": 0.0149, "step": 96130 }, { "epoch": 0.7106531444960231, "grad_norm": 0.07519300282001495, "learning_rate": 1.4521011396011397e-05, "loss": 0.0166, "step": 96140 }, { "epoch": 0.710727063067325, "grad_norm": 0.07765395939350128, "learning_rate": 1.4517301756885091e-05, "loss": 0.0166, "step": 96150 }, { "epoch": 0.7108009816386269, "grad_norm": 0.07616063952445984, "learning_rate": 1.4513592117758783e-05, "loss": 0.0157, "step": 96160 }, { "epoch": 0.7108749002099287, "grad_norm": 0.052234821021556854, "learning_rate": 1.4509882478632481e-05, "loss": 0.0188, "step": 96170 }, { "epoch": 0.7109488187812306, "grad_norm": 0.07081520557403564, "learning_rate": 1.4506172839506174e-05, "loss": 0.0181, "step": 96180 }, { "epoch": 0.7110227373525324, "grad_norm": 0.08544421195983887, "learning_rate": 1.4502463200379868e-05, "loss": 0.0196, "step": 96190 }, { "epoch": 0.7110966559238343, "grad_norm": 0.08664252609014511, "learning_rate": 1.449875356125356e-05, "loss": 0.0155, "step": 96200 }, { "epoch": 0.7111705744951362, "grad_norm": 0.08272361755371094, "learning_rate": 1.4495043922127255e-05, "loss": 0.0176, "step": 96210 }, { "epoch": 0.711244493066438, "grad_norm": 0.08338142931461334, "learning_rate": 1.449133428300095e-05, "loss": 0.0179, "step": 96220 }, { "epoch": 0.7113184116377399, "grad_norm": 0.08550018072128296, "learning_rate": 1.4487624643874645e-05, "loss": 0.0198, "step": 96230 }, { "epoch": 0.7113923302090417, "grad_norm": 0.06720562279224396, "learning_rate": 1.4483915004748338e-05, "loss": 0.0176, "step": 96240 }, { "epoch": 0.7114662487803436, "grad_norm": 0.06509919464588165, "learning_rate": 1.4480205365622032e-05, "loss": 0.0177, "step": 96250 }, { "epoch": 0.7115401673516454, "grad_norm": 0.11599702388048172, "learning_rate": 1.4476495726495728e-05, "loss": 0.0174, "step": 96260 }, { "epoch": 0.7116140859229473, "grad_norm": 0.05585482716560364, "learning_rate": 1.4472786087369422e-05, "loss": 0.0179, "step": 96270 }, { "epoch": 0.7116880044942492, "grad_norm": 0.0665430873632431, "learning_rate": 1.4469076448243116e-05, "loss": 0.0175, "step": 96280 }, { "epoch": 0.711761923065551, "grad_norm": 0.06822658330202103, "learning_rate": 1.4465366809116809e-05, "loss": 0.0168, "step": 96290 }, { "epoch": 0.7118358416368529, "grad_norm": 0.10566496849060059, "learning_rate": 1.4461657169990503e-05, "loss": 0.0155, "step": 96300 }, { "epoch": 0.7119097602081547, "grad_norm": 0.09547118842601776, "learning_rate": 1.44579475308642e-05, "loss": 0.0189, "step": 96310 }, { "epoch": 0.7119836787794566, "grad_norm": 0.09716902673244476, "learning_rate": 1.4454237891737894e-05, "loss": 0.0185, "step": 96320 }, { "epoch": 0.7120575973507584, "grad_norm": 0.07762455195188522, "learning_rate": 1.4450528252611586e-05, "loss": 0.0188, "step": 96330 }, { "epoch": 0.7121315159220603, "grad_norm": 0.07455060631036758, "learning_rate": 1.444681861348528e-05, "loss": 0.0161, "step": 96340 }, { "epoch": 0.7122054344933622, "grad_norm": 0.08196079730987549, "learning_rate": 1.4443108974358973e-05, "loss": 0.0183, "step": 96350 }, { "epoch": 0.712279353064664, "grad_norm": 0.08345690369606018, "learning_rate": 1.443939933523267e-05, "loss": 0.0198, "step": 96360 }, { "epoch": 0.7123532716359658, "grad_norm": 0.10257066786289215, "learning_rate": 1.4435689696106363e-05, "loss": 0.0179, "step": 96370 }, { "epoch": 0.7124271902072676, "grad_norm": 0.06419511884450912, "learning_rate": 1.4431980056980057e-05, "loss": 0.0149, "step": 96380 }, { "epoch": 0.7125011087785695, "grad_norm": 0.06590508669614792, "learning_rate": 1.442827041785375e-05, "loss": 0.0184, "step": 96390 }, { "epoch": 0.7125750273498714, "grad_norm": 0.09475826472043991, "learning_rate": 1.4424560778727448e-05, "loss": 0.0171, "step": 96400 }, { "epoch": 0.7126489459211732, "grad_norm": 0.08667121827602386, "learning_rate": 1.442085113960114e-05, "loss": 0.0184, "step": 96410 }, { "epoch": 0.7127228644924751, "grad_norm": 0.08919225633144379, "learning_rate": 1.4417141500474835e-05, "loss": 0.0192, "step": 96420 }, { "epoch": 0.7127967830637769, "grad_norm": 0.07371910661458969, "learning_rate": 1.4413431861348529e-05, "loss": 0.0187, "step": 96430 }, { "epoch": 0.7128707016350788, "grad_norm": 0.13033123314380646, "learning_rate": 1.4409722222222221e-05, "loss": 0.0187, "step": 96440 }, { "epoch": 0.7129446202063806, "grad_norm": 0.06924816966056824, "learning_rate": 1.4406012583095917e-05, "loss": 0.0157, "step": 96450 }, { "epoch": 0.7130185387776825, "grad_norm": 0.07701105624437332, "learning_rate": 1.4402302943969612e-05, "loss": 0.0158, "step": 96460 }, { "epoch": 0.7130924573489844, "grad_norm": 0.07914204150438309, "learning_rate": 1.4398593304843306e-05, "loss": 0.019, "step": 96470 }, { "epoch": 0.7131663759202862, "grad_norm": 0.07155069708824158, "learning_rate": 1.4394883665716999e-05, "loss": 0.0181, "step": 96480 }, { "epoch": 0.7132402944915881, "grad_norm": 0.06861402094364166, "learning_rate": 1.4391174026590694e-05, "loss": 0.0167, "step": 96490 }, { "epoch": 0.7133142130628899, "grad_norm": 0.09447457641363144, "learning_rate": 1.4387464387464389e-05, "loss": 0.0182, "step": 96500 }, { "epoch": 0.7133881316341918, "grad_norm": 0.06595803052186966, "learning_rate": 1.4383754748338083e-05, "loss": 0.0154, "step": 96510 }, { "epoch": 0.7134620502054936, "grad_norm": 0.09120450913906097, "learning_rate": 1.4380045109211776e-05, "loss": 0.0184, "step": 96520 }, { "epoch": 0.7135359687767955, "grad_norm": 0.07662360370159149, "learning_rate": 1.437633547008547e-05, "loss": 0.0185, "step": 96530 }, { "epoch": 0.7136098873480974, "grad_norm": 0.09823779761791229, "learning_rate": 1.4372625830959166e-05, "loss": 0.0204, "step": 96540 }, { "epoch": 0.7136838059193992, "grad_norm": 0.06132419407367706, "learning_rate": 1.436891619183286e-05, "loss": 0.0172, "step": 96550 }, { "epoch": 0.7137577244907011, "grad_norm": 0.09777303785085678, "learning_rate": 1.4365206552706553e-05, "loss": 0.017, "step": 96560 }, { "epoch": 0.7138316430620029, "grad_norm": 0.07330077141523361, "learning_rate": 1.4361496913580247e-05, "loss": 0.0168, "step": 96570 }, { "epoch": 0.7139055616333048, "grad_norm": 0.07575954496860504, "learning_rate": 1.4357787274453941e-05, "loss": 0.0191, "step": 96580 }, { "epoch": 0.7139794802046066, "grad_norm": 0.0686848908662796, "learning_rate": 1.4354077635327637e-05, "loss": 0.0161, "step": 96590 }, { "epoch": 0.7140533987759085, "grad_norm": 0.0778437927365303, "learning_rate": 1.435036799620133e-05, "loss": 0.0158, "step": 96600 }, { "epoch": 0.7141273173472104, "grad_norm": 0.08872485160827637, "learning_rate": 1.4346658357075024e-05, "loss": 0.0198, "step": 96610 }, { "epoch": 0.7142012359185121, "grad_norm": 0.0839143618941307, "learning_rate": 1.4342948717948718e-05, "loss": 0.0168, "step": 96620 }, { "epoch": 0.714275154489814, "grad_norm": 0.08605623990297318, "learning_rate": 1.4339239078822414e-05, "loss": 0.0174, "step": 96630 }, { "epoch": 0.7143490730611158, "grad_norm": 0.07410331070423126, "learning_rate": 1.4335529439696107e-05, "loss": 0.0156, "step": 96640 }, { "epoch": 0.7144229916324177, "grad_norm": 0.09648150205612183, "learning_rate": 1.4331819800569801e-05, "loss": 0.0164, "step": 96650 }, { "epoch": 0.7144969102037196, "grad_norm": 0.09856808930635452, "learning_rate": 1.4328110161443495e-05, "loss": 0.019, "step": 96660 }, { "epoch": 0.7145708287750214, "grad_norm": 0.08042283356189728, "learning_rate": 1.4324400522317188e-05, "loss": 0.0173, "step": 96670 }, { "epoch": 0.7146447473463233, "grad_norm": 0.06512616574764252, "learning_rate": 1.4320690883190884e-05, "loss": 0.0179, "step": 96680 }, { "epoch": 0.7147186659176251, "grad_norm": 0.0875067338347435, "learning_rate": 1.4316981244064578e-05, "loss": 0.0182, "step": 96690 }, { "epoch": 0.714792584488927, "grad_norm": 0.10024929791688919, "learning_rate": 1.4313271604938273e-05, "loss": 0.0189, "step": 96700 }, { "epoch": 0.7148665030602288, "grad_norm": 0.09748955070972443, "learning_rate": 1.4309561965811965e-05, "loss": 0.0184, "step": 96710 }, { "epoch": 0.7149404216315307, "grad_norm": 0.07124093919992447, "learning_rate": 1.4305852326685663e-05, "loss": 0.0193, "step": 96720 }, { "epoch": 0.7150143402028326, "grad_norm": 0.07453083992004395, "learning_rate": 1.4302142687559355e-05, "loss": 0.0187, "step": 96730 }, { "epoch": 0.7150882587741344, "grad_norm": 0.08479517698287964, "learning_rate": 1.429843304843305e-05, "loss": 0.0182, "step": 96740 }, { "epoch": 0.7151621773454363, "grad_norm": 0.08077602833509445, "learning_rate": 1.4294723409306742e-05, "loss": 0.0165, "step": 96750 }, { "epoch": 0.7152360959167381, "grad_norm": 0.10075265169143677, "learning_rate": 1.4291013770180436e-05, "loss": 0.0179, "step": 96760 }, { "epoch": 0.71531001448804, "grad_norm": 0.06620938330888748, "learning_rate": 1.4287304131054132e-05, "loss": 0.0173, "step": 96770 }, { "epoch": 0.7153839330593418, "grad_norm": 0.08356441557407379, "learning_rate": 1.4283594491927827e-05, "loss": 0.0168, "step": 96780 }, { "epoch": 0.7154578516306437, "grad_norm": 0.06950430572032928, "learning_rate": 1.427988485280152e-05, "loss": 0.0164, "step": 96790 }, { "epoch": 0.7155317702019456, "grad_norm": 0.10832403600215912, "learning_rate": 1.4276175213675214e-05, "loss": 0.0193, "step": 96800 }, { "epoch": 0.7156056887732474, "grad_norm": 0.0809079110622406, "learning_rate": 1.4272465574548908e-05, "loss": 0.0152, "step": 96810 }, { "epoch": 0.7156796073445493, "grad_norm": 0.05348746106028557, "learning_rate": 1.4268755935422604e-05, "loss": 0.0186, "step": 96820 }, { "epoch": 0.7157535259158511, "grad_norm": 0.1107306033372879, "learning_rate": 1.4265046296296296e-05, "loss": 0.0185, "step": 96830 }, { "epoch": 0.715827444487153, "grad_norm": 0.06968053430318832, "learning_rate": 1.426133665716999e-05, "loss": 0.0187, "step": 96840 }, { "epoch": 0.7159013630584548, "grad_norm": 0.07479491084814072, "learning_rate": 1.4257627018043685e-05, "loss": 0.0154, "step": 96850 }, { "epoch": 0.7159752816297567, "grad_norm": 0.0738830491900444, "learning_rate": 1.4253917378917381e-05, "loss": 0.0167, "step": 96860 }, { "epoch": 0.7160492002010586, "grad_norm": 0.0806659460067749, "learning_rate": 1.4250207739791075e-05, "loss": 0.017, "step": 96870 }, { "epoch": 0.7161231187723603, "grad_norm": 0.0961422547698021, "learning_rate": 1.4246498100664768e-05, "loss": 0.0184, "step": 96880 }, { "epoch": 0.7161970373436622, "grad_norm": 0.07728654146194458, "learning_rate": 1.4242788461538462e-05, "loss": 0.0191, "step": 96890 }, { "epoch": 0.716270955914964, "grad_norm": 0.08907140791416168, "learning_rate": 1.4239078822412155e-05, "loss": 0.0166, "step": 96900 }, { "epoch": 0.7163448744862659, "grad_norm": 0.08190717548131943, "learning_rate": 1.4235369183285852e-05, "loss": 0.0178, "step": 96910 }, { "epoch": 0.7164187930575678, "grad_norm": 0.07787206023931503, "learning_rate": 1.4231659544159545e-05, "loss": 0.0171, "step": 96920 }, { "epoch": 0.7164927116288696, "grad_norm": 0.06619174778461456, "learning_rate": 1.4227949905033239e-05, "loss": 0.0153, "step": 96930 }, { "epoch": 0.7165666302001715, "grad_norm": 0.0749562531709671, "learning_rate": 1.4224240265906932e-05, "loss": 0.0188, "step": 96940 }, { "epoch": 0.7166405487714733, "grad_norm": 0.09903047978878021, "learning_rate": 1.422053062678063e-05, "loss": 0.0176, "step": 96950 }, { "epoch": 0.7167144673427752, "grad_norm": 0.09285420179367065, "learning_rate": 1.4216820987654322e-05, "loss": 0.0189, "step": 96960 }, { "epoch": 0.716788385914077, "grad_norm": 0.0756186917424202, "learning_rate": 1.4213111348528016e-05, "loss": 0.0204, "step": 96970 }, { "epoch": 0.7168623044853789, "grad_norm": 0.08044464141130447, "learning_rate": 1.4209401709401709e-05, "loss": 0.0161, "step": 96980 }, { "epoch": 0.7169362230566808, "grad_norm": 0.0663600042462349, "learning_rate": 1.4205692070275403e-05, "loss": 0.0149, "step": 96990 }, { "epoch": 0.7170101416279826, "grad_norm": 0.08343540877103806, "learning_rate": 1.4201982431149099e-05, "loss": 0.0169, "step": 97000 }, { "epoch": 0.7170840601992845, "grad_norm": 0.08704724907875061, "learning_rate": 1.4198272792022793e-05, "loss": 0.0169, "step": 97010 }, { "epoch": 0.7171579787705863, "grad_norm": 0.10420592129230499, "learning_rate": 1.4194563152896488e-05, "loss": 0.015, "step": 97020 }, { "epoch": 0.7172318973418882, "grad_norm": 0.08785660564899445, "learning_rate": 1.419085351377018e-05, "loss": 0.0155, "step": 97030 }, { "epoch": 0.71730581591319, "grad_norm": 0.084081269800663, "learning_rate": 1.4187143874643874e-05, "loss": 0.0153, "step": 97040 }, { "epoch": 0.7173797344844919, "grad_norm": 0.07698749750852585, "learning_rate": 1.418343423551757e-05, "loss": 0.0182, "step": 97050 }, { "epoch": 0.7174536530557938, "grad_norm": 0.06612184643745422, "learning_rate": 1.4179724596391265e-05, "loss": 0.0192, "step": 97060 }, { "epoch": 0.7175275716270956, "grad_norm": 0.06976378709077835, "learning_rate": 1.4176014957264957e-05, "loss": 0.016, "step": 97070 }, { "epoch": 0.7176014901983975, "grad_norm": 0.07119813561439514, "learning_rate": 1.4172305318138652e-05, "loss": 0.0179, "step": 97080 }, { "epoch": 0.7176754087696993, "grad_norm": 0.0721423551440239, "learning_rate": 1.4168595679012347e-05, "loss": 0.0175, "step": 97090 }, { "epoch": 0.7177493273410012, "grad_norm": 0.06006765365600586, "learning_rate": 1.4164886039886042e-05, "loss": 0.0173, "step": 97100 }, { "epoch": 0.717823245912303, "grad_norm": 0.07218063622713089, "learning_rate": 1.4161176400759734e-05, "loss": 0.0179, "step": 97110 }, { "epoch": 0.7178971644836049, "grad_norm": 0.06732066720724106, "learning_rate": 1.4157466761633429e-05, "loss": 0.0174, "step": 97120 }, { "epoch": 0.7179710830549068, "grad_norm": 0.05910402163863182, "learning_rate": 1.4153757122507121e-05, "loss": 0.0173, "step": 97130 }, { "epoch": 0.7180450016262085, "grad_norm": 0.12241929024457932, "learning_rate": 1.4150047483380819e-05, "loss": 0.0175, "step": 97140 }, { "epoch": 0.7181189201975104, "grad_norm": 0.06028750538825989, "learning_rate": 1.4146337844254511e-05, "loss": 0.0197, "step": 97150 }, { "epoch": 0.7181928387688122, "grad_norm": 0.061230018734931946, "learning_rate": 1.4142628205128206e-05, "loss": 0.015, "step": 97160 }, { "epoch": 0.7182667573401141, "grad_norm": 0.09459489583969116, "learning_rate": 1.41389185660019e-05, "loss": 0.0184, "step": 97170 }, { "epoch": 0.718340675911416, "grad_norm": 0.06572078168392181, "learning_rate": 1.4135208926875596e-05, "loss": 0.016, "step": 97180 }, { "epoch": 0.7184145944827178, "grad_norm": 0.09846540540456772, "learning_rate": 1.4131499287749289e-05, "loss": 0.017, "step": 97190 }, { "epoch": 0.7184885130540197, "grad_norm": 0.06211007758975029, "learning_rate": 1.4127789648622983e-05, "loss": 0.0171, "step": 97200 }, { "epoch": 0.7185624316253215, "grad_norm": 0.0772789940237999, "learning_rate": 1.4124080009496677e-05, "loss": 0.0152, "step": 97210 }, { "epoch": 0.7186363501966234, "grad_norm": 0.08457238227128983, "learning_rate": 1.412037037037037e-05, "loss": 0.0158, "step": 97220 }, { "epoch": 0.7187102687679252, "grad_norm": 0.06557153910398483, "learning_rate": 1.4116660731244066e-05, "loss": 0.017, "step": 97230 }, { "epoch": 0.7187841873392271, "grad_norm": 0.0879523754119873, "learning_rate": 1.411295109211776e-05, "loss": 0.0179, "step": 97240 }, { "epoch": 0.718858105910529, "grad_norm": 0.058569859713315964, "learning_rate": 1.4109241452991454e-05, "loss": 0.016, "step": 97250 }, { "epoch": 0.7189320244818308, "grad_norm": 0.09156788885593414, "learning_rate": 1.4105531813865147e-05, "loss": 0.0185, "step": 97260 }, { "epoch": 0.7190059430531327, "grad_norm": 0.0831763818860054, "learning_rate": 1.4101822174738841e-05, "loss": 0.0174, "step": 97270 }, { "epoch": 0.7190798616244345, "grad_norm": 0.08405617624521255, "learning_rate": 1.4098112535612537e-05, "loss": 0.018, "step": 97280 }, { "epoch": 0.7191537801957364, "grad_norm": 0.06453635543584824, "learning_rate": 1.4094402896486231e-05, "loss": 0.0183, "step": 97290 }, { "epoch": 0.7192276987670382, "grad_norm": 0.08332324028015137, "learning_rate": 1.4090693257359924e-05, "loss": 0.0166, "step": 97300 }, { "epoch": 0.7193016173383401, "grad_norm": 0.07621225714683533, "learning_rate": 1.4086983618233618e-05, "loss": 0.0166, "step": 97310 }, { "epoch": 0.719375535909642, "grad_norm": 0.0854366272687912, "learning_rate": 1.4083273979107314e-05, "loss": 0.0169, "step": 97320 }, { "epoch": 0.7194494544809438, "grad_norm": 0.08591848611831665, "learning_rate": 1.4079564339981008e-05, "loss": 0.0178, "step": 97330 }, { "epoch": 0.7195233730522457, "grad_norm": 0.10342957079410553, "learning_rate": 1.4075854700854701e-05, "loss": 0.0176, "step": 97340 }, { "epoch": 0.7195972916235475, "grad_norm": 0.08080706000328064, "learning_rate": 1.4072145061728395e-05, "loss": 0.0174, "step": 97350 }, { "epoch": 0.7196712101948494, "grad_norm": 0.06645863503217697, "learning_rate": 1.406843542260209e-05, "loss": 0.0161, "step": 97360 }, { "epoch": 0.7197451287661512, "grad_norm": 0.0648173987865448, "learning_rate": 1.4064725783475785e-05, "loss": 0.0199, "step": 97370 }, { "epoch": 0.719819047337453, "grad_norm": 0.10382978618144989, "learning_rate": 1.4061016144349478e-05, "loss": 0.0173, "step": 97380 }, { "epoch": 0.719892965908755, "grad_norm": 0.07804905623197556, "learning_rate": 1.4057306505223172e-05, "loss": 0.0194, "step": 97390 }, { "epoch": 0.7199668844800567, "grad_norm": 0.06643490493297577, "learning_rate": 1.4053596866096867e-05, "loss": 0.0168, "step": 97400 }, { "epoch": 0.7200408030513586, "grad_norm": 0.0706179067492485, "learning_rate": 1.4049887226970563e-05, "loss": 0.0181, "step": 97410 }, { "epoch": 0.7201147216226604, "grad_norm": 0.056075792759656906, "learning_rate": 1.4046177587844255e-05, "loss": 0.0179, "step": 97420 }, { "epoch": 0.7201886401939623, "grad_norm": 0.0818905308842659, "learning_rate": 1.404246794871795e-05, "loss": 0.0167, "step": 97430 }, { "epoch": 0.7202625587652642, "grad_norm": 0.06715161353349686, "learning_rate": 1.4038758309591644e-05, "loss": 0.0185, "step": 97440 }, { "epoch": 0.720336477336566, "grad_norm": 0.0666624903678894, "learning_rate": 1.4035048670465336e-05, "loss": 0.0188, "step": 97450 }, { "epoch": 0.7204103959078679, "grad_norm": 0.05913471058011055, "learning_rate": 1.4031339031339034e-05, "loss": 0.0175, "step": 97460 }, { "epoch": 0.7204843144791697, "grad_norm": 0.057986270636320114, "learning_rate": 1.4027629392212726e-05, "loss": 0.0173, "step": 97470 }, { "epoch": 0.7205582330504716, "grad_norm": 0.09989604353904724, "learning_rate": 1.402391975308642e-05, "loss": 0.0155, "step": 97480 }, { "epoch": 0.7206321516217734, "grad_norm": 0.10672522336244583, "learning_rate": 1.4020210113960113e-05, "loss": 0.0167, "step": 97490 }, { "epoch": 0.7207060701930753, "grad_norm": 0.07857657223939896, "learning_rate": 1.4016500474833808e-05, "loss": 0.0185, "step": 97500 }, { "epoch": 0.7207799887643772, "grad_norm": 0.08305259793996811, "learning_rate": 1.4012790835707504e-05, "loss": 0.0181, "step": 97510 }, { "epoch": 0.720853907335679, "grad_norm": 0.06638690084218979, "learning_rate": 1.4009081196581198e-05, "loss": 0.0154, "step": 97520 }, { "epoch": 0.7209278259069809, "grad_norm": 0.07285010069608688, "learning_rate": 1.400537155745489e-05, "loss": 0.0167, "step": 97530 }, { "epoch": 0.7210017444782827, "grad_norm": 0.07371450960636139, "learning_rate": 1.4001661918328585e-05, "loss": 0.016, "step": 97540 }, { "epoch": 0.7210756630495846, "grad_norm": 0.07550600171089172, "learning_rate": 1.399795227920228e-05, "loss": 0.0158, "step": 97550 }, { "epoch": 0.7211495816208864, "grad_norm": 0.09500684589147568, "learning_rate": 1.3994242640075975e-05, "loss": 0.0177, "step": 97560 }, { "epoch": 0.7212235001921883, "grad_norm": 0.061078622937202454, "learning_rate": 1.3990533000949667e-05, "loss": 0.0185, "step": 97570 }, { "epoch": 0.7212974187634902, "grad_norm": 0.07436969131231308, "learning_rate": 1.3986823361823362e-05, "loss": 0.0152, "step": 97580 }, { "epoch": 0.721371337334792, "grad_norm": 0.08949553966522217, "learning_rate": 1.3983113722697056e-05, "loss": 0.0186, "step": 97590 }, { "epoch": 0.7214452559060939, "grad_norm": 0.07141807675361633, "learning_rate": 1.3979404083570752e-05, "loss": 0.0146, "step": 97600 }, { "epoch": 0.7215191744773957, "grad_norm": 0.095971018075943, "learning_rate": 1.3975694444444445e-05, "loss": 0.0189, "step": 97610 }, { "epoch": 0.7215930930486976, "grad_norm": 0.0835772380232811, "learning_rate": 1.3971984805318139e-05, "loss": 0.0163, "step": 97620 }, { "epoch": 0.7216670116199994, "grad_norm": 0.11169163882732391, "learning_rate": 1.3968275166191833e-05, "loss": 0.0192, "step": 97630 }, { "epoch": 0.7217409301913013, "grad_norm": 0.10236402601003647, "learning_rate": 1.3964565527065529e-05, "loss": 0.0194, "step": 97640 }, { "epoch": 0.7218148487626032, "grad_norm": 0.07134838402271271, "learning_rate": 1.3960855887939223e-05, "loss": 0.0176, "step": 97650 }, { "epoch": 0.721888767333905, "grad_norm": 0.07170452922582626, "learning_rate": 1.3957146248812916e-05, "loss": 0.0161, "step": 97660 }, { "epoch": 0.7219626859052068, "grad_norm": 0.09556709975004196, "learning_rate": 1.395343660968661e-05, "loss": 0.0182, "step": 97670 }, { "epoch": 0.7220366044765086, "grad_norm": 0.08802442252635956, "learning_rate": 1.3949726970560303e-05, "loss": 0.0172, "step": 97680 }, { "epoch": 0.7221105230478105, "grad_norm": 0.07534075528383255, "learning_rate": 1.3946017331434e-05, "loss": 0.0179, "step": 97690 }, { "epoch": 0.7221844416191124, "grad_norm": 0.07446454465389252, "learning_rate": 1.3942307692307693e-05, "loss": 0.0162, "step": 97700 }, { "epoch": 0.7222583601904142, "grad_norm": 0.07789715379476547, "learning_rate": 1.3938598053181387e-05, "loss": 0.0183, "step": 97710 }, { "epoch": 0.7223322787617161, "grad_norm": 0.08578293025493622, "learning_rate": 1.393488841405508e-05, "loss": 0.0174, "step": 97720 }, { "epoch": 0.7224061973330179, "grad_norm": 0.08506274223327637, "learning_rate": 1.3931178774928774e-05, "loss": 0.0184, "step": 97730 }, { "epoch": 0.7224801159043198, "grad_norm": 0.058572497218847275, "learning_rate": 1.392746913580247e-05, "loss": 0.0171, "step": 97740 }, { "epoch": 0.7225540344756216, "grad_norm": 0.052253492176532745, "learning_rate": 1.3923759496676164e-05, "loss": 0.0178, "step": 97750 }, { "epoch": 0.7226279530469235, "grad_norm": 0.09432969242334366, "learning_rate": 1.3920049857549857e-05, "loss": 0.0155, "step": 97760 }, { "epoch": 0.7227018716182254, "grad_norm": 0.0906846895813942, "learning_rate": 1.3916340218423551e-05, "loss": 0.0194, "step": 97770 }, { "epoch": 0.7227757901895272, "grad_norm": 0.05747510865330696, "learning_rate": 1.3912630579297247e-05, "loss": 0.018, "step": 97780 }, { "epoch": 0.7228497087608291, "grad_norm": 0.05860042944550514, "learning_rate": 1.3908920940170941e-05, "loss": 0.0154, "step": 97790 }, { "epoch": 0.7229236273321309, "grad_norm": 0.06754317134618759, "learning_rate": 1.3905211301044636e-05, "loss": 0.0205, "step": 97800 }, { "epoch": 0.7229975459034328, "grad_norm": 0.07940924167633057, "learning_rate": 1.3901501661918328e-05, "loss": 0.0194, "step": 97810 }, { "epoch": 0.7230714644747346, "grad_norm": 0.0894060730934143, "learning_rate": 1.3897792022792023e-05, "loss": 0.0174, "step": 97820 }, { "epoch": 0.7231453830460365, "grad_norm": 0.10713989287614822, "learning_rate": 1.3894082383665719e-05, "loss": 0.0174, "step": 97830 }, { "epoch": 0.7232193016173384, "grad_norm": 0.09438996016979218, "learning_rate": 1.3890372744539413e-05, "loss": 0.015, "step": 97840 }, { "epoch": 0.7232932201886402, "grad_norm": 0.09098189324140549, "learning_rate": 1.3886663105413105e-05, "loss": 0.0186, "step": 97850 }, { "epoch": 0.7233671387599421, "grad_norm": 0.06245177239179611, "learning_rate": 1.38829534662868e-05, "loss": 0.0162, "step": 97860 }, { "epoch": 0.7234410573312439, "grad_norm": 0.06434362381696701, "learning_rate": 1.3879243827160496e-05, "loss": 0.0173, "step": 97870 }, { "epoch": 0.7235149759025458, "grad_norm": 0.10157263278961182, "learning_rate": 1.387553418803419e-05, "loss": 0.0206, "step": 97880 }, { "epoch": 0.7235888944738476, "grad_norm": 0.07744581252336502, "learning_rate": 1.3871824548907883e-05, "loss": 0.0176, "step": 97890 }, { "epoch": 0.7236628130451495, "grad_norm": 0.07699330151081085, "learning_rate": 1.3868114909781577e-05, "loss": 0.015, "step": 97900 }, { "epoch": 0.7237367316164514, "grad_norm": 0.09393465518951416, "learning_rate": 1.386440527065527e-05, "loss": 0.0145, "step": 97910 }, { "epoch": 0.7238106501877531, "grad_norm": 0.1100480780005455, "learning_rate": 1.3860695631528967e-05, "loss": 0.0178, "step": 97920 }, { "epoch": 0.723884568759055, "grad_norm": 0.08318780362606049, "learning_rate": 1.385698599240266e-05, "loss": 0.0164, "step": 97930 }, { "epoch": 0.7239584873303568, "grad_norm": 0.09401154518127441, "learning_rate": 1.3853276353276354e-05, "loss": 0.0172, "step": 97940 }, { "epoch": 0.7240324059016587, "grad_norm": 0.05973022058606148, "learning_rate": 1.3849566714150048e-05, "loss": 0.0158, "step": 97950 }, { "epoch": 0.7241063244729606, "grad_norm": 0.09328263252973557, "learning_rate": 1.3845857075023744e-05, "loss": 0.0177, "step": 97960 }, { "epoch": 0.7241802430442624, "grad_norm": 0.08581507951021194, "learning_rate": 1.3842147435897437e-05, "loss": 0.0179, "step": 97970 }, { "epoch": 0.7242541616155643, "grad_norm": 0.08177319914102554, "learning_rate": 1.3838437796771131e-05, "loss": 0.0188, "step": 97980 }, { "epoch": 0.7243280801868661, "grad_norm": 0.07062500715255737, "learning_rate": 1.3834728157644825e-05, "loss": 0.0152, "step": 97990 }, { "epoch": 0.724401998758168, "grad_norm": 0.06682254374027252, "learning_rate": 1.3831018518518518e-05, "loss": 0.0189, "step": 98000 }, { "epoch": 0.7244759173294698, "grad_norm": 0.09271026402711868, "learning_rate": 1.3827308879392214e-05, "loss": 0.0164, "step": 98010 }, { "epoch": 0.7245498359007717, "grad_norm": 0.0880376547574997, "learning_rate": 1.3823599240265908e-05, "loss": 0.0169, "step": 98020 }, { "epoch": 0.7246237544720736, "grad_norm": 0.06539382040500641, "learning_rate": 1.3819889601139602e-05, "loss": 0.0167, "step": 98030 }, { "epoch": 0.7246976730433754, "grad_norm": 0.0701875388622284, "learning_rate": 1.3816179962013295e-05, "loss": 0.0183, "step": 98040 }, { "epoch": 0.7247715916146773, "grad_norm": 0.1255258321762085, "learning_rate": 1.381247032288699e-05, "loss": 0.0179, "step": 98050 }, { "epoch": 0.7248455101859791, "grad_norm": 0.09884500503540039, "learning_rate": 1.3808760683760685e-05, "loss": 0.0185, "step": 98060 }, { "epoch": 0.724919428757281, "grad_norm": 0.061767254024744034, "learning_rate": 1.380505104463438e-05, "loss": 0.0167, "step": 98070 }, { "epoch": 0.7249933473285828, "grad_norm": 0.07481275498867035, "learning_rate": 1.3801341405508072e-05, "loss": 0.0157, "step": 98080 }, { "epoch": 0.7250672658998847, "grad_norm": 0.07772032916545868, "learning_rate": 1.3797631766381766e-05, "loss": 0.0175, "step": 98090 }, { "epoch": 0.7251411844711866, "grad_norm": 0.08376848697662354, "learning_rate": 1.3793922127255462e-05, "loss": 0.017, "step": 98100 }, { "epoch": 0.7252151030424884, "grad_norm": 0.07278087735176086, "learning_rate": 1.3790212488129157e-05, "loss": 0.0165, "step": 98110 }, { "epoch": 0.7252890216137903, "grad_norm": 0.0768652856349945, "learning_rate": 1.3786502849002849e-05, "loss": 0.0156, "step": 98120 }, { "epoch": 0.7253629401850921, "grad_norm": 0.08204212039709091, "learning_rate": 1.3782793209876543e-05, "loss": 0.0193, "step": 98130 }, { "epoch": 0.725436858756394, "grad_norm": 0.07756412774324417, "learning_rate": 1.3779083570750238e-05, "loss": 0.0173, "step": 98140 }, { "epoch": 0.7255107773276958, "grad_norm": 0.09154937416315079, "learning_rate": 1.3775373931623934e-05, "loss": 0.0184, "step": 98150 }, { "epoch": 0.7255846958989977, "grad_norm": 0.06443267315626144, "learning_rate": 1.3771664292497626e-05, "loss": 0.0194, "step": 98160 }, { "epoch": 0.7256586144702996, "grad_norm": 0.07392708957195282, "learning_rate": 1.376795465337132e-05, "loss": 0.014, "step": 98170 }, { "epoch": 0.7257325330416013, "grad_norm": 0.06653968244791031, "learning_rate": 1.3764245014245015e-05, "loss": 0.0175, "step": 98180 }, { "epoch": 0.7258064516129032, "grad_norm": 0.0918004959821701, "learning_rate": 1.376053537511871e-05, "loss": 0.0177, "step": 98190 }, { "epoch": 0.725880370184205, "grad_norm": 0.08654023706912994, "learning_rate": 1.3756825735992403e-05, "loss": 0.0187, "step": 98200 }, { "epoch": 0.7259542887555069, "grad_norm": 0.08185164630413055, "learning_rate": 1.3753116096866098e-05, "loss": 0.018, "step": 98210 }, { "epoch": 0.7260282073268088, "grad_norm": 0.06029609963297844, "learning_rate": 1.3749406457739792e-05, "loss": 0.018, "step": 98220 }, { "epoch": 0.7261021258981106, "grad_norm": 0.058451976627111435, "learning_rate": 1.3745696818613484e-05, "loss": 0.0168, "step": 98230 }, { "epoch": 0.7261760444694125, "grad_norm": 0.09028119593858719, "learning_rate": 1.3741987179487182e-05, "loss": 0.0163, "step": 98240 }, { "epoch": 0.7262499630407143, "grad_norm": 0.072898730635643, "learning_rate": 1.3738277540360875e-05, "loss": 0.014, "step": 98250 }, { "epoch": 0.7263238816120162, "grad_norm": 0.0665789544582367, "learning_rate": 1.3734567901234569e-05, "loss": 0.0192, "step": 98260 }, { "epoch": 0.726397800183318, "grad_norm": 0.0644986480474472, "learning_rate": 1.3730858262108262e-05, "loss": 0.0168, "step": 98270 }, { "epoch": 0.7264717187546199, "grad_norm": 0.076494500041008, "learning_rate": 1.3727148622981956e-05, "loss": 0.0156, "step": 98280 }, { "epoch": 0.7265456373259218, "grad_norm": 0.07643983513116837, "learning_rate": 1.3723438983855652e-05, "loss": 0.0173, "step": 98290 }, { "epoch": 0.7266195558972236, "grad_norm": 0.0882069543004036, "learning_rate": 1.3719729344729346e-05, "loss": 0.0176, "step": 98300 }, { "epoch": 0.7266934744685255, "grad_norm": 0.0776149332523346, "learning_rate": 1.3716019705603039e-05, "loss": 0.017, "step": 98310 }, { "epoch": 0.7267673930398273, "grad_norm": 0.09404109418392181, "learning_rate": 1.3712310066476733e-05, "loss": 0.0178, "step": 98320 }, { "epoch": 0.7268413116111292, "grad_norm": 0.08796057850122452, "learning_rate": 1.3708600427350429e-05, "loss": 0.0177, "step": 98330 }, { "epoch": 0.726915230182431, "grad_norm": 0.11046618968248367, "learning_rate": 1.3704890788224123e-05, "loss": 0.0177, "step": 98340 }, { "epoch": 0.7269891487537329, "grad_norm": 0.06461908668279648, "learning_rate": 1.3701181149097816e-05, "loss": 0.0168, "step": 98350 }, { "epoch": 0.7270630673250348, "grad_norm": 0.1039423793554306, "learning_rate": 1.369747150997151e-05, "loss": 0.0182, "step": 98360 }, { "epoch": 0.7271369858963366, "grad_norm": 0.07331787049770355, "learning_rate": 1.3693761870845204e-05, "loss": 0.0163, "step": 98370 }, { "epoch": 0.7272109044676385, "grad_norm": 0.08373088389635086, "learning_rate": 1.36900522317189e-05, "loss": 0.02, "step": 98380 }, { "epoch": 0.7272848230389403, "grad_norm": 0.08613729476928711, "learning_rate": 1.3686342592592594e-05, "loss": 0.018, "step": 98390 }, { "epoch": 0.7273587416102422, "grad_norm": 0.07876535505056381, "learning_rate": 1.3682632953466287e-05, "loss": 0.0194, "step": 98400 }, { "epoch": 0.7274326601815441, "grad_norm": 0.06077132746577263, "learning_rate": 1.3678923314339981e-05, "loss": 0.0164, "step": 98410 }, { "epoch": 0.7275065787528459, "grad_norm": 0.1049044132232666, "learning_rate": 1.3675213675213677e-05, "loss": 0.0188, "step": 98420 }, { "epoch": 0.7275804973241478, "grad_norm": 0.08487299084663391, "learning_rate": 1.3671504036087372e-05, "loss": 0.0187, "step": 98430 }, { "epoch": 0.7276544158954495, "grad_norm": 0.07457312941551208, "learning_rate": 1.3667794396961064e-05, "loss": 0.0193, "step": 98440 }, { "epoch": 0.7277283344667514, "grad_norm": 0.06628565490245819, "learning_rate": 1.3664084757834758e-05, "loss": 0.0202, "step": 98450 }, { "epoch": 0.7278022530380532, "grad_norm": 0.08865582942962646, "learning_rate": 1.3660375118708451e-05, "loss": 0.0167, "step": 98460 }, { "epoch": 0.7278761716093551, "grad_norm": 0.08476138114929199, "learning_rate": 1.3656665479582149e-05, "loss": 0.018, "step": 98470 }, { "epoch": 0.727950090180657, "grad_norm": 0.09250761568546295, "learning_rate": 1.3652955840455841e-05, "loss": 0.0165, "step": 98480 }, { "epoch": 0.7280240087519588, "grad_norm": 0.10515966266393661, "learning_rate": 1.3649246201329536e-05, "loss": 0.0159, "step": 98490 }, { "epoch": 0.7280979273232607, "grad_norm": 0.06855718046426773, "learning_rate": 1.3645536562203228e-05, "loss": 0.017, "step": 98500 }, { "epoch": 0.7281718458945625, "grad_norm": 0.06158420443534851, "learning_rate": 1.3641826923076922e-05, "loss": 0.016, "step": 98510 }, { "epoch": 0.7282457644658644, "grad_norm": 0.08707701414823532, "learning_rate": 1.3638117283950618e-05, "loss": 0.016, "step": 98520 }, { "epoch": 0.7283196830371662, "grad_norm": 0.09738589823246002, "learning_rate": 1.3634407644824313e-05, "loss": 0.0178, "step": 98530 }, { "epoch": 0.7283936016084681, "grad_norm": 0.0779692679643631, "learning_rate": 1.3630698005698005e-05, "loss": 0.0176, "step": 98540 }, { "epoch": 0.72846752017977, "grad_norm": 0.08245556056499481, "learning_rate": 1.36269883665717e-05, "loss": 0.0166, "step": 98550 }, { "epoch": 0.7285414387510718, "grad_norm": 0.06367962062358856, "learning_rate": 1.3623278727445395e-05, "loss": 0.0165, "step": 98560 }, { "epoch": 0.7286153573223737, "grad_norm": 0.10788046568632126, "learning_rate": 1.361956908831909e-05, "loss": 0.0184, "step": 98570 }, { "epoch": 0.7286892758936755, "grad_norm": 0.06675861030817032, "learning_rate": 1.3615859449192784e-05, "loss": 0.0149, "step": 98580 }, { "epoch": 0.7287631944649774, "grad_norm": 0.055531568825244904, "learning_rate": 1.3612149810066477e-05, "loss": 0.0156, "step": 98590 }, { "epoch": 0.7288371130362792, "grad_norm": 0.060155775398015976, "learning_rate": 1.360844017094017e-05, "loss": 0.0151, "step": 98600 }, { "epoch": 0.7289110316075811, "grad_norm": 0.1119222342967987, "learning_rate": 1.3604730531813867e-05, "loss": 0.0177, "step": 98610 }, { "epoch": 0.728984950178883, "grad_norm": 0.10965950042009354, "learning_rate": 1.3601020892687561e-05, "loss": 0.018, "step": 98620 }, { "epoch": 0.7290588687501848, "grad_norm": 0.08716337382793427, "learning_rate": 1.3597311253561254e-05, "loss": 0.0162, "step": 98630 }, { "epoch": 0.7291327873214867, "grad_norm": 0.11401829868555069, "learning_rate": 1.3593601614434948e-05, "loss": 0.0173, "step": 98640 }, { "epoch": 0.7292067058927885, "grad_norm": 0.07196416705846786, "learning_rate": 1.3589891975308644e-05, "loss": 0.018, "step": 98650 }, { "epoch": 0.7292806244640904, "grad_norm": 0.0806848555803299, "learning_rate": 1.3586182336182338e-05, "loss": 0.0196, "step": 98660 }, { "epoch": 0.7293545430353923, "grad_norm": 0.08177975565195084, "learning_rate": 1.358247269705603e-05, "loss": 0.0179, "step": 98670 }, { "epoch": 0.7294284616066941, "grad_norm": 0.07048134505748749, "learning_rate": 1.3578763057929725e-05, "loss": 0.0192, "step": 98680 }, { "epoch": 0.729502380177996, "grad_norm": 0.07914569973945618, "learning_rate": 1.3575053418803418e-05, "loss": 0.0176, "step": 98690 }, { "epoch": 0.7295762987492977, "grad_norm": 0.07468180358409882, "learning_rate": 1.3571343779677115e-05, "loss": 0.0159, "step": 98700 }, { "epoch": 0.7296502173205996, "grad_norm": 0.07458420842885971, "learning_rate": 1.3567634140550808e-05, "loss": 0.021, "step": 98710 }, { "epoch": 0.7297241358919014, "grad_norm": 0.09042967855930328, "learning_rate": 1.3563924501424502e-05, "loss": 0.0197, "step": 98720 }, { "epoch": 0.7297980544632033, "grad_norm": 0.07578109204769135, "learning_rate": 1.3560214862298196e-05, "loss": 0.0172, "step": 98730 }, { "epoch": 0.7298719730345052, "grad_norm": 0.06371735781431198, "learning_rate": 1.3556505223171889e-05, "loss": 0.0188, "step": 98740 }, { "epoch": 0.729945891605807, "grad_norm": 0.057217177003622055, "learning_rate": 1.3552795584045585e-05, "loss": 0.0168, "step": 98750 }, { "epoch": 0.7300198101771089, "grad_norm": 0.08760707825422287, "learning_rate": 1.354908594491928e-05, "loss": 0.0171, "step": 98760 }, { "epoch": 0.7300937287484107, "grad_norm": 0.07055719196796417, "learning_rate": 1.3545376305792973e-05, "loss": 0.0174, "step": 98770 }, { "epoch": 0.7301676473197126, "grad_norm": 0.07746641337871552, "learning_rate": 1.3541666666666666e-05, "loss": 0.018, "step": 98780 }, { "epoch": 0.7302415658910144, "grad_norm": 0.048529159277677536, "learning_rate": 1.3537957027540362e-05, "loss": 0.0154, "step": 98790 }, { "epoch": 0.7303154844623163, "grad_norm": 0.08571402728557587, "learning_rate": 1.3534247388414056e-05, "loss": 0.0155, "step": 98800 }, { "epoch": 0.7303894030336182, "grad_norm": 0.09416328370571136, "learning_rate": 1.353053774928775e-05, "loss": 0.0179, "step": 98810 }, { "epoch": 0.73046332160492, "grad_norm": 0.08363144099712372, "learning_rate": 1.3526828110161443e-05, "loss": 0.0166, "step": 98820 }, { "epoch": 0.7305372401762219, "grad_norm": 0.06076916307210922, "learning_rate": 1.3523118471035137e-05, "loss": 0.0145, "step": 98830 }, { "epoch": 0.7306111587475237, "grad_norm": 0.0909329429268837, "learning_rate": 1.3519408831908833e-05, "loss": 0.0175, "step": 98840 }, { "epoch": 0.7306850773188256, "grad_norm": 0.08312118798494339, "learning_rate": 1.3515699192782528e-05, "loss": 0.019, "step": 98850 }, { "epoch": 0.7307589958901274, "grad_norm": 0.08315754681825638, "learning_rate": 1.351198955365622e-05, "loss": 0.0174, "step": 98860 }, { "epoch": 0.7308329144614293, "grad_norm": 0.06642234325408936, "learning_rate": 1.3508279914529915e-05, "loss": 0.0174, "step": 98870 }, { "epoch": 0.7309068330327312, "grad_norm": 0.06108556315302849, "learning_rate": 1.350457027540361e-05, "loss": 0.0163, "step": 98880 }, { "epoch": 0.730980751604033, "grad_norm": 0.09150240570306778, "learning_rate": 1.3500860636277305e-05, "loss": 0.0209, "step": 98890 }, { "epoch": 0.7310546701753349, "grad_norm": 0.09559241682291031, "learning_rate": 1.3497150997150997e-05, "loss": 0.0168, "step": 98900 }, { "epoch": 0.7311285887466367, "grad_norm": 0.08340034633874893, "learning_rate": 1.3493441358024692e-05, "loss": 0.0185, "step": 98910 }, { "epoch": 0.7312025073179386, "grad_norm": 0.10789379477500916, "learning_rate": 1.3489731718898386e-05, "loss": 0.0169, "step": 98920 }, { "epoch": 0.7312764258892405, "grad_norm": 0.09083528071641922, "learning_rate": 1.3486022079772082e-05, "loss": 0.0181, "step": 98930 }, { "epoch": 0.7313503444605423, "grad_norm": 0.06849498301744461, "learning_rate": 1.3482312440645774e-05, "loss": 0.0168, "step": 98940 }, { "epoch": 0.7314242630318442, "grad_norm": 0.07019881159067154, "learning_rate": 1.3478602801519469e-05, "loss": 0.0165, "step": 98950 }, { "epoch": 0.731498181603146, "grad_norm": 0.10241382569074631, "learning_rate": 1.3474893162393163e-05, "loss": 0.0187, "step": 98960 }, { "epoch": 0.7315721001744478, "grad_norm": 0.08488575369119644, "learning_rate": 1.3471183523266856e-05, "loss": 0.0178, "step": 98970 }, { "epoch": 0.7316460187457496, "grad_norm": 0.09588972479104996, "learning_rate": 1.3467473884140552e-05, "loss": 0.016, "step": 98980 }, { "epoch": 0.7317199373170515, "grad_norm": 0.07969026267528534, "learning_rate": 1.3463764245014246e-05, "loss": 0.0173, "step": 98990 }, { "epoch": 0.7317938558883534, "grad_norm": 0.09308630973100662, "learning_rate": 1.346005460588794e-05, "loss": 0.0186, "step": 99000 }, { "epoch": 0.7318677744596552, "grad_norm": 0.0691651776432991, "learning_rate": 1.3456344966761633e-05, "loss": 0.0167, "step": 99010 }, { "epoch": 0.7319416930309571, "grad_norm": 0.05044808238744736, "learning_rate": 1.345263532763533e-05, "loss": 0.0172, "step": 99020 }, { "epoch": 0.7320156116022589, "grad_norm": 0.12543408572673798, "learning_rate": 1.3448925688509023e-05, "loss": 0.0197, "step": 99030 }, { "epoch": 0.7320895301735608, "grad_norm": 0.08851069211959839, "learning_rate": 1.3445216049382717e-05, "loss": 0.0156, "step": 99040 }, { "epoch": 0.7321634487448626, "grad_norm": 0.08638571947813034, "learning_rate": 1.344150641025641e-05, "loss": 0.015, "step": 99050 }, { "epoch": 0.7322373673161645, "grad_norm": 0.11562138050794601, "learning_rate": 1.3437796771130104e-05, "loss": 0.0179, "step": 99060 }, { "epoch": 0.7323112858874664, "grad_norm": 0.08109956234693527, "learning_rate": 1.34340871320038e-05, "loss": 0.0172, "step": 99070 }, { "epoch": 0.7323852044587682, "grad_norm": 0.08304541558027267, "learning_rate": 1.3430377492877494e-05, "loss": 0.0208, "step": 99080 }, { "epoch": 0.7324591230300701, "grad_norm": 0.06958150863647461, "learning_rate": 1.3426667853751187e-05, "loss": 0.0168, "step": 99090 }, { "epoch": 0.7325330416013719, "grad_norm": 0.08339308202266693, "learning_rate": 1.3422958214624881e-05, "loss": 0.0203, "step": 99100 }, { "epoch": 0.7326069601726738, "grad_norm": 0.05242053046822548, "learning_rate": 1.3419248575498577e-05, "loss": 0.0178, "step": 99110 }, { "epoch": 0.7326808787439756, "grad_norm": 0.09275031089782715, "learning_rate": 1.3415538936372271e-05, "loss": 0.016, "step": 99120 }, { "epoch": 0.7327547973152775, "grad_norm": 0.09298167377710342, "learning_rate": 1.3411829297245964e-05, "loss": 0.0184, "step": 99130 }, { "epoch": 0.7328287158865794, "grad_norm": 0.06392785161733627, "learning_rate": 1.3408119658119658e-05, "loss": 0.0142, "step": 99140 }, { "epoch": 0.7329026344578812, "grad_norm": 0.06492038071155548, "learning_rate": 1.3404410018993352e-05, "loss": 0.0169, "step": 99150 }, { "epoch": 0.7329765530291831, "grad_norm": 0.08821947872638702, "learning_rate": 1.3400700379867048e-05, "loss": 0.0192, "step": 99160 }, { "epoch": 0.7330504716004849, "grad_norm": 0.08305369317531586, "learning_rate": 1.3396990740740743e-05, "loss": 0.0174, "step": 99170 }, { "epoch": 0.7331243901717868, "grad_norm": 0.0802772119641304, "learning_rate": 1.3393281101614435e-05, "loss": 0.0179, "step": 99180 }, { "epoch": 0.7331983087430887, "grad_norm": 0.08738873898983002, "learning_rate": 1.338957146248813e-05, "loss": 0.0174, "step": 99190 }, { "epoch": 0.7332722273143905, "grad_norm": 0.0812428891658783, "learning_rate": 1.3385861823361822e-05, "loss": 0.0165, "step": 99200 }, { "epoch": 0.7333461458856924, "grad_norm": 0.06330117583274841, "learning_rate": 1.338215218423552e-05, "loss": 0.0173, "step": 99210 }, { "epoch": 0.7334200644569941, "grad_norm": 0.07262025028467178, "learning_rate": 1.3378442545109212e-05, "loss": 0.0166, "step": 99220 }, { "epoch": 0.733493983028296, "grad_norm": 0.09085766226053238, "learning_rate": 1.3374732905982907e-05, "loss": 0.0184, "step": 99230 }, { "epoch": 0.7335679015995978, "grad_norm": 0.08859771490097046, "learning_rate": 1.33710232668566e-05, "loss": 0.0175, "step": 99240 }, { "epoch": 0.7336418201708997, "grad_norm": 0.07603614032268524, "learning_rate": 1.3367313627730297e-05, "loss": 0.0168, "step": 99250 }, { "epoch": 0.7337157387422016, "grad_norm": 0.07759816944599152, "learning_rate": 1.336360398860399e-05, "loss": 0.0171, "step": 99260 }, { "epoch": 0.7337896573135034, "grad_norm": 0.09745617210865021, "learning_rate": 1.3359894349477684e-05, "loss": 0.0171, "step": 99270 }, { "epoch": 0.7338635758848053, "grad_norm": 0.07508081197738647, "learning_rate": 1.3356184710351376e-05, "loss": 0.0171, "step": 99280 }, { "epoch": 0.7339374944561071, "grad_norm": 0.09933502972126007, "learning_rate": 1.335247507122507e-05, "loss": 0.0193, "step": 99290 }, { "epoch": 0.734011413027409, "grad_norm": 0.06211693957448006, "learning_rate": 1.3348765432098767e-05, "loss": 0.017, "step": 99300 }, { "epoch": 0.7340853315987108, "grad_norm": 0.08026242256164551, "learning_rate": 1.334505579297246e-05, "loss": 0.0178, "step": 99310 }, { "epoch": 0.7341592501700127, "grad_norm": 0.07862497121095657, "learning_rate": 1.3341346153846155e-05, "loss": 0.0167, "step": 99320 }, { "epoch": 0.7342331687413146, "grad_norm": 0.06331951171159744, "learning_rate": 1.3337636514719848e-05, "loss": 0.0162, "step": 99330 }, { "epoch": 0.7343070873126164, "grad_norm": 0.08189788460731506, "learning_rate": 1.3333926875593544e-05, "loss": 0.0187, "step": 99340 }, { "epoch": 0.7343810058839183, "grad_norm": 0.09491676092147827, "learning_rate": 1.3330217236467238e-05, "loss": 0.0185, "step": 99350 }, { "epoch": 0.7344549244552201, "grad_norm": 0.05899200215935707, "learning_rate": 1.3326507597340932e-05, "loss": 0.0162, "step": 99360 }, { "epoch": 0.734528843026522, "grad_norm": 0.11451012641191483, "learning_rate": 1.3322797958214625e-05, "loss": 0.0179, "step": 99370 }, { "epoch": 0.7346027615978238, "grad_norm": 0.0756615698337555, "learning_rate": 1.3319088319088319e-05, "loss": 0.0179, "step": 99380 }, { "epoch": 0.7346766801691257, "grad_norm": 0.07908909767866135, "learning_rate": 1.3315378679962015e-05, "loss": 0.0188, "step": 99390 }, { "epoch": 0.7347505987404276, "grad_norm": 0.08498425781726837, "learning_rate": 1.331166904083571e-05, "loss": 0.0159, "step": 99400 }, { "epoch": 0.7348245173117294, "grad_norm": 0.09858408570289612, "learning_rate": 1.3307959401709402e-05, "loss": 0.0165, "step": 99410 }, { "epoch": 0.7348984358830313, "grad_norm": 0.09515240043401718, "learning_rate": 1.3304249762583096e-05, "loss": 0.019, "step": 99420 }, { "epoch": 0.7349723544543331, "grad_norm": 0.1012689471244812, "learning_rate": 1.3300540123456789e-05, "loss": 0.0175, "step": 99430 }, { "epoch": 0.735046273025635, "grad_norm": 0.09500102698802948, "learning_rate": 1.3296830484330486e-05, "loss": 0.0188, "step": 99440 }, { "epoch": 0.7351201915969369, "grad_norm": 0.0788261741399765, "learning_rate": 1.3293120845204179e-05, "loss": 0.0201, "step": 99450 }, { "epoch": 0.7351941101682387, "grad_norm": 0.07192644476890564, "learning_rate": 1.3289411206077873e-05, "loss": 0.017, "step": 99460 }, { "epoch": 0.7352680287395406, "grad_norm": 0.06370582431554794, "learning_rate": 1.3285701566951567e-05, "loss": 0.0174, "step": 99470 }, { "epoch": 0.7353419473108423, "grad_norm": 0.11295067518949509, "learning_rate": 1.3281991927825263e-05, "loss": 0.0164, "step": 99480 }, { "epoch": 0.7354158658821442, "grad_norm": 0.07671871036291122, "learning_rate": 1.3278282288698956e-05, "loss": 0.0155, "step": 99490 }, { "epoch": 0.735489784453446, "grad_norm": 0.06635639071464539, "learning_rate": 1.327457264957265e-05, "loss": 0.0168, "step": 99500 }, { "epoch": 0.7355637030247479, "grad_norm": 0.08532058447599411, "learning_rate": 1.3270863010446345e-05, "loss": 0.0204, "step": 99510 }, { "epoch": 0.7356376215960498, "grad_norm": 0.07075171172618866, "learning_rate": 1.3267153371320037e-05, "loss": 0.0164, "step": 99520 }, { "epoch": 0.7357115401673516, "grad_norm": 0.10158608853816986, "learning_rate": 1.3263443732193733e-05, "loss": 0.018, "step": 99530 }, { "epoch": 0.7357854587386535, "grad_norm": 0.0684775710105896, "learning_rate": 1.3259734093067427e-05, "loss": 0.0168, "step": 99540 }, { "epoch": 0.7358593773099553, "grad_norm": 0.07273388653993607, "learning_rate": 1.3256024453941122e-05, "loss": 0.0182, "step": 99550 }, { "epoch": 0.7359332958812572, "grad_norm": 0.09297880530357361, "learning_rate": 1.3252314814814814e-05, "loss": 0.0163, "step": 99560 }, { "epoch": 0.736007214452559, "grad_norm": 0.06453926861286163, "learning_rate": 1.324860517568851e-05, "loss": 0.0182, "step": 99570 }, { "epoch": 0.7360811330238609, "grad_norm": 0.0784500390291214, "learning_rate": 1.3244895536562204e-05, "loss": 0.0169, "step": 99580 }, { "epoch": 0.7361550515951628, "grad_norm": 0.07597056031227112, "learning_rate": 1.3241185897435899e-05, "loss": 0.0179, "step": 99590 }, { "epoch": 0.7362289701664646, "grad_norm": 0.06927984207868576, "learning_rate": 1.3237476258309591e-05, "loss": 0.0178, "step": 99600 }, { "epoch": 0.7363028887377665, "grad_norm": 0.10358776897192001, "learning_rate": 1.3233766619183286e-05, "loss": 0.0181, "step": 99610 }, { "epoch": 0.7363768073090683, "grad_norm": 0.06848066300153732, "learning_rate": 1.3230056980056982e-05, "loss": 0.019, "step": 99620 }, { "epoch": 0.7364507258803702, "grad_norm": 0.07205010205507278, "learning_rate": 1.3226347340930676e-05, "loss": 0.0172, "step": 99630 }, { "epoch": 0.736524644451672, "grad_norm": 0.08415688574314117, "learning_rate": 1.3222637701804368e-05, "loss": 0.0179, "step": 99640 }, { "epoch": 0.7365985630229739, "grad_norm": 0.0837029442191124, "learning_rate": 1.3218928062678063e-05, "loss": 0.0182, "step": 99650 }, { "epoch": 0.7366724815942758, "grad_norm": 0.08125575631856918, "learning_rate": 1.3215218423551757e-05, "loss": 0.0168, "step": 99660 }, { "epoch": 0.7367464001655776, "grad_norm": 0.0863199532032013, "learning_rate": 1.3211508784425453e-05, "loss": 0.0177, "step": 99670 }, { "epoch": 0.7368203187368795, "grad_norm": 0.1306217908859253, "learning_rate": 1.3207799145299146e-05, "loss": 0.0191, "step": 99680 }, { "epoch": 0.7368942373081813, "grad_norm": 0.08671586215496063, "learning_rate": 1.320408950617284e-05, "loss": 0.0181, "step": 99690 }, { "epoch": 0.7369681558794832, "grad_norm": 0.08784538507461548, "learning_rate": 1.3200379867046534e-05, "loss": 0.0181, "step": 99700 }, { "epoch": 0.7370420744507851, "grad_norm": 0.10323961079120636, "learning_rate": 1.319667022792023e-05, "loss": 0.02, "step": 99710 }, { "epoch": 0.7371159930220869, "grad_norm": 0.08128951489925385, "learning_rate": 1.3192960588793923e-05, "loss": 0.0168, "step": 99720 }, { "epoch": 0.7371899115933888, "grad_norm": 0.07869691401720047, "learning_rate": 1.3189250949667617e-05, "loss": 0.017, "step": 99730 }, { "epoch": 0.7372638301646905, "grad_norm": 0.07044506818056107, "learning_rate": 1.3185541310541311e-05, "loss": 0.0201, "step": 99740 }, { "epoch": 0.7373377487359924, "grad_norm": 0.10342875868082047, "learning_rate": 1.3181831671415004e-05, "loss": 0.017, "step": 99750 }, { "epoch": 0.7374116673072942, "grad_norm": 0.08335622400045395, "learning_rate": 1.3178122032288701e-05, "loss": 0.017, "step": 99760 }, { "epoch": 0.7374855858785961, "grad_norm": 0.07070112973451614, "learning_rate": 1.3174412393162394e-05, "loss": 0.0158, "step": 99770 }, { "epoch": 0.737559504449898, "grad_norm": 0.08443205803632736, "learning_rate": 1.3170702754036088e-05, "loss": 0.0182, "step": 99780 }, { "epoch": 0.7376334230211998, "grad_norm": 0.08464431017637253, "learning_rate": 1.316699311490978e-05, "loss": 0.0165, "step": 99790 }, { "epoch": 0.7377073415925017, "grad_norm": 0.07187016308307648, "learning_rate": 1.3163283475783478e-05, "loss": 0.0186, "step": 99800 }, { "epoch": 0.7377812601638035, "grad_norm": 0.08470672369003296, "learning_rate": 1.3159573836657171e-05, "loss": 0.0166, "step": 99810 }, { "epoch": 0.7378551787351054, "grad_norm": 0.09578060358762741, "learning_rate": 1.3155864197530865e-05, "loss": 0.0177, "step": 99820 }, { "epoch": 0.7379290973064072, "grad_norm": 0.08780781179666519, "learning_rate": 1.3152154558404558e-05, "loss": 0.0172, "step": 99830 }, { "epoch": 0.7380030158777091, "grad_norm": 0.07516979426145554, "learning_rate": 1.3148444919278252e-05, "loss": 0.0157, "step": 99840 }, { "epoch": 0.738076934449011, "grad_norm": 0.0591609925031662, "learning_rate": 1.3144735280151948e-05, "loss": 0.0179, "step": 99850 }, { "epoch": 0.7381508530203128, "grad_norm": 0.08503682911396027, "learning_rate": 1.3141025641025642e-05, "loss": 0.0189, "step": 99860 }, { "epoch": 0.7382247715916147, "grad_norm": 0.048752620816230774, "learning_rate": 1.3137316001899335e-05, "loss": 0.0167, "step": 99870 }, { "epoch": 0.7382986901629165, "grad_norm": 0.06892601400613785, "learning_rate": 1.313360636277303e-05, "loss": 0.0167, "step": 99880 }, { "epoch": 0.7383726087342184, "grad_norm": 0.08108551800251007, "learning_rate": 1.3129896723646724e-05, "loss": 0.0164, "step": 99890 }, { "epoch": 0.7384465273055202, "grad_norm": 0.10203902423381805, "learning_rate": 1.312618708452042e-05, "loss": 0.0192, "step": 99900 }, { "epoch": 0.7385204458768221, "grad_norm": 0.07102199643850327, "learning_rate": 1.3122477445394112e-05, "loss": 0.0171, "step": 99910 }, { "epoch": 0.738594364448124, "grad_norm": 0.08365597575902939, "learning_rate": 1.3118767806267806e-05, "loss": 0.0159, "step": 99920 }, { "epoch": 0.7386682830194258, "grad_norm": 0.06642188131809235, "learning_rate": 1.31150581671415e-05, "loss": 0.0175, "step": 99930 }, { "epoch": 0.7387422015907277, "grad_norm": 0.08410719037055969, "learning_rate": 1.3111348528015197e-05, "loss": 0.0158, "step": 99940 }, { "epoch": 0.7388161201620295, "grad_norm": 0.08759714663028717, "learning_rate": 1.3107638888888891e-05, "loss": 0.0183, "step": 99950 }, { "epoch": 0.7388900387333314, "grad_norm": 0.09986711293458939, "learning_rate": 1.3103929249762583e-05, "loss": 0.019, "step": 99960 }, { "epoch": 0.7389639573046333, "grad_norm": 0.07965222001075745, "learning_rate": 1.3100219610636278e-05, "loss": 0.0177, "step": 99970 }, { "epoch": 0.7390378758759351, "grad_norm": 0.05860085412859917, "learning_rate": 1.309650997150997e-05, "loss": 0.0179, "step": 99980 }, { "epoch": 0.739111794447237, "grad_norm": 0.08647920936346054, "learning_rate": 1.3092800332383668e-05, "loss": 0.0159, "step": 99990 }, { "epoch": 0.7391857130185387, "grad_norm": 0.06778937578201294, "learning_rate": 1.308909069325736e-05, "loss": 0.0175, "step": 100000 }, { "epoch": 0.7391857130185387, "eval_f1": 0.6265636264638181, "eval_loss": 0.01698913984000683, "eval_precision": 0.4966591483671539, "eval_recall": 0.8484924183456124, "eval_runtime": 2919.6426, "eval_samples_per_second": 185.343, "eval_steps_per_second": 2.896, "step": 100000 }, { "epoch": 0.7392596315898406, "grad_norm": 0.08258048444986343, "learning_rate": 1.3085381054131055e-05, "loss": 0.0159, "step": 100010 }, { "epoch": 0.7393335501611424, "grad_norm": 0.07373189926147461, "learning_rate": 1.3081671415004747e-05, "loss": 0.0157, "step": 100020 }, { "epoch": 0.7394074687324443, "grad_norm": 0.06241403892636299, "learning_rate": 1.3077961775878445e-05, "loss": 0.0193, "step": 100030 }, { "epoch": 0.7394813873037462, "grad_norm": 0.06616196036338806, "learning_rate": 1.3074252136752138e-05, "loss": 0.0156, "step": 100040 }, { "epoch": 0.739555305875048, "grad_norm": 0.06679438799619675, "learning_rate": 1.3070542497625832e-05, "loss": 0.0157, "step": 100050 }, { "epoch": 0.7396292244463499, "grad_norm": 0.08504631370306015, "learning_rate": 1.3066832858499525e-05, "loss": 0.0157, "step": 100060 }, { "epoch": 0.7397031430176517, "grad_norm": 0.0888257697224617, "learning_rate": 1.3063123219373219e-05, "loss": 0.0167, "step": 100070 }, { "epoch": 0.7397770615889536, "grad_norm": 0.10319739580154419, "learning_rate": 1.3059413580246915e-05, "loss": 0.0158, "step": 100080 }, { "epoch": 0.7398509801602554, "grad_norm": 0.0712389275431633, "learning_rate": 1.3055703941120609e-05, "loss": 0.0158, "step": 100090 }, { "epoch": 0.7399248987315573, "grad_norm": 0.07851889729499817, "learning_rate": 1.3051994301994303e-05, "loss": 0.0205, "step": 100100 }, { "epoch": 0.7399988173028592, "grad_norm": 0.06646507233381271, "learning_rate": 1.3048284662867996e-05, "loss": 0.0185, "step": 100110 }, { "epoch": 0.740072735874161, "grad_norm": 0.0772419422864914, "learning_rate": 1.304457502374169e-05, "loss": 0.019, "step": 100120 }, { "epoch": 0.7401466544454629, "grad_norm": 0.11786741018295288, "learning_rate": 1.3040865384615386e-05, "loss": 0.0211, "step": 100130 }, { "epoch": 0.7402205730167647, "grad_norm": 0.05052179470658302, "learning_rate": 1.303715574548908e-05, "loss": 0.0171, "step": 100140 }, { "epoch": 0.7402944915880666, "grad_norm": 0.07826834917068481, "learning_rate": 1.3033446106362773e-05, "loss": 0.0204, "step": 100150 }, { "epoch": 0.7403684101593685, "grad_norm": 0.06459248065948486, "learning_rate": 1.3029736467236467e-05, "loss": 0.017, "step": 100160 }, { "epoch": 0.7404423287306703, "grad_norm": 0.06260719150304794, "learning_rate": 1.3026026828110163e-05, "loss": 0.0186, "step": 100170 }, { "epoch": 0.7405162473019722, "grad_norm": 0.09153701364994049, "learning_rate": 1.3022317188983857e-05, "loss": 0.0171, "step": 100180 }, { "epoch": 0.740590165873274, "grad_norm": 0.07650409638881683, "learning_rate": 1.301860754985755e-05, "loss": 0.018, "step": 100190 }, { "epoch": 0.7406640844445759, "grad_norm": 0.05814934894442558, "learning_rate": 1.3014897910731244e-05, "loss": 0.0153, "step": 100200 }, { "epoch": 0.7407380030158777, "grad_norm": 0.07334347069263458, "learning_rate": 1.3011188271604937e-05, "loss": 0.0174, "step": 100210 }, { "epoch": 0.7408119215871796, "grad_norm": 0.06784732639789581, "learning_rate": 1.3007478632478635e-05, "loss": 0.0182, "step": 100220 }, { "epoch": 0.7408858401584815, "grad_norm": 0.07556786388158798, "learning_rate": 1.3003768993352327e-05, "loss": 0.0197, "step": 100230 }, { "epoch": 0.7409597587297833, "grad_norm": 0.08706970512866974, "learning_rate": 1.3000059354226021e-05, "loss": 0.0175, "step": 100240 }, { "epoch": 0.7410336773010852, "grad_norm": 0.07846567779779434, "learning_rate": 1.2996349715099716e-05, "loss": 0.0171, "step": 100250 }, { "epoch": 0.741107595872387, "grad_norm": 0.06241794303059578, "learning_rate": 1.2992640075973412e-05, "loss": 0.0164, "step": 100260 }, { "epoch": 0.7411815144436888, "grad_norm": 0.07802298665046692, "learning_rate": 1.2988930436847104e-05, "loss": 0.0199, "step": 100270 }, { "epoch": 0.7412554330149906, "grad_norm": 0.07757619023323059, "learning_rate": 1.2985220797720799e-05, "loss": 0.0179, "step": 100280 }, { "epoch": 0.7413293515862925, "grad_norm": 0.07125821709632874, "learning_rate": 1.2981511158594493e-05, "loss": 0.0178, "step": 100290 }, { "epoch": 0.7414032701575944, "grad_norm": 0.08631688356399536, "learning_rate": 1.2977801519468185e-05, "loss": 0.0166, "step": 100300 }, { "epoch": 0.7414771887288962, "grad_norm": 0.08890466392040253, "learning_rate": 1.2974091880341881e-05, "loss": 0.0161, "step": 100310 }, { "epoch": 0.7415511073001981, "grad_norm": 0.060113392770290375, "learning_rate": 1.2970382241215576e-05, "loss": 0.0162, "step": 100320 }, { "epoch": 0.7416250258714999, "grad_norm": 0.06005857139825821, "learning_rate": 1.296667260208927e-05, "loss": 0.015, "step": 100330 }, { "epoch": 0.7416989444428018, "grad_norm": 0.06900335103273392, "learning_rate": 1.2962962962962962e-05, "loss": 0.0201, "step": 100340 }, { "epoch": 0.7417728630141036, "grad_norm": 0.05656469240784645, "learning_rate": 1.2959253323836657e-05, "loss": 0.0178, "step": 100350 }, { "epoch": 0.7418467815854055, "grad_norm": 0.085307277739048, "learning_rate": 1.2955543684710353e-05, "loss": 0.0161, "step": 100360 }, { "epoch": 0.7419207001567074, "grad_norm": 0.09140070527791977, "learning_rate": 1.2951834045584047e-05, "loss": 0.0176, "step": 100370 }, { "epoch": 0.7419946187280092, "grad_norm": 0.08289497345685959, "learning_rate": 1.294812440645774e-05, "loss": 0.0167, "step": 100380 }, { "epoch": 0.7420685372993111, "grad_norm": 0.08116666972637177, "learning_rate": 1.2944414767331434e-05, "loss": 0.0156, "step": 100390 }, { "epoch": 0.7421424558706129, "grad_norm": 0.10772228986024857, "learning_rate": 1.294070512820513e-05, "loss": 0.0209, "step": 100400 }, { "epoch": 0.7422163744419148, "grad_norm": 0.07488065958023071, "learning_rate": 1.2936995489078824e-05, "loss": 0.0176, "step": 100410 }, { "epoch": 0.7422902930132167, "grad_norm": 0.07480562478303909, "learning_rate": 1.2933285849952517e-05, "loss": 0.0173, "step": 100420 }, { "epoch": 0.7423642115845185, "grad_norm": 0.07681198418140411, "learning_rate": 1.2929576210826211e-05, "loss": 0.0159, "step": 100430 }, { "epoch": 0.7424381301558204, "grad_norm": 0.10482607036828995, "learning_rate": 1.2925866571699905e-05, "loss": 0.018, "step": 100440 }, { "epoch": 0.7425120487271222, "grad_norm": 0.08250238001346588, "learning_rate": 1.2922156932573601e-05, "loss": 0.0183, "step": 100450 }, { "epoch": 0.7425859672984241, "grad_norm": 0.08101457357406616, "learning_rate": 1.2918447293447294e-05, "loss": 0.0164, "step": 100460 }, { "epoch": 0.7426598858697259, "grad_norm": 0.09645617753267288, "learning_rate": 1.2914737654320988e-05, "loss": 0.0172, "step": 100470 }, { "epoch": 0.7427338044410278, "grad_norm": 0.10801961272954941, "learning_rate": 1.2911028015194682e-05, "loss": 0.0162, "step": 100480 }, { "epoch": 0.7428077230123297, "grad_norm": 0.07592090964317322, "learning_rate": 1.2907318376068378e-05, "loss": 0.0194, "step": 100490 }, { "epoch": 0.7428816415836315, "grad_norm": 0.09863361716270447, "learning_rate": 1.290360873694207e-05, "loss": 0.0183, "step": 100500 }, { "epoch": 0.7429555601549334, "grad_norm": 0.0681617334485054, "learning_rate": 1.2899899097815765e-05, "loss": 0.0155, "step": 100510 }, { "epoch": 0.7430294787262351, "grad_norm": 0.09108418226242065, "learning_rate": 1.289618945868946e-05, "loss": 0.0206, "step": 100520 }, { "epoch": 0.743103397297537, "grad_norm": 0.07221322506666183, "learning_rate": 1.2892479819563152e-05, "loss": 0.0161, "step": 100530 }, { "epoch": 0.7431773158688388, "grad_norm": 0.07454651594161987, "learning_rate": 1.288877018043685e-05, "loss": 0.02, "step": 100540 }, { "epoch": 0.7432512344401407, "grad_norm": 0.07050295919179916, "learning_rate": 1.2885060541310542e-05, "loss": 0.0193, "step": 100550 }, { "epoch": 0.7433251530114426, "grad_norm": 0.07239052653312683, "learning_rate": 1.2881350902184236e-05, "loss": 0.0175, "step": 100560 }, { "epoch": 0.7433990715827444, "grad_norm": 0.06946774572134018, "learning_rate": 1.2877641263057929e-05, "loss": 0.0185, "step": 100570 }, { "epoch": 0.7434729901540463, "grad_norm": 0.0834672674536705, "learning_rate": 1.2873931623931623e-05, "loss": 0.0154, "step": 100580 }, { "epoch": 0.7435469087253481, "grad_norm": 0.09061615914106369, "learning_rate": 1.287022198480532e-05, "loss": 0.0172, "step": 100590 }, { "epoch": 0.74362082729665, "grad_norm": 0.08729087561368942, "learning_rate": 1.2866512345679014e-05, "loss": 0.0173, "step": 100600 }, { "epoch": 0.7436947458679518, "grad_norm": 0.05605660006403923, "learning_rate": 1.2862802706552706e-05, "loss": 0.0181, "step": 100610 }, { "epoch": 0.7437686644392537, "grad_norm": 0.08221442997455597, "learning_rate": 1.28590930674264e-05, "loss": 0.0188, "step": 100620 }, { "epoch": 0.7438425830105556, "grad_norm": 0.05651714280247688, "learning_rate": 1.2855383428300096e-05, "loss": 0.0158, "step": 100630 }, { "epoch": 0.7439165015818574, "grad_norm": 0.0809439942240715, "learning_rate": 1.285167378917379e-05, "loss": 0.0193, "step": 100640 }, { "epoch": 0.7439904201531593, "grad_norm": 0.09423944354057312, "learning_rate": 1.2847964150047483e-05, "loss": 0.0174, "step": 100650 }, { "epoch": 0.7440643387244611, "grad_norm": 0.10862912237644196, "learning_rate": 1.2844254510921177e-05, "loss": 0.0175, "step": 100660 }, { "epoch": 0.744138257295763, "grad_norm": 0.09572023153305054, "learning_rate": 1.2840544871794872e-05, "loss": 0.0191, "step": 100670 }, { "epoch": 0.7442121758670649, "grad_norm": 0.09042080491781235, "learning_rate": 1.2836835232668568e-05, "loss": 0.0157, "step": 100680 }, { "epoch": 0.7442860944383667, "grad_norm": 0.09502577781677246, "learning_rate": 1.2833125593542262e-05, "loss": 0.0188, "step": 100690 }, { "epoch": 0.7443600130096686, "grad_norm": 0.07590213418006897, "learning_rate": 1.2829415954415955e-05, "loss": 0.0172, "step": 100700 }, { "epoch": 0.7444339315809704, "grad_norm": 0.08804917335510254, "learning_rate": 1.2825706315289649e-05, "loss": 0.0179, "step": 100710 }, { "epoch": 0.7445078501522723, "grad_norm": 0.07385943084955215, "learning_rate": 1.2821996676163345e-05, "loss": 0.019, "step": 100720 }, { "epoch": 0.7445817687235741, "grad_norm": 0.10739407688379288, "learning_rate": 1.2818287037037039e-05, "loss": 0.0181, "step": 100730 }, { "epoch": 0.744655687294876, "grad_norm": 0.09441185742616653, "learning_rate": 1.2814577397910732e-05, "loss": 0.0185, "step": 100740 }, { "epoch": 0.7447296058661779, "grad_norm": 0.10174049437046051, "learning_rate": 1.2810867758784426e-05, "loss": 0.0158, "step": 100750 }, { "epoch": 0.7448035244374797, "grad_norm": 0.058671485632658005, "learning_rate": 1.2807158119658119e-05, "loss": 0.0169, "step": 100760 }, { "epoch": 0.7448774430087816, "grad_norm": 0.06477764248847961, "learning_rate": 1.2803448480531816e-05, "loss": 0.0171, "step": 100770 }, { "epoch": 0.7449513615800833, "grad_norm": 0.09175854921340942, "learning_rate": 1.2799738841405509e-05, "loss": 0.0144, "step": 100780 }, { "epoch": 0.7450252801513852, "grad_norm": 0.07994644343852997, "learning_rate": 1.2796029202279203e-05, "loss": 0.0139, "step": 100790 }, { "epoch": 0.745099198722687, "grad_norm": 0.07431617379188538, "learning_rate": 1.2792319563152896e-05, "loss": 0.016, "step": 100800 }, { "epoch": 0.7451731172939889, "grad_norm": 0.08921124786138535, "learning_rate": 1.278860992402659e-05, "loss": 0.0181, "step": 100810 }, { "epoch": 0.7452470358652908, "grad_norm": 0.06759527325630188, "learning_rate": 1.2784900284900286e-05, "loss": 0.0164, "step": 100820 }, { "epoch": 0.7453209544365926, "grad_norm": 0.07384713739156723, "learning_rate": 1.278119064577398e-05, "loss": 0.0165, "step": 100830 }, { "epoch": 0.7453948730078945, "grad_norm": 0.11012984067201614, "learning_rate": 1.2777481006647673e-05, "loss": 0.0173, "step": 100840 }, { "epoch": 0.7454687915791963, "grad_norm": 0.06738018244504929, "learning_rate": 1.2773771367521367e-05, "loss": 0.0177, "step": 100850 }, { "epoch": 0.7455427101504982, "grad_norm": 0.07870987057685852, "learning_rate": 1.2770061728395063e-05, "loss": 0.0152, "step": 100860 }, { "epoch": 0.7456166287218, "grad_norm": 0.08337525278329849, "learning_rate": 1.2766352089268757e-05, "loss": 0.0175, "step": 100870 }, { "epoch": 0.7456905472931019, "grad_norm": 0.08534202724695206, "learning_rate": 1.2762642450142451e-05, "loss": 0.0175, "step": 100880 }, { "epoch": 0.7457644658644038, "grad_norm": 0.14618049561977386, "learning_rate": 1.2758932811016144e-05, "loss": 0.0185, "step": 100890 }, { "epoch": 0.7458383844357056, "grad_norm": 0.08101604878902435, "learning_rate": 1.2755223171889838e-05, "loss": 0.0168, "step": 100900 }, { "epoch": 0.7459123030070075, "grad_norm": 0.0876898542046547, "learning_rate": 1.2751513532763534e-05, "loss": 0.0188, "step": 100910 }, { "epoch": 0.7459862215783093, "grad_norm": 0.07244478166103363, "learning_rate": 1.2747803893637229e-05, "loss": 0.0189, "step": 100920 }, { "epoch": 0.7460601401496112, "grad_norm": 0.08289396017789841, "learning_rate": 1.2744094254510921e-05, "loss": 0.0172, "step": 100930 }, { "epoch": 0.7461340587209131, "grad_norm": 0.06628874689340591, "learning_rate": 1.2740384615384615e-05, "loss": 0.015, "step": 100940 }, { "epoch": 0.7462079772922149, "grad_norm": 0.06265491247177124, "learning_rate": 1.2736674976258311e-05, "loss": 0.0161, "step": 100950 }, { "epoch": 0.7462818958635168, "grad_norm": 0.0762435793876648, "learning_rate": 1.2732965337132006e-05, "loss": 0.0186, "step": 100960 }, { "epoch": 0.7463558144348186, "grad_norm": 0.08350300043821335, "learning_rate": 1.2729255698005698e-05, "loss": 0.0179, "step": 100970 }, { "epoch": 0.7464297330061205, "grad_norm": 0.08295262604951859, "learning_rate": 1.2725546058879393e-05, "loss": 0.0168, "step": 100980 }, { "epoch": 0.7465036515774223, "grad_norm": 0.07857391238212585, "learning_rate": 1.2721836419753085e-05, "loss": 0.0144, "step": 100990 }, { "epoch": 0.7465775701487242, "grad_norm": 0.08510608971118927, "learning_rate": 1.2718126780626783e-05, "loss": 0.0174, "step": 101000 }, { "epoch": 0.7466514887200261, "grad_norm": 0.08745384216308594, "learning_rate": 1.2714417141500475e-05, "loss": 0.0175, "step": 101010 }, { "epoch": 0.7467254072913279, "grad_norm": 0.06416953355073929, "learning_rate": 1.271070750237417e-05, "loss": 0.0158, "step": 101020 }, { "epoch": 0.7467993258626298, "grad_norm": 0.0958615094423294, "learning_rate": 1.2706997863247864e-05, "loss": 0.0193, "step": 101030 }, { "epoch": 0.7468732444339315, "grad_norm": 0.09908900409936905, "learning_rate": 1.2703288224121556e-05, "loss": 0.0156, "step": 101040 }, { "epoch": 0.7469471630052334, "grad_norm": 0.07092351466417313, "learning_rate": 1.2699578584995252e-05, "loss": 0.0149, "step": 101050 }, { "epoch": 0.7470210815765352, "grad_norm": 0.060773033648729324, "learning_rate": 1.2695868945868947e-05, "loss": 0.0149, "step": 101060 }, { "epoch": 0.7470950001478371, "grad_norm": 0.06796196848154068, "learning_rate": 1.2692159306742641e-05, "loss": 0.0173, "step": 101070 }, { "epoch": 0.747168918719139, "grad_norm": 0.0906725749373436, "learning_rate": 1.2688449667616334e-05, "loss": 0.0184, "step": 101080 }, { "epoch": 0.7472428372904408, "grad_norm": 0.07799607515335083, "learning_rate": 1.268474002849003e-05, "loss": 0.0191, "step": 101090 }, { "epoch": 0.7473167558617427, "grad_norm": 0.071399986743927, "learning_rate": 1.2681030389363724e-05, "loss": 0.0177, "step": 101100 }, { "epoch": 0.7473906744330445, "grad_norm": 0.0783345103263855, "learning_rate": 1.2677320750237418e-05, "loss": 0.0178, "step": 101110 }, { "epoch": 0.7474645930043464, "grad_norm": 0.0984099954366684, "learning_rate": 1.267361111111111e-05, "loss": 0.0174, "step": 101120 }, { "epoch": 0.7475385115756482, "grad_norm": 0.09083379060029984, "learning_rate": 1.2669901471984805e-05, "loss": 0.0182, "step": 101130 }, { "epoch": 0.7476124301469501, "grad_norm": 0.08555403351783752, "learning_rate": 1.2666191832858501e-05, "loss": 0.0156, "step": 101140 }, { "epoch": 0.747686348718252, "grad_norm": 0.07865635305643082, "learning_rate": 1.2662482193732195e-05, "loss": 0.0184, "step": 101150 }, { "epoch": 0.7477602672895538, "grad_norm": 0.07970672100782394, "learning_rate": 1.2658772554605888e-05, "loss": 0.0159, "step": 101160 }, { "epoch": 0.7478341858608557, "grad_norm": 0.0820077583193779, "learning_rate": 1.2655062915479582e-05, "loss": 0.018, "step": 101170 }, { "epoch": 0.7479081044321575, "grad_norm": 0.0661480724811554, "learning_rate": 1.2651353276353278e-05, "loss": 0.0187, "step": 101180 }, { "epoch": 0.7479820230034594, "grad_norm": 0.056968096643686295, "learning_rate": 1.2647643637226972e-05, "loss": 0.0184, "step": 101190 }, { "epoch": 0.7480559415747613, "grad_norm": 0.06653908640146255, "learning_rate": 1.2643933998100665e-05, "loss": 0.0176, "step": 101200 }, { "epoch": 0.7481298601460631, "grad_norm": 0.0841420367360115, "learning_rate": 1.2640224358974359e-05, "loss": 0.0148, "step": 101210 }, { "epoch": 0.748203778717365, "grad_norm": 0.05905050411820412, "learning_rate": 1.2636514719848053e-05, "loss": 0.0157, "step": 101220 }, { "epoch": 0.7482776972886668, "grad_norm": 0.06777974218130112, "learning_rate": 1.263280508072175e-05, "loss": 0.0181, "step": 101230 }, { "epoch": 0.7483516158599687, "grad_norm": 0.09229224175214767, "learning_rate": 1.2629095441595442e-05, "loss": 0.0185, "step": 101240 }, { "epoch": 0.7484255344312705, "grad_norm": 0.07637009769678116, "learning_rate": 1.2625385802469136e-05, "loss": 0.0179, "step": 101250 }, { "epoch": 0.7484994530025724, "grad_norm": 0.08407588303089142, "learning_rate": 1.262167616334283e-05, "loss": 0.0174, "step": 101260 }, { "epoch": 0.7485733715738743, "grad_norm": 0.09340599179267883, "learning_rate": 1.2617966524216523e-05, "loss": 0.0173, "step": 101270 }, { "epoch": 0.7486472901451761, "grad_norm": 0.08749319612979889, "learning_rate": 1.2614256885090219e-05, "loss": 0.0181, "step": 101280 }, { "epoch": 0.748721208716478, "grad_norm": 0.08961571007966995, "learning_rate": 1.2610547245963913e-05, "loss": 0.017, "step": 101290 }, { "epoch": 0.7487951272877797, "grad_norm": 0.10773412883281708, "learning_rate": 1.2606837606837608e-05, "loss": 0.0193, "step": 101300 }, { "epoch": 0.7488690458590817, "grad_norm": 0.10216166824102402, "learning_rate": 1.26031279677113e-05, "loss": 0.02, "step": 101310 }, { "epoch": 0.7489429644303834, "grad_norm": 0.0787181630730629, "learning_rate": 1.2599418328584998e-05, "loss": 0.0164, "step": 101320 }, { "epoch": 0.7490168830016853, "grad_norm": 0.0944897010922432, "learning_rate": 1.259570868945869e-05, "loss": 0.0176, "step": 101330 }, { "epoch": 0.7490908015729872, "grad_norm": 0.07871638238430023, "learning_rate": 1.2591999050332385e-05, "loss": 0.0169, "step": 101340 }, { "epoch": 0.749164720144289, "grad_norm": 0.07367895543575287, "learning_rate": 1.2588289411206077e-05, "loss": 0.0175, "step": 101350 }, { "epoch": 0.7492386387155909, "grad_norm": 0.07219689339399338, "learning_rate": 1.2584579772079772e-05, "loss": 0.0181, "step": 101360 }, { "epoch": 0.7493125572868927, "grad_norm": 0.09437573701143265, "learning_rate": 1.2580870132953467e-05, "loss": 0.0167, "step": 101370 }, { "epoch": 0.7493864758581946, "grad_norm": 0.08827673643827438, "learning_rate": 1.2577160493827162e-05, "loss": 0.0156, "step": 101380 }, { "epoch": 0.7494603944294964, "grad_norm": 0.07567749172449112, "learning_rate": 1.2573450854700854e-05, "loss": 0.0154, "step": 101390 }, { "epoch": 0.7495343130007983, "grad_norm": 0.08592404425144196, "learning_rate": 1.2569741215574549e-05, "loss": 0.0181, "step": 101400 }, { "epoch": 0.7496082315721002, "grad_norm": 0.07100368291139603, "learning_rate": 1.2566031576448245e-05, "loss": 0.0152, "step": 101410 }, { "epoch": 0.749682150143402, "grad_norm": 0.08753882348537445, "learning_rate": 1.2562321937321939e-05, "loss": 0.0162, "step": 101420 }, { "epoch": 0.7497560687147039, "grad_norm": 0.05322737991809845, "learning_rate": 1.2558612298195631e-05, "loss": 0.0197, "step": 101430 }, { "epoch": 0.7498299872860057, "grad_norm": 0.10462437570095062, "learning_rate": 1.2554902659069326e-05, "loss": 0.0217, "step": 101440 }, { "epoch": 0.7499039058573076, "grad_norm": 0.08601612597703934, "learning_rate": 1.255119301994302e-05, "loss": 0.0179, "step": 101450 }, { "epoch": 0.7499778244286095, "grad_norm": 0.06775778532028198, "learning_rate": 1.2547483380816716e-05, "loss": 0.0176, "step": 101460 }, { "epoch": 0.7500517429999113, "grad_norm": 0.09714211523532867, "learning_rate": 1.254377374169041e-05, "loss": 0.0204, "step": 101470 }, { "epoch": 0.7501256615712132, "grad_norm": 0.06501183658838272, "learning_rate": 1.2540064102564103e-05, "loss": 0.0163, "step": 101480 }, { "epoch": 0.750199580142515, "grad_norm": 0.0687502771615982, "learning_rate": 1.2536354463437797e-05, "loss": 0.018, "step": 101490 }, { "epoch": 0.7502734987138169, "grad_norm": 0.07812726497650146, "learning_rate": 1.253264482431149e-05, "loss": 0.0177, "step": 101500 }, { "epoch": 0.7503474172851187, "grad_norm": 0.06303098797798157, "learning_rate": 1.2528935185185187e-05, "loss": 0.0166, "step": 101510 }, { "epoch": 0.7504213358564206, "grad_norm": 0.07705267518758774, "learning_rate": 1.252522554605888e-05, "loss": 0.0142, "step": 101520 }, { "epoch": 0.7504952544277225, "grad_norm": 0.0688970610499382, "learning_rate": 1.2521515906932574e-05, "loss": 0.0163, "step": 101530 }, { "epoch": 0.7505691729990243, "grad_norm": 0.08850441873073578, "learning_rate": 1.2517806267806267e-05, "loss": 0.0185, "step": 101540 }, { "epoch": 0.7506430915703262, "grad_norm": 0.06363435089588165, "learning_rate": 1.2514096628679964e-05, "loss": 0.0183, "step": 101550 }, { "epoch": 0.750717010141628, "grad_norm": 0.07842385768890381, "learning_rate": 1.2510386989553657e-05, "loss": 0.0165, "step": 101560 }, { "epoch": 0.7507909287129299, "grad_norm": 0.06949975341558456, "learning_rate": 1.2506677350427351e-05, "loss": 0.0168, "step": 101570 }, { "epoch": 0.7508648472842316, "grad_norm": 0.07649943977594376, "learning_rate": 1.2502967711301044e-05, "loss": 0.0154, "step": 101580 }, { "epoch": 0.7509387658555335, "grad_norm": 0.05525654926896095, "learning_rate": 1.249925807217474e-05, "loss": 0.0165, "step": 101590 }, { "epoch": 0.7510126844268354, "grad_norm": 0.08242253214120865, "learning_rate": 1.2495548433048432e-05, "loss": 0.0195, "step": 101600 }, { "epoch": 0.7510866029981372, "grad_norm": 0.06378057599067688, "learning_rate": 1.2491838793922128e-05, "loss": 0.0162, "step": 101610 }, { "epoch": 0.7511605215694391, "grad_norm": 0.07558952271938324, "learning_rate": 1.2488129154795823e-05, "loss": 0.0168, "step": 101620 }, { "epoch": 0.7512344401407409, "grad_norm": 0.06266353279352188, "learning_rate": 1.2484419515669517e-05, "loss": 0.016, "step": 101630 }, { "epoch": 0.7513083587120428, "grad_norm": 0.08001867681741714, "learning_rate": 1.2480709876543211e-05, "loss": 0.0162, "step": 101640 }, { "epoch": 0.7513822772833446, "grad_norm": 0.0811537504196167, "learning_rate": 1.2477000237416904e-05, "loss": 0.015, "step": 101650 }, { "epoch": 0.7514561958546465, "grad_norm": 0.08687859773635864, "learning_rate": 1.24732905982906e-05, "loss": 0.0182, "step": 101660 }, { "epoch": 0.7515301144259484, "grad_norm": 0.07284379005432129, "learning_rate": 1.2469580959164292e-05, "loss": 0.0167, "step": 101670 }, { "epoch": 0.7516040329972502, "grad_norm": 0.0731201022863388, "learning_rate": 1.2465871320037988e-05, "loss": 0.018, "step": 101680 }, { "epoch": 0.7516779515685521, "grad_norm": 0.06715166568756104, "learning_rate": 1.246216168091168e-05, "loss": 0.0169, "step": 101690 }, { "epoch": 0.7517518701398539, "grad_norm": 0.07490894943475723, "learning_rate": 1.2458452041785377e-05, "loss": 0.0155, "step": 101700 }, { "epoch": 0.7518257887111558, "grad_norm": 0.10497954487800598, "learning_rate": 1.245474240265907e-05, "loss": 0.0185, "step": 101710 }, { "epoch": 0.7518997072824577, "grad_norm": 0.07911212742328644, "learning_rate": 1.2451032763532765e-05, "loss": 0.0145, "step": 101720 }, { "epoch": 0.7519736258537595, "grad_norm": 0.07326927036046982, "learning_rate": 1.2447323124406458e-05, "loss": 0.0161, "step": 101730 }, { "epoch": 0.7520475444250614, "grad_norm": 0.08427941054105759, "learning_rate": 1.2443613485280152e-05, "loss": 0.0176, "step": 101740 }, { "epoch": 0.7521214629963632, "grad_norm": 0.0915616899728775, "learning_rate": 1.2439903846153846e-05, "loss": 0.0176, "step": 101750 }, { "epoch": 0.7521953815676651, "grad_norm": 0.07714962959289551, "learning_rate": 1.243619420702754e-05, "loss": 0.0171, "step": 101760 }, { "epoch": 0.7522693001389669, "grad_norm": 0.0776534304022789, "learning_rate": 1.2432484567901235e-05, "loss": 0.0153, "step": 101770 }, { "epoch": 0.7523432187102688, "grad_norm": 0.0937986969947815, "learning_rate": 1.242877492877493e-05, "loss": 0.0193, "step": 101780 }, { "epoch": 0.7524171372815707, "grad_norm": 0.07981719821691513, "learning_rate": 1.2425065289648624e-05, "loss": 0.0176, "step": 101790 }, { "epoch": 0.7524910558528725, "grad_norm": 0.05857951566576958, "learning_rate": 1.2421355650522318e-05, "loss": 0.0183, "step": 101800 }, { "epoch": 0.7525649744241744, "grad_norm": 0.0774340108036995, "learning_rate": 1.2417646011396012e-05, "loss": 0.016, "step": 101810 }, { "epoch": 0.7526388929954761, "grad_norm": 0.08497736603021622, "learning_rate": 1.2413936372269706e-05, "loss": 0.0178, "step": 101820 }, { "epoch": 0.752712811566778, "grad_norm": 0.07533351331949234, "learning_rate": 1.24102267331434e-05, "loss": 0.016, "step": 101830 }, { "epoch": 0.7527867301380798, "grad_norm": 0.08452022075653076, "learning_rate": 1.2406517094017095e-05, "loss": 0.0173, "step": 101840 }, { "epoch": 0.7528606487093817, "grad_norm": 0.07964122295379639, "learning_rate": 1.240280745489079e-05, "loss": 0.018, "step": 101850 }, { "epoch": 0.7529345672806836, "grad_norm": 0.0665905624628067, "learning_rate": 1.2399097815764483e-05, "loss": 0.0191, "step": 101860 }, { "epoch": 0.7530084858519854, "grad_norm": 0.08771977573633194, "learning_rate": 1.2395388176638178e-05, "loss": 0.0178, "step": 101870 }, { "epoch": 0.7530824044232873, "grad_norm": 0.06156764179468155, "learning_rate": 1.2391678537511872e-05, "loss": 0.0183, "step": 101880 }, { "epoch": 0.7531563229945891, "grad_norm": 0.06278455257415771, "learning_rate": 1.2387968898385566e-05, "loss": 0.0191, "step": 101890 }, { "epoch": 0.753230241565891, "grad_norm": 0.07921794056892395, "learning_rate": 1.2384259259259259e-05, "loss": 0.0166, "step": 101900 }, { "epoch": 0.7533041601371928, "grad_norm": 0.10754285752773285, "learning_rate": 1.2380549620132955e-05, "loss": 0.0186, "step": 101910 }, { "epoch": 0.7533780787084947, "grad_norm": 0.10148213803768158, "learning_rate": 1.2376839981006647e-05, "loss": 0.0184, "step": 101920 }, { "epoch": 0.7534519972797966, "grad_norm": 0.0633421391248703, "learning_rate": 1.2373130341880343e-05, "loss": 0.0178, "step": 101930 }, { "epoch": 0.7535259158510984, "grad_norm": 0.07311506569385529, "learning_rate": 1.2369420702754036e-05, "loss": 0.0157, "step": 101940 }, { "epoch": 0.7535998344224003, "grad_norm": 0.07114532589912415, "learning_rate": 1.2365711063627732e-05, "loss": 0.0182, "step": 101950 }, { "epoch": 0.7536737529937021, "grad_norm": 0.06623285263776779, "learning_rate": 1.2362001424501425e-05, "loss": 0.0152, "step": 101960 }, { "epoch": 0.753747671565004, "grad_norm": 0.07522322982549667, "learning_rate": 1.2358291785375119e-05, "loss": 0.019, "step": 101970 }, { "epoch": 0.7538215901363059, "grad_norm": 0.11243265122175217, "learning_rate": 1.2354582146248813e-05, "loss": 0.0175, "step": 101980 }, { "epoch": 0.7538955087076077, "grad_norm": 0.08355090767145157, "learning_rate": 1.2350872507122507e-05, "loss": 0.0159, "step": 101990 }, { "epoch": 0.7539694272789096, "grad_norm": 0.07274141907691956, "learning_rate": 1.2347162867996202e-05, "loss": 0.0183, "step": 102000 }, { "epoch": 0.7540433458502114, "grad_norm": 0.09165412187576294, "learning_rate": 1.2343453228869896e-05, "loss": 0.0168, "step": 102010 }, { "epoch": 0.7541172644215133, "grad_norm": 0.09555403888225555, "learning_rate": 1.233974358974359e-05, "loss": 0.0175, "step": 102020 }, { "epoch": 0.7541911829928151, "grad_norm": 0.09564153850078583, "learning_rate": 1.2336033950617284e-05, "loss": 0.0199, "step": 102030 }, { "epoch": 0.754265101564117, "grad_norm": 0.06774450838565826, "learning_rate": 1.2332324311490979e-05, "loss": 0.0149, "step": 102040 }, { "epoch": 0.7543390201354189, "grad_norm": 0.053174346685409546, "learning_rate": 1.2328614672364673e-05, "loss": 0.0143, "step": 102050 }, { "epoch": 0.7544129387067207, "grad_norm": 0.09616374224424362, "learning_rate": 1.2324905033238367e-05, "loss": 0.016, "step": 102060 }, { "epoch": 0.7544868572780226, "grad_norm": 0.11800222098827362, "learning_rate": 1.2321195394112062e-05, "loss": 0.0173, "step": 102070 }, { "epoch": 0.7545607758493243, "grad_norm": 0.0716506615281105, "learning_rate": 1.2317485754985756e-05, "loss": 0.0178, "step": 102080 }, { "epoch": 0.7546346944206263, "grad_norm": 0.06857968121767044, "learning_rate": 1.231377611585945e-05, "loss": 0.0168, "step": 102090 }, { "epoch": 0.754708612991928, "grad_norm": 0.08074000477790833, "learning_rate": 1.2310066476733144e-05, "loss": 0.0166, "step": 102100 }, { "epoch": 0.7547825315632299, "grad_norm": 0.07367126643657684, "learning_rate": 1.2306356837606839e-05, "loss": 0.0167, "step": 102110 }, { "epoch": 0.7548564501345318, "grad_norm": 0.06470063328742981, "learning_rate": 1.2302647198480533e-05, "loss": 0.0166, "step": 102120 }, { "epoch": 0.7549303687058336, "grad_norm": 0.06659591943025589, "learning_rate": 1.2298937559354225e-05, "loss": 0.0172, "step": 102130 }, { "epoch": 0.7550042872771355, "grad_norm": 0.08944061398506165, "learning_rate": 1.2295227920227921e-05, "loss": 0.0153, "step": 102140 }, { "epoch": 0.7550782058484373, "grad_norm": 0.0934378057718277, "learning_rate": 1.2291518281101614e-05, "loss": 0.0163, "step": 102150 }, { "epoch": 0.7551521244197392, "grad_norm": 0.0620492585003376, "learning_rate": 1.228780864197531e-05, "loss": 0.0179, "step": 102160 }, { "epoch": 0.7552260429910411, "grad_norm": 0.0643744096159935, "learning_rate": 1.2284099002849003e-05, "loss": 0.019, "step": 102170 }, { "epoch": 0.7552999615623429, "grad_norm": 0.08223942667245865, "learning_rate": 1.2280389363722699e-05, "loss": 0.0157, "step": 102180 }, { "epoch": 0.7553738801336448, "grad_norm": 0.07553199678659439, "learning_rate": 1.2276679724596391e-05, "loss": 0.0184, "step": 102190 }, { "epoch": 0.7554477987049466, "grad_norm": 0.06578723341226578, "learning_rate": 1.2272970085470085e-05, "loss": 0.0151, "step": 102200 }, { "epoch": 0.7555217172762485, "grad_norm": 0.08422625809907913, "learning_rate": 1.226926044634378e-05, "loss": 0.0184, "step": 102210 }, { "epoch": 0.7555956358475503, "grad_norm": 0.07038059830665588, "learning_rate": 1.2265550807217474e-05, "loss": 0.0167, "step": 102220 }, { "epoch": 0.7556695544188522, "grad_norm": 0.08100122958421707, "learning_rate": 1.226184116809117e-05, "loss": 0.0196, "step": 102230 }, { "epoch": 0.7557434729901541, "grad_norm": 0.07784950733184814, "learning_rate": 1.2258131528964862e-05, "loss": 0.0182, "step": 102240 }, { "epoch": 0.7558173915614559, "grad_norm": 0.08980873972177505, "learning_rate": 1.2254421889838558e-05, "loss": 0.0178, "step": 102250 }, { "epoch": 0.7558913101327578, "grad_norm": 0.07078026235103607, "learning_rate": 1.2250712250712251e-05, "loss": 0.0167, "step": 102260 }, { "epoch": 0.7559652287040596, "grad_norm": 0.0744447410106659, "learning_rate": 1.2247002611585945e-05, "loss": 0.0156, "step": 102270 }, { "epoch": 0.7560391472753615, "grad_norm": 0.07403499633073807, "learning_rate": 1.224329297245964e-05, "loss": 0.0172, "step": 102280 }, { "epoch": 0.7561130658466633, "grad_norm": 0.07184556126594543, "learning_rate": 1.2239583333333334e-05, "loss": 0.0173, "step": 102290 }, { "epoch": 0.7561869844179652, "grad_norm": 0.09416437149047852, "learning_rate": 1.2235873694207028e-05, "loss": 0.0187, "step": 102300 }, { "epoch": 0.7562609029892671, "grad_norm": 0.0748690515756607, "learning_rate": 1.2232164055080722e-05, "loss": 0.0177, "step": 102310 }, { "epoch": 0.7563348215605689, "grad_norm": 0.07672201097011566, "learning_rate": 1.2228454415954417e-05, "loss": 0.0155, "step": 102320 }, { "epoch": 0.7564087401318708, "grad_norm": 0.09029083698987961, "learning_rate": 1.2224744776828111e-05, "loss": 0.0192, "step": 102330 }, { "epoch": 0.7564826587031726, "grad_norm": 0.08144398778676987, "learning_rate": 1.2221035137701805e-05, "loss": 0.0184, "step": 102340 }, { "epoch": 0.7565565772744745, "grad_norm": 0.08030198514461517, "learning_rate": 1.22173254985755e-05, "loss": 0.0168, "step": 102350 }, { "epoch": 0.7566304958457762, "grad_norm": 0.11785607784986496, "learning_rate": 1.2213615859449192e-05, "loss": 0.0193, "step": 102360 }, { "epoch": 0.7567044144170781, "grad_norm": 0.07621311396360397, "learning_rate": 1.2209906220322888e-05, "loss": 0.0162, "step": 102370 }, { "epoch": 0.75677833298838, "grad_norm": 0.050615742802619934, "learning_rate": 1.2206196581196582e-05, "loss": 0.0161, "step": 102380 }, { "epoch": 0.7568522515596818, "grad_norm": 0.08207380771636963, "learning_rate": 1.2202486942070277e-05, "loss": 0.0187, "step": 102390 }, { "epoch": 0.7569261701309837, "grad_norm": 0.06875210255384445, "learning_rate": 1.219877730294397e-05, "loss": 0.0197, "step": 102400 }, { "epoch": 0.7570000887022855, "grad_norm": 0.08055390417575836, "learning_rate": 1.2195067663817665e-05, "loss": 0.0185, "step": 102410 }, { "epoch": 0.7570740072735874, "grad_norm": 0.08164180815219879, "learning_rate": 1.219135802469136e-05, "loss": 0.0162, "step": 102420 }, { "epoch": 0.7571479258448893, "grad_norm": 0.07057615369558334, "learning_rate": 1.2187648385565052e-05, "loss": 0.0178, "step": 102430 }, { "epoch": 0.7572218444161911, "grad_norm": 0.13389045000076294, "learning_rate": 1.2183938746438748e-05, "loss": 0.0169, "step": 102440 }, { "epoch": 0.757295762987493, "grad_norm": 0.0784306675195694, "learning_rate": 1.218022910731244e-05, "loss": 0.016, "step": 102450 }, { "epoch": 0.7573696815587948, "grad_norm": 0.05959833040833473, "learning_rate": 1.2176519468186136e-05, "loss": 0.0161, "step": 102460 }, { "epoch": 0.7574436001300967, "grad_norm": 0.10358747839927673, "learning_rate": 1.2172809829059829e-05, "loss": 0.0198, "step": 102470 }, { "epoch": 0.7575175187013985, "grad_norm": 0.05675870180130005, "learning_rate": 1.2169100189933525e-05, "loss": 0.0175, "step": 102480 }, { "epoch": 0.7575914372727004, "grad_norm": 0.07311985641717911, "learning_rate": 1.2165390550807218e-05, "loss": 0.0175, "step": 102490 }, { "epoch": 0.7576653558440023, "grad_norm": 0.09267310053110123, "learning_rate": 1.2161680911680912e-05, "loss": 0.0161, "step": 102500 }, { "epoch": 0.7577392744153041, "grad_norm": 0.10184731334447861, "learning_rate": 1.2157971272554606e-05, "loss": 0.0155, "step": 102510 }, { "epoch": 0.757813192986606, "grad_norm": 0.08719828724861145, "learning_rate": 1.21542616334283e-05, "loss": 0.0171, "step": 102520 }, { "epoch": 0.7578871115579078, "grad_norm": 0.07820761948823929, "learning_rate": 1.2150551994301995e-05, "loss": 0.0155, "step": 102530 }, { "epoch": 0.7579610301292097, "grad_norm": 0.07845431566238403, "learning_rate": 1.2146842355175689e-05, "loss": 0.0176, "step": 102540 }, { "epoch": 0.7580349487005115, "grad_norm": 0.08127642422914505, "learning_rate": 1.2143132716049383e-05, "loss": 0.0203, "step": 102550 }, { "epoch": 0.7581088672718134, "grad_norm": 0.07473145425319672, "learning_rate": 1.2139423076923077e-05, "loss": 0.0174, "step": 102560 }, { "epoch": 0.7581827858431153, "grad_norm": 0.11189044266939163, "learning_rate": 1.2135713437796772e-05, "loss": 0.0153, "step": 102570 }, { "epoch": 0.7582567044144171, "grad_norm": 0.07234430313110352, "learning_rate": 1.2132003798670466e-05, "loss": 0.018, "step": 102580 }, { "epoch": 0.758330622985719, "grad_norm": 0.09144700318574905, "learning_rate": 1.212829415954416e-05, "loss": 0.0176, "step": 102590 }, { "epoch": 0.7584045415570208, "grad_norm": 0.06276418268680573, "learning_rate": 1.2124584520417855e-05, "loss": 0.0172, "step": 102600 }, { "epoch": 0.7584784601283227, "grad_norm": 0.09209229052066803, "learning_rate": 1.2120874881291549e-05, "loss": 0.0157, "step": 102610 }, { "epoch": 0.7585523786996244, "grad_norm": 0.05703670531511307, "learning_rate": 1.2117165242165243e-05, "loss": 0.0161, "step": 102620 }, { "epoch": 0.7586262972709263, "grad_norm": 0.07058379054069519, "learning_rate": 1.2113455603038937e-05, "loss": 0.0159, "step": 102630 }, { "epoch": 0.7587002158422282, "grad_norm": 0.08190514892339706, "learning_rate": 1.2109745963912632e-05, "loss": 0.0161, "step": 102640 }, { "epoch": 0.75877413441353, "grad_norm": 0.08964069187641144, "learning_rate": 1.2106036324786326e-05, "loss": 0.017, "step": 102650 }, { "epoch": 0.7588480529848319, "grad_norm": 0.07375913113355637, "learning_rate": 1.2102326685660019e-05, "loss": 0.0185, "step": 102660 }, { "epoch": 0.7589219715561337, "grad_norm": 0.07200281322002411, "learning_rate": 1.2098617046533714e-05, "loss": 0.0178, "step": 102670 }, { "epoch": 0.7589958901274356, "grad_norm": 0.08306648582220078, "learning_rate": 1.2094907407407407e-05, "loss": 0.0172, "step": 102680 }, { "epoch": 0.7590698086987375, "grad_norm": 0.07849260419607162, "learning_rate": 1.2091197768281103e-05, "loss": 0.0166, "step": 102690 }, { "epoch": 0.7591437272700393, "grad_norm": 0.06569449603557587, "learning_rate": 1.2087488129154796e-05, "loss": 0.017, "step": 102700 }, { "epoch": 0.7592176458413412, "grad_norm": 0.09783091396093369, "learning_rate": 1.2083778490028492e-05, "loss": 0.0186, "step": 102710 }, { "epoch": 0.759291564412643, "grad_norm": 0.08046203851699829, "learning_rate": 1.2080068850902184e-05, "loss": 0.0184, "step": 102720 }, { "epoch": 0.7593654829839449, "grad_norm": 0.0971355214715004, "learning_rate": 1.2076359211775878e-05, "loss": 0.0201, "step": 102730 }, { "epoch": 0.7594394015552467, "grad_norm": 0.09459102153778076, "learning_rate": 1.2072649572649573e-05, "loss": 0.0183, "step": 102740 }, { "epoch": 0.7595133201265486, "grad_norm": 0.04941713437438011, "learning_rate": 1.2068939933523267e-05, "loss": 0.0199, "step": 102750 }, { "epoch": 0.7595872386978505, "grad_norm": 0.06698715686798096, "learning_rate": 1.2065230294396961e-05, "loss": 0.0162, "step": 102760 }, { "epoch": 0.7596611572691523, "grad_norm": 0.09899577498435974, "learning_rate": 1.2061520655270656e-05, "loss": 0.0184, "step": 102770 }, { "epoch": 0.7597350758404542, "grad_norm": 0.09042756259441376, "learning_rate": 1.205781101614435e-05, "loss": 0.0165, "step": 102780 }, { "epoch": 0.759808994411756, "grad_norm": 0.10201259702444077, "learning_rate": 1.2054101377018044e-05, "loss": 0.0183, "step": 102790 }, { "epoch": 0.7598829129830579, "grad_norm": 0.08615417033433914, "learning_rate": 1.2050391737891738e-05, "loss": 0.0179, "step": 102800 }, { "epoch": 0.7599568315543597, "grad_norm": 0.0700421929359436, "learning_rate": 1.2046682098765433e-05, "loss": 0.0158, "step": 102810 }, { "epoch": 0.7600307501256616, "grad_norm": 0.07872634381055832, "learning_rate": 1.2042972459639127e-05, "loss": 0.0164, "step": 102820 }, { "epoch": 0.7601046686969635, "grad_norm": 0.07595248520374298, "learning_rate": 1.2039262820512821e-05, "loss": 0.0171, "step": 102830 }, { "epoch": 0.7601785872682653, "grad_norm": 0.04081696644425392, "learning_rate": 1.2035553181386515e-05, "loss": 0.0148, "step": 102840 }, { "epoch": 0.7602525058395672, "grad_norm": 0.057837892323732376, "learning_rate": 1.203184354226021e-05, "loss": 0.0164, "step": 102850 }, { "epoch": 0.760326424410869, "grad_norm": 0.09097220748662949, "learning_rate": 1.2028133903133904e-05, "loss": 0.0177, "step": 102860 }, { "epoch": 0.7604003429821709, "grad_norm": 0.08579658716917038, "learning_rate": 1.2024424264007598e-05, "loss": 0.0164, "step": 102870 }, { "epoch": 0.7604742615534726, "grad_norm": 0.0934729054570198, "learning_rate": 1.2020714624881293e-05, "loss": 0.0215, "step": 102880 }, { "epoch": 0.7605481801247745, "grad_norm": 0.06498159468173981, "learning_rate": 1.2017004985754985e-05, "loss": 0.0175, "step": 102890 }, { "epoch": 0.7606220986960764, "grad_norm": 0.11769692599773407, "learning_rate": 1.2013295346628681e-05, "loss": 0.0169, "step": 102900 }, { "epoch": 0.7606960172673782, "grad_norm": 0.09046773612499237, "learning_rate": 1.2009585707502374e-05, "loss": 0.0183, "step": 102910 }, { "epoch": 0.7607699358386801, "grad_norm": 0.08058417588472366, "learning_rate": 1.200587606837607e-05, "loss": 0.018, "step": 102920 }, { "epoch": 0.7608438544099819, "grad_norm": 0.07865067571401596, "learning_rate": 1.2002166429249762e-05, "loss": 0.0147, "step": 102930 }, { "epoch": 0.7609177729812838, "grad_norm": 0.05952581763267517, "learning_rate": 1.1998456790123458e-05, "loss": 0.016, "step": 102940 }, { "epoch": 0.7609916915525857, "grad_norm": 0.12254879623651505, "learning_rate": 1.199474715099715e-05, "loss": 0.0172, "step": 102950 }, { "epoch": 0.7610656101238875, "grad_norm": 0.09980471432209015, "learning_rate": 1.1991037511870845e-05, "loss": 0.0161, "step": 102960 }, { "epoch": 0.7611395286951894, "grad_norm": 0.07723431289196014, "learning_rate": 1.198732787274454e-05, "loss": 0.0165, "step": 102970 }, { "epoch": 0.7612134472664912, "grad_norm": 0.09354310482740402, "learning_rate": 1.1983618233618234e-05, "loss": 0.0199, "step": 102980 }, { "epoch": 0.7612873658377931, "grad_norm": 0.08254075050354004, "learning_rate": 1.197990859449193e-05, "loss": 0.0158, "step": 102990 }, { "epoch": 0.7613612844090949, "grad_norm": 0.05928228050470352, "learning_rate": 1.1976198955365622e-05, "loss": 0.0159, "step": 103000 }, { "epoch": 0.7614352029803968, "grad_norm": 0.08125203102827072, "learning_rate": 1.1972489316239318e-05, "loss": 0.0186, "step": 103010 }, { "epoch": 0.7615091215516987, "grad_norm": 0.07555700093507767, "learning_rate": 1.196877967711301e-05, "loss": 0.0194, "step": 103020 }, { "epoch": 0.7615830401230005, "grad_norm": 0.07443048059940338, "learning_rate": 1.1965070037986707e-05, "loss": 0.0168, "step": 103030 }, { "epoch": 0.7616569586943024, "grad_norm": 0.06883411109447479, "learning_rate": 1.19613603988604e-05, "loss": 0.0168, "step": 103040 }, { "epoch": 0.7617308772656042, "grad_norm": 0.0769045278429985, "learning_rate": 1.1957650759734093e-05, "loss": 0.0164, "step": 103050 }, { "epoch": 0.7618047958369061, "grad_norm": 0.07747851312160492, "learning_rate": 1.1953941120607788e-05, "loss": 0.0161, "step": 103060 }, { "epoch": 0.7618787144082079, "grad_norm": 0.08282840996980667, "learning_rate": 1.1950231481481482e-05, "loss": 0.0167, "step": 103070 }, { "epoch": 0.7619526329795098, "grad_norm": 0.08814528584480286, "learning_rate": 1.1946521842355176e-05, "loss": 0.0168, "step": 103080 }, { "epoch": 0.7620265515508117, "grad_norm": 0.08881138265132904, "learning_rate": 1.194281220322887e-05, "loss": 0.017, "step": 103090 }, { "epoch": 0.7621004701221135, "grad_norm": 0.09396179765462875, "learning_rate": 1.1939102564102565e-05, "loss": 0.0198, "step": 103100 }, { "epoch": 0.7621743886934154, "grad_norm": 0.0843079537153244, "learning_rate": 1.1935392924976259e-05, "loss": 0.0178, "step": 103110 }, { "epoch": 0.7622483072647172, "grad_norm": 0.07248686254024506, "learning_rate": 1.1931683285849952e-05, "loss": 0.0192, "step": 103120 }, { "epoch": 0.762322225836019, "grad_norm": 0.062101028859615326, "learning_rate": 1.1927973646723648e-05, "loss": 0.0152, "step": 103130 }, { "epoch": 0.7623961444073208, "grad_norm": 0.06474128365516663, "learning_rate": 1.192426400759734e-05, "loss": 0.0165, "step": 103140 }, { "epoch": 0.7624700629786227, "grad_norm": 0.06901828944683075, "learning_rate": 1.1920554368471036e-05, "loss": 0.018, "step": 103150 }, { "epoch": 0.7625439815499246, "grad_norm": 0.07394035160541534, "learning_rate": 1.191684472934473e-05, "loss": 0.0138, "step": 103160 }, { "epoch": 0.7626179001212264, "grad_norm": 0.0887165293097496, "learning_rate": 1.1913135090218425e-05, "loss": 0.0164, "step": 103170 }, { "epoch": 0.7626918186925283, "grad_norm": 0.09024112671613693, "learning_rate": 1.1909425451092119e-05, "loss": 0.0211, "step": 103180 }, { "epoch": 0.7627657372638301, "grad_norm": 0.09429723024368286, "learning_rate": 1.1905715811965812e-05, "loss": 0.0197, "step": 103190 }, { "epoch": 0.762839655835132, "grad_norm": 0.056649837642908096, "learning_rate": 1.1902006172839508e-05, "loss": 0.018, "step": 103200 }, { "epoch": 0.7629135744064339, "grad_norm": 0.09306730329990387, "learning_rate": 1.18982965337132e-05, "loss": 0.0166, "step": 103210 }, { "epoch": 0.7629874929777357, "grad_norm": 0.06880921125411987, "learning_rate": 1.1894586894586896e-05, "loss": 0.0164, "step": 103220 }, { "epoch": 0.7630614115490376, "grad_norm": 0.1179075613617897, "learning_rate": 1.1890877255460589e-05, "loss": 0.0166, "step": 103230 }, { "epoch": 0.7631353301203394, "grad_norm": 0.07119959592819214, "learning_rate": 1.1887167616334285e-05, "loss": 0.016, "step": 103240 }, { "epoch": 0.7632092486916413, "grad_norm": 0.06132645159959793, "learning_rate": 1.1883457977207977e-05, "loss": 0.0175, "step": 103250 }, { "epoch": 0.7632831672629431, "grad_norm": 0.11850385367870331, "learning_rate": 1.1879748338081673e-05, "loss": 0.0186, "step": 103260 }, { "epoch": 0.763357085834245, "grad_norm": 0.0714501440525055, "learning_rate": 1.1876038698955366e-05, "loss": 0.017, "step": 103270 }, { "epoch": 0.7634310044055469, "grad_norm": 0.05191829428076744, "learning_rate": 1.187232905982906e-05, "loss": 0.0154, "step": 103280 }, { "epoch": 0.7635049229768487, "grad_norm": 0.08176074177026749, "learning_rate": 1.1868619420702754e-05, "loss": 0.0172, "step": 103290 }, { "epoch": 0.7635788415481506, "grad_norm": 0.11602763831615448, "learning_rate": 1.1864909781576449e-05, "loss": 0.0182, "step": 103300 }, { "epoch": 0.7636527601194524, "grad_norm": 0.06472666561603546, "learning_rate": 1.1861200142450143e-05, "loss": 0.0175, "step": 103310 }, { "epoch": 0.7637266786907543, "grad_norm": 0.06399382650852203, "learning_rate": 1.1857490503323837e-05, "loss": 0.0167, "step": 103320 }, { "epoch": 0.7638005972620561, "grad_norm": 0.06925990432500839, "learning_rate": 1.1853780864197531e-05, "loss": 0.0169, "step": 103330 }, { "epoch": 0.763874515833358, "grad_norm": 0.09379049390554428, "learning_rate": 1.1850071225071226e-05, "loss": 0.0177, "step": 103340 }, { "epoch": 0.7639484344046599, "grad_norm": 0.062449511140584946, "learning_rate": 1.184636158594492e-05, "loss": 0.016, "step": 103350 }, { "epoch": 0.7640223529759617, "grad_norm": 0.07464705407619476, "learning_rate": 1.1842651946818614e-05, "loss": 0.0163, "step": 103360 }, { "epoch": 0.7640962715472636, "grad_norm": 0.06858009099960327, "learning_rate": 1.1838942307692309e-05, "loss": 0.0158, "step": 103370 }, { "epoch": 0.7641701901185654, "grad_norm": 0.08118981122970581, "learning_rate": 1.1835232668566003e-05, "loss": 0.018, "step": 103380 }, { "epoch": 0.7642441086898673, "grad_norm": 0.07128416001796722, "learning_rate": 1.1831523029439697e-05, "loss": 0.0152, "step": 103390 }, { "epoch": 0.764318027261169, "grad_norm": 0.0697440430521965, "learning_rate": 1.1827813390313391e-05, "loss": 0.0148, "step": 103400 }, { "epoch": 0.7643919458324709, "grad_norm": 0.05445777252316475, "learning_rate": 1.1824103751187086e-05, "loss": 0.0154, "step": 103410 }, { "epoch": 0.7644658644037728, "grad_norm": 0.07951023429632187, "learning_rate": 1.1820394112060778e-05, "loss": 0.0181, "step": 103420 }, { "epoch": 0.7645397829750746, "grad_norm": 0.06549987196922302, "learning_rate": 1.1816684472934474e-05, "loss": 0.0159, "step": 103430 }, { "epoch": 0.7646137015463765, "grad_norm": 0.0658888965845108, "learning_rate": 1.1812974833808167e-05, "loss": 0.0173, "step": 103440 }, { "epoch": 0.7646876201176783, "grad_norm": 0.07832251489162445, "learning_rate": 1.1809265194681863e-05, "loss": 0.0167, "step": 103450 }, { "epoch": 0.7647615386889802, "grad_norm": 0.08702033013105392, "learning_rate": 1.1805555555555555e-05, "loss": 0.0185, "step": 103460 }, { "epoch": 0.7648354572602821, "grad_norm": 0.0707923099398613, "learning_rate": 1.1801845916429251e-05, "loss": 0.017, "step": 103470 }, { "epoch": 0.7649093758315839, "grad_norm": 0.08582896739244461, "learning_rate": 1.1798136277302944e-05, "loss": 0.02, "step": 103480 }, { "epoch": 0.7649832944028858, "grad_norm": 0.09733167290687561, "learning_rate": 1.179442663817664e-05, "loss": 0.0179, "step": 103490 }, { "epoch": 0.7650572129741876, "grad_norm": 0.0878724455833435, "learning_rate": 1.1790716999050332e-05, "loss": 0.0176, "step": 103500 }, { "epoch": 0.7651311315454895, "grad_norm": 0.09843181073665619, "learning_rate": 1.1787007359924027e-05, "loss": 0.0159, "step": 103510 }, { "epoch": 0.7652050501167913, "grad_norm": 0.07592066377401352, "learning_rate": 1.1783297720797721e-05, "loss": 0.017, "step": 103520 }, { "epoch": 0.7652789686880932, "grad_norm": 0.06643280386924744, "learning_rate": 1.1779588081671415e-05, "loss": 0.0183, "step": 103530 }, { "epoch": 0.7653528872593951, "grad_norm": 0.07768280804157257, "learning_rate": 1.177587844254511e-05, "loss": 0.0173, "step": 103540 }, { "epoch": 0.7654268058306969, "grad_norm": 0.06870684027671814, "learning_rate": 1.1772168803418804e-05, "loss": 0.0175, "step": 103550 }, { "epoch": 0.7655007244019988, "grad_norm": 0.0949084684252739, "learning_rate": 1.1768459164292498e-05, "loss": 0.0165, "step": 103560 }, { "epoch": 0.7655746429733006, "grad_norm": 0.11342213302850723, "learning_rate": 1.1764749525166192e-05, "loss": 0.019, "step": 103570 }, { "epoch": 0.7656485615446025, "grad_norm": 0.09305454790592194, "learning_rate": 1.1761039886039887e-05, "loss": 0.0189, "step": 103580 }, { "epoch": 0.7657224801159043, "grad_norm": 0.05589432269334793, "learning_rate": 1.175733024691358e-05, "loss": 0.0168, "step": 103590 }, { "epoch": 0.7657963986872062, "grad_norm": 0.05523810163140297, "learning_rate": 1.1753620607787275e-05, "loss": 0.0189, "step": 103600 }, { "epoch": 0.7658703172585081, "grad_norm": 0.08939807116985321, "learning_rate": 1.174991096866097e-05, "loss": 0.0182, "step": 103610 }, { "epoch": 0.7659442358298099, "grad_norm": 0.057942889630794525, "learning_rate": 1.1746201329534664e-05, "loss": 0.0155, "step": 103620 }, { "epoch": 0.7660181544011118, "grad_norm": 0.07429220527410507, "learning_rate": 1.1742491690408358e-05, "loss": 0.018, "step": 103630 }, { "epoch": 0.7660920729724136, "grad_norm": 0.06361553817987442, "learning_rate": 1.1738782051282052e-05, "loss": 0.0157, "step": 103640 }, { "epoch": 0.7661659915437155, "grad_norm": 0.10519684106111526, "learning_rate": 1.1735072412155745e-05, "loss": 0.0166, "step": 103650 }, { "epoch": 0.7662399101150172, "grad_norm": 0.09359843283891678, "learning_rate": 1.173136277302944e-05, "loss": 0.0171, "step": 103660 }, { "epoch": 0.7663138286863191, "grad_norm": 0.08676464110612869, "learning_rate": 1.1727653133903133e-05, "loss": 0.0188, "step": 103670 }, { "epoch": 0.766387747257621, "grad_norm": 0.07525712251663208, "learning_rate": 1.172394349477683e-05, "loss": 0.0175, "step": 103680 }, { "epoch": 0.7664616658289228, "grad_norm": 0.05233407020568848, "learning_rate": 1.1720233855650522e-05, "loss": 0.0173, "step": 103690 }, { "epoch": 0.7665355844002247, "grad_norm": 0.07339996844530106, "learning_rate": 1.1716524216524218e-05, "loss": 0.0176, "step": 103700 }, { "epoch": 0.7666095029715265, "grad_norm": 0.07441697269678116, "learning_rate": 1.171281457739791e-05, "loss": 0.0182, "step": 103710 }, { "epoch": 0.7666834215428284, "grad_norm": 0.06306453794240952, "learning_rate": 1.1709104938271606e-05, "loss": 0.0176, "step": 103720 }, { "epoch": 0.7667573401141303, "grad_norm": 0.06008174642920494, "learning_rate": 1.1705395299145299e-05, "loss": 0.0188, "step": 103730 }, { "epoch": 0.7668312586854321, "grad_norm": 0.05986854434013367, "learning_rate": 1.1701685660018993e-05, "loss": 0.0175, "step": 103740 }, { "epoch": 0.766905177256734, "grad_norm": 0.0892731100320816, "learning_rate": 1.169797602089269e-05, "loss": 0.0175, "step": 103750 }, { "epoch": 0.7669790958280358, "grad_norm": 0.060458648949861526, "learning_rate": 1.1694266381766382e-05, "loss": 0.016, "step": 103760 }, { "epoch": 0.7670530143993377, "grad_norm": 0.059939801692962646, "learning_rate": 1.1690556742640078e-05, "loss": 0.0172, "step": 103770 }, { "epoch": 0.7671269329706395, "grad_norm": 0.08211683481931686, "learning_rate": 1.168684710351377e-05, "loss": 0.0149, "step": 103780 }, { "epoch": 0.7672008515419414, "grad_norm": 0.08790618181228638, "learning_rate": 1.1683137464387466e-05, "loss": 0.0171, "step": 103790 }, { "epoch": 0.7672747701132433, "grad_norm": 0.07280895859003067, "learning_rate": 1.1679427825261159e-05, "loss": 0.0156, "step": 103800 }, { "epoch": 0.7673486886845451, "grad_norm": 0.07520013302564621, "learning_rate": 1.1675718186134853e-05, "loss": 0.0179, "step": 103810 }, { "epoch": 0.767422607255847, "grad_norm": 0.0693606436252594, "learning_rate": 1.1672008547008547e-05, "loss": 0.0215, "step": 103820 }, { "epoch": 0.7674965258271488, "grad_norm": 0.08086995780467987, "learning_rate": 1.1668298907882242e-05, "loss": 0.018, "step": 103830 }, { "epoch": 0.7675704443984507, "grad_norm": 0.09201215952634811, "learning_rate": 1.1664589268755936e-05, "loss": 0.0164, "step": 103840 }, { "epoch": 0.7676443629697525, "grad_norm": 0.09462850540876389, "learning_rate": 1.166087962962963e-05, "loss": 0.0188, "step": 103850 }, { "epoch": 0.7677182815410544, "grad_norm": 0.05690096318721771, "learning_rate": 1.1657169990503324e-05, "loss": 0.0169, "step": 103860 }, { "epoch": 0.7677922001123563, "grad_norm": 0.09154248982667923, "learning_rate": 1.1653460351377019e-05, "loss": 0.0184, "step": 103870 }, { "epoch": 0.7678661186836581, "grad_norm": 0.09101495146751404, "learning_rate": 1.1649750712250711e-05, "loss": 0.019, "step": 103880 }, { "epoch": 0.76794003725496, "grad_norm": 0.1021990031003952, "learning_rate": 1.1646041073124407e-05, "loss": 0.0186, "step": 103890 }, { "epoch": 0.7680139558262618, "grad_norm": 0.08415243029594421, "learning_rate": 1.16423314339981e-05, "loss": 0.0177, "step": 103900 }, { "epoch": 0.7680878743975637, "grad_norm": 0.07058227062225342, "learning_rate": 1.1638621794871796e-05, "loss": 0.0174, "step": 103910 }, { "epoch": 0.7681617929688656, "grad_norm": 0.09863097965717316, "learning_rate": 1.163491215574549e-05, "loss": 0.0185, "step": 103920 }, { "epoch": 0.7682357115401673, "grad_norm": 0.06945838034152985, "learning_rate": 1.1631202516619184e-05, "loss": 0.0173, "step": 103930 }, { "epoch": 0.7683096301114692, "grad_norm": 0.09523185342550278, "learning_rate": 1.1627492877492879e-05, "loss": 0.0195, "step": 103940 }, { "epoch": 0.768383548682771, "grad_norm": 0.08189336955547333, "learning_rate": 1.1623783238366573e-05, "loss": 0.0149, "step": 103950 }, { "epoch": 0.7684574672540729, "grad_norm": 0.07451499253511429, "learning_rate": 1.1620073599240267e-05, "loss": 0.0172, "step": 103960 }, { "epoch": 0.7685313858253747, "grad_norm": 0.1129722073674202, "learning_rate": 1.161636396011396e-05, "loss": 0.0171, "step": 103970 }, { "epoch": 0.7686053043966766, "grad_norm": 0.08089788258075714, "learning_rate": 1.1612654320987656e-05, "loss": 0.0165, "step": 103980 }, { "epoch": 0.7686792229679785, "grad_norm": 0.05218294635415077, "learning_rate": 1.1608944681861348e-05, "loss": 0.0157, "step": 103990 }, { "epoch": 0.7687531415392803, "grad_norm": 0.08422937244176865, "learning_rate": 1.1605235042735044e-05, "loss": 0.0165, "step": 104000 }, { "epoch": 0.7688270601105822, "grad_norm": 0.09455084800720215, "learning_rate": 1.1601525403608737e-05, "loss": 0.0162, "step": 104010 }, { "epoch": 0.768900978681884, "grad_norm": 0.1238221526145935, "learning_rate": 1.1597815764482433e-05, "loss": 0.0193, "step": 104020 }, { "epoch": 0.7689748972531859, "grad_norm": 0.06627760827541351, "learning_rate": 1.1594106125356125e-05, "loss": 0.0166, "step": 104030 }, { "epoch": 0.7690488158244877, "grad_norm": 0.06207331269979477, "learning_rate": 1.159039648622982e-05, "loss": 0.018, "step": 104040 }, { "epoch": 0.7691227343957896, "grad_norm": 0.09232545644044876, "learning_rate": 1.1586686847103514e-05, "loss": 0.0161, "step": 104050 }, { "epoch": 0.7691966529670915, "grad_norm": 0.06648951768875122, "learning_rate": 1.1582977207977208e-05, "loss": 0.019, "step": 104060 }, { "epoch": 0.7692705715383933, "grad_norm": 0.06869245320558548, "learning_rate": 1.1579267568850903e-05, "loss": 0.0171, "step": 104070 }, { "epoch": 0.7693444901096952, "grad_norm": 0.09643948078155518, "learning_rate": 1.1575557929724597e-05, "loss": 0.0186, "step": 104080 }, { "epoch": 0.769418408680997, "grad_norm": 0.09167290478944778, "learning_rate": 1.1571848290598291e-05, "loss": 0.0213, "step": 104090 }, { "epoch": 0.7694923272522989, "grad_norm": 0.06981848180294037, "learning_rate": 1.1568138651471985e-05, "loss": 0.0175, "step": 104100 }, { "epoch": 0.7695662458236007, "grad_norm": 0.062315262854099274, "learning_rate": 1.156442901234568e-05, "loss": 0.018, "step": 104110 }, { "epoch": 0.7696401643949026, "grad_norm": 0.06809121370315552, "learning_rate": 1.1560719373219374e-05, "loss": 0.0164, "step": 104120 }, { "epoch": 0.7697140829662045, "grad_norm": 0.13859310746192932, "learning_rate": 1.1557009734093068e-05, "loss": 0.0196, "step": 104130 }, { "epoch": 0.7697880015375063, "grad_norm": 0.12869401276111603, "learning_rate": 1.1553300094966762e-05, "loss": 0.0178, "step": 104140 }, { "epoch": 0.7698619201088082, "grad_norm": 0.07242847234010696, "learning_rate": 1.1549590455840457e-05, "loss": 0.0178, "step": 104150 }, { "epoch": 0.76993583868011, "grad_norm": 0.11106471717357635, "learning_rate": 1.1545880816714151e-05, "loss": 0.0165, "step": 104160 }, { "epoch": 0.7700097572514119, "grad_norm": 0.08795207738876343, "learning_rate": 1.1542171177587845e-05, "loss": 0.0173, "step": 104170 }, { "epoch": 0.7700836758227138, "grad_norm": 0.05803506821393967, "learning_rate": 1.153846153846154e-05, "loss": 0.0172, "step": 104180 }, { "epoch": 0.7701575943940155, "grad_norm": 0.08186095952987671, "learning_rate": 1.1534751899335234e-05, "loss": 0.0172, "step": 104190 }, { "epoch": 0.7702315129653174, "grad_norm": 0.09746869653463364, "learning_rate": 1.1531042260208926e-05, "loss": 0.0194, "step": 104200 }, { "epoch": 0.7703054315366192, "grad_norm": 0.07588176429271698, "learning_rate": 1.1527332621082622e-05, "loss": 0.0173, "step": 104210 }, { "epoch": 0.7703793501079211, "grad_norm": 0.10306576639413834, "learning_rate": 1.1523622981956315e-05, "loss": 0.017, "step": 104220 }, { "epoch": 0.7704532686792229, "grad_norm": 0.10408895462751389, "learning_rate": 1.1519913342830011e-05, "loss": 0.0187, "step": 104230 }, { "epoch": 0.7705271872505248, "grad_norm": 0.05544160306453705, "learning_rate": 1.1516203703703703e-05, "loss": 0.0169, "step": 104240 }, { "epoch": 0.7706011058218267, "grad_norm": 0.06006123498082161, "learning_rate": 1.15124940645774e-05, "loss": 0.0166, "step": 104250 }, { "epoch": 0.7706750243931285, "grad_norm": 0.07598260790109634, "learning_rate": 1.1508784425451092e-05, "loss": 0.0161, "step": 104260 }, { "epoch": 0.7707489429644304, "grad_norm": 0.07916712760925293, "learning_rate": 1.1505074786324786e-05, "loss": 0.0177, "step": 104270 }, { "epoch": 0.7708228615357322, "grad_norm": 0.06070020794868469, "learning_rate": 1.150136514719848e-05, "loss": 0.0183, "step": 104280 }, { "epoch": 0.7708967801070341, "grad_norm": 0.07659520953893661, "learning_rate": 1.1497655508072175e-05, "loss": 0.019, "step": 104290 }, { "epoch": 0.7709706986783359, "grad_norm": 0.1022806465625763, "learning_rate": 1.1493945868945869e-05, "loss": 0.019, "step": 104300 }, { "epoch": 0.7710446172496378, "grad_norm": 0.07769617438316345, "learning_rate": 1.1490236229819563e-05, "loss": 0.0154, "step": 104310 }, { "epoch": 0.7711185358209397, "grad_norm": 0.08433184027671814, "learning_rate": 1.1486526590693258e-05, "loss": 0.0173, "step": 104320 }, { "epoch": 0.7711924543922415, "grad_norm": 0.08329308032989502, "learning_rate": 1.1482816951566952e-05, "loss": 0.0169, "step": 104330 }, { "epoch": 0.7712663729635434, "grad_norm": 0.08364029228687286, "learning_rate": 1.1479107312440646e-05, "loss": 0.0163, "step": 104340 }, { "epoch": 0.7713402915348452, "grad_norm": 0.10364638268947601, "learning_rate": 1.147539767331434e-05, "loss": 0.0169, "step": 104350 }, { "epoch": 0.7714142101061471, "grad_norm": 0.08816733956336975, "learning_rate": 1.1471688034188035e-05, "loss": 0.0154, "step": 104360 }, { "epoch": 0.7714881286774489, "grad_norm": 0.07040822505950928, "learning_rate": 1.1467978395061729e-05, "loss": 0.0154, "step": 104370 }, { "epoch": 0.7715620472487508, "grad_norm": 0.06682794541120529, "learning_rate": 1.1464268755935423e-05, "loss": 0.0163, "step": 104380 }, { "epoch": 0.7716359658200527, "grad_norm": 0.12376798689365387, "learning_rate": 1.1460559116809118e-05, "loss": 0.0168, "step": 104390 }, { "epoch": 0.7717098843913545, "grad_norm": 0.09192962199449539, "learning_rate": 1.1456849477682812e-05, "loss": 0.017, "step": 104400 }, { "epoch": 0.7717838029626564, "grad_norm": 0.09359622001647949, "learning_rate": 1.1453139838556506e-05, "loss": 0.0187, "step": 104410 }, { "epoch": 0.7718577215339582, "grad_norm": 0.08074385672807693, "learning_rate": 1.14494301994302e-05, "loss": 0.0169, "step": 104420 }, { "epoch": 0.77193164010526, "grad_norm": 0.0634748712182045, "learning_rate": 1.1445720560303893e-05, "loss": 0.0175, "step": 104430 }, { "epoch": 0.772005558676562, "grad_norm": 0.07420934736728668, "learning_rate": 1.1442010921177589e-05, "loss": 0.0211, "step": 104440 }, { "epoch": 0.7720794772478637, "grad_norm": 0.07205003499984741, "learning_rate": 1.1438301282051282e-05, "loss": 0.0177, "step": 104450 }, { "epoch": 0.7721533958191656, "grad_norm": 0.09320352226495743, "learning_rate": 1.1434591642924977e-05, "loss": 0.0169, "step": 104460 }, { "epoch": 0.7722273143904674, "grad_norm": 0.06319659948348999, "learning_rate": 1.143088200379867e-05, "loss": 0.0156, "step": 104470 }, { "epoch": 0.7723012329617693, "grad_norm": 0.05548546463251114, "learning_rate": 1.1427172364672366e-05, "loss": 0.0166, "step": 104480 }, { "epoch": 0.7723751515330711, "grad_norm": 0.08645886182785034, "learning_rate": 1.1423462725546059e-05, "loss": 0.0167, "step": 104490 }, { "epoch": 0.772449070104373, "grad_norm": 0.10371587425470352, "learning_rate": 1.1419753086419753e-05, "loss": 0.0183, "step": 104500 }, { "epoch": 0.7725229886756749, "grad_norm": 0.07877830415964127, "learning_rate": 1.1416043447293447e-05, "loss": 0.0179, "step": 104510 }, { "epoch": 0.7725969072469767, "grad_norm": 0.07617280632257462, "learning_rate": 1.1412333808167141e-05, "loss": 0.0182, "step": 104520 }, { "epoch": 0.7726708258182786, "grad_norm": 0.06728853285312653, "learning_rate": 1.1408624169040837e-05, "loss": 0.0191, "step": 104530 }, { "epoch": 0.7727447443895804, "grad_norm": 0.07350888848304749, "learning_rate": 1.140491452991453e-05, "loss": 0.0171, "step": 104540 }, { "epoch": 0.7728186629608823, "grad_norm": 0.07082013785839081, "learning_rate": 1.1401204890788226e-05, "loss": 0.0214, "step": 104550 }, { "epoch": 0.7728925815321841, "grad_norm": 0.09921420365571976, "learning_rate": 1.1397495251661919e-05, "loss": 0.0184, "step": 104560 }, { "epoch": 0.772966500103486, "grad_norm": 0.058607906103134155, "learning_rate": 1.1393785612535614e-05, "loss": 0.0177, "step": 104570 }, { "epoch": 0.7730404186747879, "grad_norm": 0.06936301290988922, "learning_rate": 1.1390075973409307e-05, "loss": 0.018, "step": 104580 }, { "epoch": 0.7731143372460897, "grad_norm": 0.07501678913831711, "learning_rate": 1.1386366334283001e-05, "loss": 0.0168, "step": 104590 }, { "epoch": 0.7731882558173916, "grad_norm": 0.08128910511732101, "learning_rate": 1.1382656695156696e-05, "loss": 0.0176, "step": 104600 }, { "epoch": 0.7732621743886934, "grad_norm": 0.09496302157640457, "learning_rate": 1.137894705603039e-05, "loss": 0.0149, "step": 104610 }, { "epoch": 0.7733360929599953, "grad_norm": 0.0731099471449852, "learning_rate": 1.1375237416904084e-05, "loss": 0.0165, "step": 104620 }, { "epoch": 0.7734100115312971, "grad_norm": 0.09244808554649353, "learning_rate": 1.1371527777777778e-05, "loss": 0.018, "step": 104630 }, { "epoch": 0.773483930102599, "grad_norm": 0.10306263715028763, "learning_rate": 1.1367818138651473e-05, "loss": 0.0182, "step": 104640 }, { "epoch": 0.7735578486739009, "grad_norm": 0.06824192404747009, "learning_rate": 1.1364108499525167e-05, "loss": 0.0176, "step": 104650 }, { "epoch": 0.7736317672452027, "grad_norm": 0.07538828998804092, "learning_rate": 1.136039886039886e-05, "loss": 0.0166, "step": 104660 }, { "epoch": 0.7737056858165046, "grad_norm": 0.06246098130941391, "learning_rate": 1.1356689221272556e-05, "loss": 0.0171, "step": 104670 }, { "epoch": 0.7737796043878064, "grad_norm": 0.09654530882835388, "learning_rate": 1.135297958214625e-05, "loss": 0.0166, "step": 104680 }, { "epoch": 0.7738535229591083, "grad_norm": 0.06581386178731918, "learning_rate": 1.1349269943019944e-05, "loss": 0.017, "step": 104690 }, { "epoch": 0.7739274415304102, "grad_norm": 0.06344723701477051, "learning_rate": 1.1345560303893638e-05, "loss": 0.0165, "step": 104700 }, { "epoch": 0.7740013601017119, "grad_norm": 0.07966010272502899, "learning_rate": 1.1341850664767333e-05, "loss": 0.0164, "step": 104710 }, { "epoch": 0.7740752786730138, "grad_norm": 0.07721570134162903, "learning_rate": 1.1338141025641027e-05, "loss": 0.0158, "step": 104720 }, { "epoch": 0.7741491972443156, "grad_norm": 0.07869109511375427, "learning_rate": 1.133443138651472e-05, "loss": 0.0159, "step": 104730 }, { "epoch": 0.7742231158156175, "grad_norm": 0.07940968871116638, "learning_rate": 1.1330721747388415e-05, "loss": 0.0171, "step": 104740 }, { "epoch": 0.7742970343869193, "grad_norm": 0.05805268883705139, "learning_rate": 1.1327012108262108e-05, "loss": 0.017, "step": 104750 }, { "epoch": 0.7743709529582212, "grad_norm": 0.09983490407466888, "learning_rate": 1.1323302469135804e-05, "loss": 0.0156, "step": 104760 }, { "epoch": 0.7744448715295231, "grad_norm": 0.09040165692567825, "learning_rate": 1.1319592830009497e-05, "loss": 0.0178, "step": 104770 }, { "epoch": 0.7745187901008249, "grad_norm": 0.08260339498519897, "learning_rate": 1.1315883190883193e-05, "loss": 0.017, "step": 104780 }, { "epoch": 0.7745927086721268, "grad_norm": 0.09427991509437561, "learning_rate": 1.1312173551756885e-05, "loss": 0.0208, "step": 104790 }, { "epoch": 0.7746666272434286, "grad_norm": 0.07443516701459885, "learning_rate": 1.1308463912630581e-05, "loss": 0.0196, "step": 104800 }, { "epoch": 0.7747405458147305, "grad_norm": 0.06800392270088196, "learning_rate": 1.1304754273504274e-05, "loss": 0.0178, "step": 104810 }, { "epoch": 0.7748144643860323, "grad_norm": 0.060037799179553986, "learning_rate": 1.1301044634377968e-05, "loss": 0.0176, "step": 104820 }, { "epoch": 0.7748883829573342, "grad_norm": 0.0839676484465599, "learning_rate": 1.1297334995251662e-05, "loss": 0.0161, "step": 104830 }, { "epoch": 0.7749623015286361, "grad_norm": 0.05880623683333397, "learning_rate": 1.1293625356125356e-05, "loss": 0.0164, "step": 104840 }, { "epoch": 0.7750362200999379, "grad_norm": 0.0863754004240036, "learning_rate": 1.128991571699905e-05, "loss": 0.0153, "step": 104850 }, { "epoch": 0.7751101386712398, "grad_norm": 0.08357366174459457, "learning_rate": 1.1286206077872745e-05, "loss": 0.0176, "step": 104860 }, { "epoch": 0.7751840572425416, "grad_norm": 0.0565825030207634, "learning_rate": 1.128249643874644e-05, "loss": 0.0157, "step": 104870 }, { "epoch": 0.7752579758138435, "grad_norm": 0.061214566230773926, "learning_rate": 1.1278786799620134e-05, "loss": 0.0171, "step": 104880 }, { "epoch": 0.7753318943851453, "grad_norm": 0.10586986690759659, "learning_rate": 1.1275077160493828e-05, "loss": 0.0171, "step": 104890 }, { "epoch": 0.7754058129564472, "grad_norm": 0.08357368409633636, "learning_rate": 1.1271367521367522e-05, "loss": 0.016, "step": 104900 }, { "epoch": 0.7754797315277491, "grad_norm": 0.09002059698104858, "learning_rate": 1.1267657882241216e-05, "loss": 0.0178, "step": 104910 }, { "epoch": 0.7755536500990509, "grad_norm": 0.06980524212121964, "learning_rate": 1.126394824311491e-05, "loss": 0.018, "step": 104920 }, { "epoch": 0.7756275686703528, "grad_norm": 0.07796944677829742, "learning_rate": 1.1260238603988605e-05, "loss": 0.0176, "step": 104930 }, { "epoch": 0.7757014872416546, "grad_norm": 0.053704190999269485, "learning_rate": 1.12565289648623e-05, "loss": 0.0148, "step": 104940 }, { "epoch": 0.7757754058129565, "grad_norm": 0.06982135772705078, "learning_rate": 1.1252819325735993e-05, "loss": 0.0183, "step": 104950 }, { "epoch": 0.7758493243842584, "grad_norm": 0.07299453765153885, "learning_rate": 1.1249109686609686e-05, "loss": 0.0167, "step": 104960 }, { "epoch": 0.7759232429555601, "grad_norm": 0.10618864744901657, "learning_rate": 1.1245400047483382e-05, "loss": 0.0183, "step": 104970 }, { "epoch": 0.775997161526862, "grad_norm": 0.08143940567970276, "learning_rate": 1.1241690408357075e-05, "loss": 0.0186, "step": 104980 }, { "epoch": 0.7760710800981638, "grad_norm": 0.06453964114189148, "learning_rate": 1.123798076923077e-05, "loss": 0.0172, "step": 104990 }, { "epoch": 0.7761449986694657, "grad_norm": 0.08334316313266754, "learning_rate": 1.1234271130104463e-05, "loss": 0.0174, "step": 105000 }, { "epoch": 0.7762189172407675, "grad_norm": 0.06593699008226395, "learning_rate": 1.1230561490978159e-05, "loss": 0.0171, "step": 105010 }, { "epoch": 0.7762928358120694, "grad_norm": 0.0909162312746048, "learning_rate": 1.1226851851851852e-05, "loss": 0.0169, "step": 105020 }, { "epoch": 0.7763667543833713, "grad_norm": 0.07157652080059052, "learning_rate": 1.1223142212725548e-05, "loss": 0.0168, "step": 105030 }, { "epoch": 0.7764406729546731, "grad_norm": 0.08083537966012955, "learning_rate": 1.121943257359924e-05, "loss": 0.0161, "step": 105040 }, { "epoch": 0.776514591525975, "grad_norm": 0.0855388268828392, "learning_rate": 1.1215722934472935e-05, "loss": 0.0166, "step": 105050 }, { "epoch": 0.7765885100972768, "grad_norm": 0.06755349785089493, "learning_rate": 1.1212013295346629e-05, "loss": 0.0164, "step": 105060 }, { "epoch": 0.7766624286685787, "grad_norm": 0.07067691534757614, "learning_rate": 1.1208303656220323e-05, "loss": 0.014, "step": 105070 }, { "epoch": 0.7767363472398805, "grad_norm": 0.11115356534719467, "learning_rate": 1.1204594017094017e-05, "loss": 0.0155, "step": 105080 }, { "epoch": 0.7768102658111824, "grad_norm": 0.05139143392443657, "learning_rate": 1.1200884377967712e-05, "loss": 0.0155, "step": 105090 }, { "epoch": 0.7768841843824843, "grad_norm": 0.08463030308485031, "learning_rate": 1.1197174738841406e-05, "loss": 0.0169, "step": 105100 }, { "epoch": 0.7769581029537861, "grad_norm": 0.06260386109352112, "learning_rate": 1.11934650997151e-05, "loss": 0.0169, "step": 105110 }, { "epoch": 0.777032021525088, "grad_norm": 0.06074802950024605, "learning_rate": 1.1189755460588794e-05, "loss": 0.0175, "step": 105120 }, { "epoch": 0.7771059400963898, "grad_norm": 0.06441828608512878, "learning_rate": 1.1186045821462489e-05, "loss": 0.0202, "step": 105130 }, { "epoch": 0.7771798586676917, "grad_norm": 0.07338294386863708, "learning_rate": 1.1182336182336183e-05, "loss": 0.0157, "step": 105140 }, { "epoch": 0.7772537772389935, "grad_norm": 0.07631634920835495, "learning_rate": 1.1178626543209877e-05, "loss": 0.0182, "step": 105150 }, { "epoch": 0.7773276958102954, "grad_norm": 0.08145643770694733, "learning_rate": 1.1174916904083572e-05, "loss": 0.0182, "step": 105160 }, { "epoch": 0.7774016143815973, "grad_norm": 0.07141675055027008, "learning_rate": 1.1171207264957266e-05, "loss": 0.0182, "step": 105170 }, { "epoch": 0.7774755329528991, "grad_norm": 0.14300119876861572, "learning_rate": 1.116749762583096e-05, "loss": 0.0201, "step": 105180 }, { "epoch": 0.777549451524201, "grad_norm": 0.07788577675819397, "learning_rate": 1.1163787986704653e-05, "loss": 0.0173, "step": 105190 }, { "epoch": 0.7776233700955028, "grad_norm": 0.056850410997867584, "learning_rate": 1.1160078347578349e-05, "loss": 0.0169, "step": 105200 }, { "epoch": 0.7776972886668047, "grad_norm": 0.08336438238620758, "learning_rate": 1.1156368708452041e-05, "loss": 0.0174, "step": 105210 }, { "epoch": 0.7777712072381066, "grad_norm": 0.10761765390634537, "learning_rate": 1.1152659069325737e-05, "loss": 0.0186, "step": 105220 }, { "epoch": 0.7778451258094083, "grad_norm": 0.07789470255374908, "learning_rate": 1.114894943019943e-05, "loss": 0.0158, "step": 105230 }, { "epoch": 0.7779190443807102, "grad_norm": 0.0754820704460144, "learning_rate": 1.1145239791073126e-05, "loss": 0.0189, "step": 105240 }, { "epoch": 0.777992962952012, "grad_norm": 0.06808315217494965, "learning_rate": 1.1141530151946818e-05, "loss": 0.0175, "step": 105250 }, { "epoch": 0.7780668815233139, "grad_norm": 0.06405656784772873, "learning_rate": 1.1137820512820514e-05, "loss": 0.0186, "step": 105260 }, { "epoch": 0.7781408000946157, "grad_norm": 0.06838709115982056, "learning_rate": 1.1134110873694207e-05, "loss": 0.0167, "step": 105270 }, { "epoch": 0.7782147186659176, "grad_norm": 0.09492330998182297, "learning_rate": 1.1130401234567901e-05, "loss": 0.0184, "step": 105280 }, { "epoch": 0.7782886372372195, "grad_norm": 0.05190137028694153, "learning_rate": 1.1126691595441597e-05, "loss": 0.0162, "step": 105290 }, { "epoch": 0.7783625558085213, "grad_norm": 0.106864333152771, "learning_rate": 1.112298195631529e-05, "loss": 0.0168, "step": 105300 }, { "epoch": 0.7784364743798232, "grad_norm": 0.0951375812292099, "learning_rate": 1.1119272317188986e-05, "loss": 0.0157, "step": 105310 }, { "epoch": 0.778510392951125, "grad_norm": 0.06838471442461014, "learning_rate": 1.1115562678062678e-05, "loss": 0.0172, "step": 105320 }, { "epoch": 0.7785843115224269, "grad_norm": 0.09433237463235855, "learning_rate": 1.1111853038936374e-05, "loss": 0.0182, "step": 105330 }, { "epoch": 0.7786582300937287, "grad_norm": 0.0777023509144783, "learning_rate": 1.1108143399810067e-05, "loss": 0.017, "step": 105340 }, { "epoch": 0.7787321486650306, "grad_norm": 0.10564722865819931, "learning_rate": 1.1104433760683761e-05, "loss": 0.0186, "step": 105350 }, { "epoch": 0.7788060672363325, "grad_norm": 0.09359659254550934, "learning_rate": 1.1100724121557455e-05, "loss": 0.0193, "step": 105360 }, { "epoch": 0.7788799858076343, "grad_norm": 0.06303531676530838, "learning_rate": 1.109701448243115e-05, "loss": 0.0158, "step": 105370 }, { "epoch": 0.7789539043789362, "grad_norm": 0.07465647161006927, "learning_rate": 1.1093304843304844e-05, "loss": 0.0165, "step": 105380 }, { "epoch": 0.779027822950238, "grad_norm": 0.08835309743881226, "learning_rate": 1.1089595204178538e-05, "loss": 0.0161, "step": 105390 }, { "epoch": 0.7791017415215399, "grad_norm": 0.0837300568819046, "learning_rate": 1.1085885565052232e-05, "loss": 0.0167, "step": 105400 }, { "epoch": 0.7791756600928417, "grad_norm": 0.06378339231014252, "learning_rate": 1.1082175925925927e-05, "loss": 0.0173, "step": 105410 }, { "epoch": 0.7792495786641436, "grad_norm": 0.08560236543416977, "learning_rate": 1.107846628679962e-05, "loss": 0.0198, "step": 105420 }, { "epoch": 0.7793234972354455, "grad_norm": 0.055065080523490906, "learning_rate": 1.1074756647673315e-05, "loss": 0.016, "step": 105430 }, { "epoch": 0.7793974158067473, "grad_norm": 0.08921148627996445, "learning_rate": 1.1071047008547008e-05, "loss": 0.0145, "step": 105440 }, { "epoch": 0.7794713343780492, "grad_norm": 0.07413645088672638, "learning_rate": 1.1067337369420704e-05, "loss": 0.0187, "step": 105450 }, { "epoch": 0.779545252949351, "grad_norm": 0.06614266335964203, "learning_rate": 1.1063627730294398e-05, "loss": 0.0184, "step": 105460 }, { "epoch": 0.7796191715206529, "grad_norm": 0.08686352521181107, "learning_rate": 1.1059918091168092e-05, "loss": 0.0158, "step": 105470 }, { "epoch": 0.7796930900919548, "grad_norm": 0.10010645538568497, "learning_rate": 1.1056208452041787e-05, "loss": 0.0181, "step": 105480 }, { "epoch": 0.7797670086632565, "grad_norm": 0.09116895496845245, "learning_rate": 1.105249881291548e-05, "loss": 0.0166, "step": 105490 }, { "epoch": 0.7798409272345584, "grad_norm": 0.06380877643823624, "learning_rate": 1.1048789173789175e-05, "loss": 0.0168, "step": 105500 }, { "epoch": 0.7799148458058602, "grad_norm": 0.09583408385515213, "learning_rate": 1.1045079534662868e-05, "loss": 0.0185, "step": 105510 }, { "epoch": 0.7799887643771621, "grad_norm": 0.05529385805130005, "learning_rate": 1.1041369895536564e-05, "loss": 0.015, "step": 105520 }, { "epoch": 0.7800626829484639, "grad_norm": 0.06574016809463501, "learning_rate": 1.1037660256410256e-05, "loss": 0.017, "step": 105530 }, { "epoch": 0.7801366015197658, "grad_norm": 0.08715861290693283, "learning_rate": 1.1033950617283952e-05, "loss": 0.0175, "step": 105540 }, { "epoch": 0.7802105200910677, "grad_norm": 0.08664470165967941, "learning_rate": 1.1030240978157645e-05, "loss": 0.0163, "step": 105550 }, { "epoch": 0.7802844386623695, "grad_norm": 0.08922585099935532, "learning_rate": 1.102653133903134e-05, "loss": 0.0143, "step": 105560 }, { "epoch": 0.7803583572336714, "grad_norm": 0.07631973177194595, "learning_rate": 1.1022821699905033e-05, "loss": 0.0177, "step": 105570 }, { "epoch": 0.7804322758049732, "grad_norm": 0.08425720036029816, "learning_rate": 1.1019112060778728e-05, "loss": 0.0182, "step": 105580 }, { "epoch": 0.7805061943762751, "grad_norm": 0.06511543691158295, "learning_rate": 1.1015402421652422e-05, "loss": 0.0151, "step": 105590 }, { "epoch": 0.7805801129475769, "grad_norm": 0.0878303050994873, "learning_rate": 1.1011692782526116e-05, "loss": 0.0177, "step": 105600 }, { "epoch": 0.7806540315188788, "grad_norm": 0.09406014531850815, "learning_rate": 1.100798314339981e-05, "loss": 0.0181, "step": 105610 }, { "epoch": 0.7807279500901807, "grad_norm": 0.07079024612903595, "learning_rate": 1.1004273504273505e-05, "loss": 0.0137, "step": 105620 }, { "epoch": 0.7808018686614825, "grad_norm": 0.08367685228586197, "learning_rate": 1.1000563865147199e-05, "loss": 0.0167, "step": 105630 }, { "epoch": 0.7808757872327844, "grad_norm": 0.06964623928070068, "learning_rate": 1.0996854226020893e-05, "loss": 0.0154, "step": 105640 }, { "epoch": 0.7809497058040862, "grad_norm": 0.0795077458024025, "learning_rate": 1.0993144586894587e-05, "loss": 0.0165, "step": 105650 }, { "epoch": 0.7810236243753881, "grad_norm": 0.0842127576470375, "learning_rate": 1.0989434947768282e-05, "loss": 0.018, "step": 105660 }, { "epoch": 0.7810975429466899, "grad_norm": 0.0904654860496521, "learning_rate": 1.0985725308641976e-05, "loss": 0.0182, "step": 105670 }, { "epoch": 0.7811714615179918, "grad_norm": 0.06659997254610062, "learning_rate": 1.098201566951567e-05, "loss": 0.0179, "step": 105680 }, { "epoch": 0.7812453800892937, "grad_norm": 0.19042523205280304, "learning_rate": 1.0978306030389365e-05, "loss": 0.0189, "step": 105690 }, { "epoch": 0.7813192986605955, "grad_norm": 0.09224945306777954, "learning_rate": 1.0974596391263059e-05, "loss": 0.0168, "step": 105700 }, { "epoch": 0.7813932172318974, "grad_norm": 0.07589827477931976, "learning_rate": 1.0970886752136753e-05, "loss": 0.018, "step": 105710 }, { "epoch": 0.7814671358031992, "grad_norm": 0.08175301551818848, "learning_rate": 1.0967177113010447e-05, "loss": 0.0178, "step": 105720 }, { "epoch": 0.781541054374501, "grad_norm": 0.06693083047866821, "learning_rate": 1.0963467473884142e-05, "loss": 0.0153, "step": 105730 }, { "epoch": 0.781614972945803, "grad_norm": 0.07367260009050369, "learning_rate": 1.0959757834757834e-05, "loss": 0.0151, "step": 105740 }, { "epoch": 0.7816888915171047, "grad_norm": 0.08174421638250351, "learning_rate": 1.095604819563153e-05, "loss": 0.0183, "step": 105750 }, { "epoch": 0.7817628100884066, "grad_norm": 0.06560919433832169, "learning_rate": 1.0952338556505223e-05, "loss": 0.0165, "step": 105760 }, { "epoch": 0.7818367286597084, "grad_norm": 0.07756762206554413, "learning_rate": 1.0948628917378919e-05, "loss": 0.0195, "step": 105770 }, { "epoch": 0.7819106472310103, "grad_norm": 0.1017007902264595, "learning_rate": 1.0944919278252611e-05, "loss": 0.0179, "step": 105780 }, { "epoch": 0.7819845658023121, "grad_norm": 0.07805784791707993, "learning_rate": 1.0941209639126307e-05, "loss": 0.0167, "step": 105790 }, { "epoch": 0.782058484373614, "grad_norm": 0.08423495292663574, "learning_rate": 1.09375e-05, "loss": 0.0193, "step": 105800 }, { "epoch": 0.7821324029449159, "grad_norm": 0.05597223341464996, "learning_rate": 1.0933790360873694e-05, "loss": 0.0154, "step": 105810 }, { "epoch": 0.7822063215162177, "grad_norm": 0.06490608304738998, "learning_rate": 1.0930080721747388e-05, "loss": 0.0191, "step": 105820 }, { "epoch": 0.7822802400875196, "grad_norm": 0.0634688213467598, "learning_rate": 1.0926371082621083e-05, "loss": 0.0168, "step": 105830 }, { "epoch": 0.7823541586588214, "grad_norm": 0.07929351180791855, "learning_rate": 1.0922661443494777e-05, "loss": 0.0173, "step": 105840 }, { "epoch": 0.7824280772301233, "grad_norm": 0.04854271933436394, "learning_rate": 1.0918951804368471e-05, "loss": 0.0149, "step": 105850 }, { "epoch": 0.7825019958014251, "grad_norm": 0.07452704757452011, "learning_rate": 1.0915242165242166e-05, "loss": 0.0154, "step": 105860 }, { "epoch": 0.782575914372727, "grad_norm": 0.06969328224658966, "learning_rate": 1.091153252611586e-05, "loss": 0.017, "step": 105870 }, { "epoch": 0.7826498329440289, "grad_norm": 0.07368597388267517, "learning_rate": 1.0907822886989554e-05, "loss": 0.0154, "step": 105880 }, { "epoch": 0.7827237515153307, "grad_norm": 0.06966342031955719, "learning_rate": 1.0904113247863248e-05, "loss": 0.0188, "step": 105890 }, { "epoch": 0.7827976700866326, "grad_norm": 0.06735853850841522, "learning_rate": 1.0900403608736943e-05, "loss": 0.0172, "step": 105900 }, { "epoch": 0.7828715886579344, "grad_norm": 0.08098297566175461, "learning_rate": 1.0896693969610637e-05, "loss": 0.0145, "step": 105910 }, { "epoch": 0.7829455072292363, "grad_norm": 0.09270311146974564, "learning_rate": 1.0892984330484331e-05, "loss": 0.017, "step": 105920 }, { "epoch": 0.7830194258005382, "grad_norm": 0.09456925839185715, "learning_rate": 1.0889274691358025e-05, "loss": 0.0157, "step": 105930 }, { "epoch": 0.78309334437184, "grad_norm": 0.0878613069653511, "learning_rate": 1.088556505223172e-05, "loss": 0.0197, "step": 105940 }, { "epoch": 0.7831672629431419, "grad_norm": 0.06533868610858917, "learning_rate": 1.0881855413105414e-05, "loss": 0.0162, "step": 105950 }, { "epoch": 0.7832411815144437, "grad_norm": 0.11173345893621445, "learning_rate": 1.0878145773979108e-05, "loss": 0.021, "step": 105960 }, { "epoch": 0.7833151000857456, "grad_norm": 0.10665670782327652, "learning_rate": 1.08744361348528e-05, "loss": 0.0177, "step": 105970 }, { "epoch": 0.7833890186570474, "grad_norm": 0.09183719754219055, "learning_rate": 1.0870726495726497e-05, "loss": 0.0196, "step": 105980 }, { "epoch": 0.7834629372283493, "grad_norm": 0.0717807337641716, "learning_rate": 1.086701685660019e-05, "loss": 0.0184, "step": 105990 }, { "epoch": 0.7835368557996512, "grad_norm": 0.09864635765552521, "learning_rate": 1.0863307217473885e-05, "loss": 0.0184, "step": 106000 }, { "epoch": 0.783610774370953, "grad_norm": 0.08241227269172668, "learning_rate": 1.0859597578347578e-05, "loss": 0.0175, "step": 106010 }, { "epoch": 0.7836846929422548, "grad_norm": 0.10284945368766785, "learning_rate": 1.0855887939221274e-05, "loss": 0.0162, "step": 106020 }, { "epoch": 0.7837586115135566, "grad_norm": 0.07604247331619263, "learning_rate": 1.0852178300094966e-05, "loss": 0.0181, "step": 106030 }, { "epoch": 0.7838325300848585, "grad_norm": 0.06977668404579163, "learning_rate": 1.084846866096866e-05, "loss": 0.0169, "step": 106040 }, { "epoch": 0.7839064486561603, "grad_norm": 0.08929763734340668, "learning_rate": 1.0844759021842357e-05, "loss": 0.0212, "step": 106050 }, { "epoch": 0.7839803672274622, "grad_norm": 0.08664583414793015, "learning_rate": 1.084104938271605e-05, "loss": 0.0178, "step": 106060 }, { "epoch": 0.7840542857987641, "grad_norm": 0.086372509598732, "learning_rate": 1.0837339743589745e-05, "loss": 0.0169, "step": 106070 }, { "epoch": 0.7841282043700659, "grad_norm": 0.09958035498857498, "learning_rate": 1.0833630104463438e-05, "loss": 0.0172, "step": 106080 }, { "epoch": 0.7842021229413678, "grad_norm": 0.08798729628324509, "learning_rate": 1.0829920465337134e-05, "loss": 0.0161, "step": 106090 }, { "epoch": 0.7842760415126696, "grad_norm": 0.07195137441158295, "learning_rate": 1.0826210826210826e-05, "loss": 0.0154, "step": 106100 }, { "epoch": 0.7843499600839715, "grad_norm": 0.07254501432180405, "learning_rate": 1.082250118708452e-05, "loss": 0.0176, "step": 106110 }, { "epoch": 0.7844238786552733, "grad_norm": 0.10619547963142395, "learning_rate": 1.0818791547958215e-05, "loss": 0.0156, "step": 106120 }, { "epoch": 0.7844977972265752, "grad_norm": 0.0825144425034523, "learning_rate": 1.081508190883191e-05, "loss": 0.0175, "step": 106130 }, { "epoch": 0.7845717157978771, "grad_norm": 0.06593617796897888, "learning_rate": 1.0811372269705603e-05, "loss": 0.0173, "step": 106140 }, { "epoch": 0.7846456343691789, "grad_norm": 0.07409842312335968, "learning_rate": 1.0807662630579298e-05, "loss": 0.0187, "step": 106150 }, { "epoch": 0.7847195529404808, "grad_norm": 0.06352999806404114, "learning_rate": 1.0803952991452992e-05, "loss": 0.0169, "step": 106160 }, { "epoch": 0.7847934715117826, "grad_norm": 0.08738528192043304, "learning_rate": 1.0800243352326686e-05, "loss": 0.0171, "step": 106170 }, { "epoch": 0.7848673900830845, "grad_norm": 0.10091836750507355, "learning_rate": 1.079653371320038e-05, "loss": 0.0178, "step": 106180 }, { "epoch": 0.7849413086543864, "grad_norm": 0.07956136763095856, "learning_rate": 1.0792824074074075e-05, "loss": 0.0183, "step": 106190 }, { "epoch": 0.7850152272256882, "grad_norm": 0.07380006462335587, "learning_rate": 1.0789114434947767e-05, "loss": 0.0143, "step": 106200 }, { "epoch": 0.7850891457969901, "grad_norm": 0.07521267235279083, "learning_rate": 1.0785404795821463e-05, "loss": 0.0147, "step": 106210 }, { "epoch": 0.7851630643682919, "grad_norm": 0.07009510695934296, "learning_rate": 1.0781695156695158e-05, "loss": 0.0172, "step": 106220 }, { "epoch": 0.7852369829395938, "grad_norm": 0.06240467727184296, "learning_rate": 1.0777985517568852e-05, "loss": 0.0164, "step": 106230 }, { "epoch": 0.7853109015108956, "grad_norm": 0.07551458477973938, "learning_rate": 1.0774275878442546e-05, "loss": 0.0156, "step": 106240 }, { "epoch": 0.7853848200821975, "grad_norm": 0.06955337524414062, "learning_rate": 1.077056623931624e-05, "loss": 0.0179, "step": 106250 }, { "epoch": 0.7854587386534994, "grad_norm": 0.07557106763124466, "learning_rate": 1.0766856600189935e-05, "loss": 0.0153, "step": 106260 }, { "epoch": 0.7855326572248011, "grad_norm": 0.10826913267374039, "learning_rate": 1.0763146961063627e-05, "loss": 0.0179, "step": 106270 }, { "epoch": 0.785606575796103, "grad_norm": 0.09861350804567337, "learning_rate": 1.0759437321937323e-05, "loss": 0.0162, "step": 106280 }, { "epoch": 0.7856804943674048, "grad_norm": 0.08968888968229294, "learning_rate": 1.0755727682811016e-05, "loss": 0.0172, "step": 106290 }, { "epoch": 0.7857544129387067, "grad_norm": 0.07313573360443115, "learning_rate": 1.0752018043684712e-05, "loss": 0.0173, "step": 106300 }, { "epoch": 0.7858283315100085, "grad_norm": 0.06940294802188873, "learning_rate": 1.0748308404558404e-05, "loss": 0.0166, "step": 106310 }, { "epoch": 0.7859022500813104, "grad_norm": 0.10839875042438507, "learning_rate": 1.07445987654321e-05, "loss": 0.0177, "step": 106320 }, { "epoch": 0.7859761686526123, "grad_norm": 0.06451968848705292, "learning_rate": 1.0740889126305793e-05, "loss": 0.0162, "step": 106330 }, { "epoch": 0.7860500872239141, "grad_norm": 0.08700212091207504, "learning_rate": 1.0737179487179487e-05, "loss": 0.0173, "step": 106340 }, { "epoch": 0.786124005795216, "grad_norm": 0.06500065326690674, "learning_rate": 1.0733469848053182e-05, "loss": 0.0173, "step": 106350 }, { "epoch": 0.7861979243665178, "grad_norm": 0.07031098753213882, "learning_rate": 1.0729760208926876e-05, "loss": 0.0161, "step": 106360 }, { "epoch": 0.7862718429378197, "grad_norm": 0.05699130520224571, "learning_rate": 1.072605056980057e-05, "loss": 0.017, "step": 106370 }, { "epoch": 0.7863457615091215, "grad_norm": 0.05871216580271721, "learning_rate": 1.0722340930674264e-05, "loss": 0.016, "step": 106380 }, { "epoch": 0.7864196800804234, "grad_norm": 0.0568096786737442, "learning_rate": 1.0718631291547959e-05, "loss": 0.0147, "step": 106390 }, { "epoch": 0.7864935986517253, "grad_norm": 0.08190196007490158, "learning_rate": 1.0714921652421653e-05, "loss": 0.0177, "step": 106400 }, { "epoch": 0.7865675172230271, "grad_norm": 0.07658049464225769, "learning_rate": 1.0711212013295347e-05, "loss": 0.0161, "step": 106410 }, { "epoch": 0.786641435794329, "grad_norm": 0.0688110813498497, "learning_rate": 1.0707502374169041e-05, "loss": 0.0172, "step": 106420 }, { "epoch": 0.7867153543656308, "grad_norm": 0.07113152742385864, "learning_rate": 1.0703792735042736e-05, "loss": 0.018, "step": 106430 }, { "epoch": 0.7867892729369327, "grad_norm": 0.10106844455003738, "learning_rate": 1.070008309591643e-05, "loss": 0.017, "step": 106440 }, { "epoch": 0.7868631915082346, "grad_norm": 0.07266218960285187, "learning_rate": 1.0696373456790124e-05, "loss": 0.0178, "step": 106450 }, { "epoch": 0.7869371100795364, "grad_norm": 0.07605311274528503, "learning_rate": 1.0692663817663819e-05, "loss": 0.018, "step": 106460 }, { "epoch": 0.7870110286508383, "grad_norm": 0.08731962740421295, "learning_rate": 1.0688954178537513e-05, "loss": 0.0148, "step": 106470 }, { "epoch": 0.7870849472221401, "grad_norm": 0.08087794482707977, "learning_rate": 1.0685244539411207e-05, "loss": 0.0183, "step": 106480 }, { "epoch": 0.787158865793442, "grad_norm": 0.08370697498321533, "learning_rate": 1.0681534900284901e-05, "loss": 0.0197, "step": 106490 }, { "epoch": 0.7872327843647438, "grad_norm": 0.08696702122688293, "learning_rate": 1.0677825261158594e-05, "loss": 0.0147, "step": 106500 }, { "epoch": 0.7873067029360457, "grad_norm": 0.08174639195203781, "learning_rate": 1.067411562203229e-05, "loss": 0.0163, "step": 106510 }, { "epoch": 0.7873806215073476, "grad_norm": 0.06434164196252823, "learning_rate": 1.0670405982905982e-05, "loss": 0.0155, "step": 106520 }, { "epoch": 0.7874545400786493, "grad_norm": 0.07333972305059433, "learning_rate": 1.0666696343779678e-05, "loss": 0.0173, "step": 106530 }, { "epoch": 0.7875284586499512, "grad_norm": 0.08010855317115784, "learning_rate": 1.0662986704653371e-05, "loss": 0.0189, "step": 106540 }, { "epoch": 0.787602377221253, "grad_norm": 0.09208738803863525, "learning_rate": 1.0659277065527067e-05, "loss": 0.0172, "step": 106550 }, { "epoch": 0.7876762957925549, "grad_norm": 0.05726606398820877, "learning_rate": 1.065556742640076e-05, "loss": 0.017, "step": 106560 }, { "epoch": 0.7877502143638567, "grad_norm": 0.08467677235603333, "learning_rate": 1.0651857787274454e-05, "loss": 0.0171, "step": 106570 }, { "epoch": 0.7878241329351586, "grad_norm": 0.08000260591506958, "learning_rate": 1.0648148148148148e-05, "loss": 0.0179, "step": 106580 }, { "epoch": 0.7878980515064605, "grad_norm": 0.09210962057113647, "learning_rate": 1.0644438509021842e-05, "loss": 0.0194, "step": 106590 }, { "epoch": 0.7879719700777623, "grad_norm": 0.07953054457902908, "learning_rate": 1.0640728869895537e-05, "loss": 0.0167, "step": 106600 }, { "epoch": 0.7880458886490642, "grad_norm": 0.06847818195819855, "learning_rate": 1.0637019230769231e-05, "loss": 0.0178, "step": 106610 }, { "epoch": 0.788119807220366, "grad_norm": 0.07081107795238495, "learning_rate": 1.0633309591642925e-05, "loss": 0.0181, "step": 106620 }, { "epoch": 0.7881937257916679, "grad_norm": 0.08538413792848587, "learning_rate": 1.062959995251662e-05, "loss": 0.0161, "step": 106630 }, { "epoch": 0.7882676443629697, "grad_norm": 0.09170427918434143, "learning_rate": 1.0625890313390314e-05, "loss": 0.0197, "step": 106640 }, { "epoch": 0.7883415629342716, "grad_norm": 0.07497674226760864, "learning_rate": 1.0622180674264008e-05, "loss": 0.0164, "step": 106650 }, { "epoch": 0.7884154815055735, "grad_norm": 0.0653548613190651, "learning_rate": 1.0618471035137702e-05, "loss": 0.0155, "step": 106660 }, { "epoch": 0.7884894000768753, "grad_norm": 0.05626005306839943, "learning_rate": 1.0614761396011397e-05, "loss": 0.0156, "step": 106670 }, { "epoch": 0.7885633186481772, "grad_norm": 0.06508783251047134, "learning_rate": 1.061105175688509e-05, "loss": 0.0149, "step": 106680 }, { "epoch": 0.788637237219479, "grad_norm": 0.07062501460313797, "learning_rate": 1.0607342117758785e-05, "loss": 0.0192, "step": 106690 }, { "epoch": 0.7887111557907809, "grad_norm": 0.0726802796125412, "learning_rate": 1.060363247863248e-05, "loss": 0.0168, "step": 106700 }, { "epoch": 0.7887850743620828, "grad_norm": 0.08327441662549973, "learning_rate": 1.0599922839506174e-05, "loss": 0.0175, "step": 106710 }, { "epoch": 0.7888589929333846, "grad_norm": 0.062011830508708954, "learning_rate": 1.0596213200379868e-05, "loss": 0.0185, "step": 106720 }, { "epoch": 0.7889329115046865, "grad_norm": 0.0819663405418396, "learning_rate": 1.059250356125356e-05, "loss": 0.0181, "step": 106730 }, { "epoch": 0.7890068300759883, "grad_norm": 0.08031091094017029, "learning_rate": 1.0588793922127256e-05, "loss": 0.017, "step": 106740 }, { "epoch": 0.7890807486472902, "grad_norm": 0.061876364052295685, "learning_rate": 1.0585084283000949e-05, "loss": 0.0153, "step": 106750 }, { "epoch": 0.789154667218592, "grad_norm": 0.08079424500465393, "learning_rate": 1.0581374643874645e-05, "loss": 0.0162, "step": 106760 }, { "epoch": 0.7892285857898939, "grad_norm": 0.05742736905813217, "learning_rate": 1.0577665004748338e-05, "loss": 0.015, "step": 106770 }, { "epoch": 0.7893025043611958, "grad_norm": 0.06490683555603027, "learning_rate": 1.0573955365622034e-05, "loss": 0.0178, "step": 106780 }, { "epoch": 0.7893764229324975, "grad_norm": 0.10366056859493256, "learning_rate": 1.0570245726495726e-05, "loss": 0.0172, "step": 106790 }, { "epoch": 0.7894503415037994, "grad_norm": 0.06522786617279053, "learning_rate": 1.056653608736942e-05, "loss": 0.0167, "step": 106800 }, { "epoch": 0.7895242600751012, "grad_norm": 0.06334855407476425, "learning_rate": 1.0562826448243115e-05, "loss": 0.019, "step": 106810 }, { "epoch": 0.7895981786464031, "grad_norm": 0.10202807933092117, "learning_rate": 1.0559116809116809e-05, "loss": 0.0185, "step": 106820 }, { "epoch": 0.7896720972177049, "grad_norm": 0.06709656119346619, "learning_rate": 1.0555407169990505e-05, "loss": 0.0175, "step": 106830 }, { "epoch": 0.7897460157890068, "grad_norm": 0.07228951901197433, "learning_rate": 1.0551697530864197e-05, "loss": 0.0155, "step": 106840 }, { "epoch": 0.7898199343603087, "grad_norm": 0.05604925751686096, "learning_rate": 1.0547987891737893e-05, "loss": 0.0167, "step": 106850 }, { "epoch": 0.7898938529316105, "grad_norm": 0.07428887486457825, "learning_rate": 1.0544278252611586e-05, "loss": 0.0177, "step": 106860 }, { "epoch": 0.7899677715029124, "grad_norm": 0.07909681648015976, "learning_rate": 1.0540568613485282e-05, "loss": 0.0161, "step": 106870 }, { "epoch": 0.7900416900742142, "grad_norm": 0.08342929184436798, "learning_rate": 1.0536858974358975e-05, "loss": 0.0191, "step": 106880 }, { "epoch": 0.7901156086455161, "grad_norm": 0.0777612179517746, "learning_rate": 1.0533149335232669e-05, "loss": 0.0173, "step": 106890 }, { "epoch": 0.7901895272168179, "grad_norm": 0.053565897047519684, "learning_rate": 1.0529439696106363e-05, "loss": 0.0161, "step": 106900 }, { "epoch": 0.7902634457881198, "grad_norm": 0.06764553487300873, "learning_rate": 1.0525730056980057e-05, "loss": 0.017, "step": 106910 }, { "epoch": 0.7903373643594217, "grad_norm": 0.1511474996805191, "learning_rate": 1.0522020417853752e-05, "loss": 0.0182, "step": 106920 }, { "epoch": 0.7904112829307235, "grad_norm": 0.05807117000222206, "learning_rate": 1.0518310778727446e-05, "loss": 0.0184, "step": 106930 }, { "epoch": 0.7904852015020254, "grad_norm": 0.07401245087385178, "learning_rate": 1.051460113960114e-05, "loss": 0.0148, "step": 106940 }, { "epoch": 0.7905591200733272, "grad_norm": 0.0792866125702858, "learning_rate": 1.0510891500474834e-05, "loss": 0.014, "step": 106950 }, { "epoch": 0.7906330386446291, "grad_norm": 0.0900711938738823, "learning_rate": 1.0507181861348527e-05, "loss": 0.0202, "step": 106960 }, { "epoch": 0.790706957215931, "grad_norm": 0.06799784302711487, "learning_rate": 1.0503472222222223e-05, "loss": 0.0179, "step": 106970 }, { "epoch": 0.7907808757872328, "grad_norm": 0.06278761476278305, "learning_rate": 1.0499762583095917e-05, "loss": 0.0189, "step": 106980 }, { "epoch": 0.7908547943585347, "grad_norm": 0.08859692513942719, "learning_rate": 1.0496052943969612e-05, "loss": 0.0147, "step": 106990 }, { "epoch": 0.7909287129298365, "grad_norm": 0.05939367040991783, "learning_rate": 1.0492343304843306e-05, "loss": 0.0163, "step": 107000 }, { "epoch": 0.7910026315011384, "grad_norm": 0.11033795028924942, "learning_rate": 1.0488633665717e-05, "loss": 0.0208, "step": 107010 }, { "epoch": 0.7910765500724402, "grad_norm": 0.08448134362697601, "learning_rate": 1.0484924026590694e-05, "loss": 0.0193, "step": 107020 }, { "epoch": 0.791150468643742, "grad_norm": 0.09254121780395508, "learning_rate": 1.0481214387464387e-05, "loss": 0.0177, "step": 107030 }, { "epoch": 0.791224387215044, "grad_norm": 0.11402133107185364, "learning_rate": 1.0477504748338083e-05, "loss": 0.0174, "step": 107040 }, { "epoch": 0.7912983057863457, "grad_norm": 0.07104026526212692, "learning_rate": 1.0473795109211776e-05, "loss": 0.0167, "step": 107050 }, { "epoch": 0.7913722243576476, "grad_norm": 0.09040091931819916, "learning_rate": 1.0470085470085471e-05, "loss": 0.0171, "step": 107060 }, { "epoch": 0.7914461429289494, "grad_norm": 0.0920732393860817, "learning_rate": 1.0466375830959164e-05, "loss": 0.0163, "step": 107070 }, { "epoch": 0.7915200615002513, "grad_norm": 0.06260927021503448, "learning_rate": 1.046266619183286e-05, "loss": 0.0158, "step": 107080 }, { "epoch": 0.7915939800715531, "grad_norm": 0.08942589163780212, "learning_rate": 1.0458956552706553e-05, "loss": 0.0191, "step": 107090 }, { "epoch": 0.791667898642855, "grad_norm": 0.08758687973022461, "learning_rate": 1.0455246913580249e-05, "loss": 0.0175, "step": 107100 }, { "epoch": 0.7917418172141569, "grad_norm": 0.08504850417375565, "learning_rate": 1.0451537274453941e-05, "loss": 0.0167, "step": 107110 }, { "epoch": 0.7918157357854587, "grad_norm": 0.08797286450862885, "learning_rate": 1.0447827635327635e-05, "loss": 0.0173, "step": 107120 }, { "epoch": 0.7918896543567606, "grad_norm": 0.07588206231594086, "learning_rate": 1.044411799620133e-05, "loss": 0.0149, "step": 107130 }, { "epoch": 0.7919635729280624, "grad_norm": 0.08773746341466904, "learning_rate": 1.0440408357075024e-05, "loss": 0.0161, "step": 107140 }, { "epoch": 0.7920374914993643, "grad_norm": 0.10242946445941925, "learning_rate": 1.0436698717948718e-05, "loss": 0.0156, "step": 107150 }, { "epoch": 0.7921114100706661, "grad_norm": 0.06126590818166733, "learning_rate": 1.0432989078822413e-05, "loss": 0.0175, "step": 107160 }, { "epoch": 0.792185328641968, "grad_norm": 0.08768381923437119, "learning_rate": 1.0429279439696107e-05, "loss": 0.017, "step": 107170 }, { "epoch": 0.7922592472132699, "grad_norm": 0.09814704209566116, "learning_rate": 1.0425569800569801e-05, "loss": 0.0176, "step": 107180 }, { "epoch": 0.7923331657845717, "grad_norm": 0.08230622857809067, "learning_rate": 1.0421860161443495e-05, "loss": 0.0192, "step": 107190 }, { "epoch": 0.7924070843558736, "grad_norm": 0.08793134987354279, "learning_rate": 1.041815052231719e-05, "loss": 0.017, "step": 107200 }, { "epoch": 0.7924810029271754, "grad_norm": 0.0937204509973526, "learning_rate": 1.0414440883190884e-05, "loss": 0.0168, "step": 107210 }, { "epoch": 0.7925549214984773, "grad_norm": 0.06012919172644615, "learning_rate": 1.0410731244064578e-05, "loss": 0.0172, "step": 107220 }, { "epoch": 0.7926288400697792, "grad_norm": 0.0912671908736229, "learning_rate": 1.0407021604938272e-05, "loss": 0.0194, "step": 107230 }, { "epoch": 0.792702758641081, "grad_norm": 0.05338483676314354, "learning_rate": 1.0403311965811967e-05, "loss": 0.0171, "step": 107240 }, { "epoch": 0.7927766772123829, "grad_norm": 0.0971188098192215, "learning_rate": 1.0399602326685661e-05, "loss": 0.0169, "step": 107250 }, { "epoch": 0.7928505957836847, "grad_norm": 0.07669887691736221, "learning_rate": 1.0395892687559355e-05, "loss": 0.0175, "step": 107260 }, { "epoch": 0.7929245143549866, "grad_norm": 0.07737741619348526, "learning_rate": 1.039218304843305e-05, "loss": 0.0167, "step": 107270 }, { "epoch": 0.7929984329262884, "grad_norm": 0.09764139354228973, "learning_rate": 1.0388473409306742e-05, "loss": 0.017, "step": 107280 }, { "epoch": 0.7930723514975903, "grad_norm": 0.09034200757741928, "learning_rate": 1.0384763770180438e-05, "loss": 0.0186, "step": 107290 }, { "epoch": 0.7931462700688922, "grad_norm": 0.0868106558918953, "learning_rate": 1.038105413105413e-05, "loss": 0.0168, "step": 107300 }, { "epoch": 0.793220188640194, "grad_norm": 0.08318565785884857, "learning_rate": 1.0377344491927827e-05, "loss": 0.017, "step": 107310 }, { "epoch": 0.7932941072114958, "grad_norm": 0.09246040880680084, "learning_rate": 1.037363485280152e-05, "loss": 0.0181, "step": 107320 }, { "epoch": 0.7933680257827976, "grad_norm": 0.08603844046592712, "learning_rate": 1.0369925213675215e-05, "loss": 0.0191, "step": 107330 }, { "epoch": 0.7934419443540995, "grad_norm": 0.08298540115356445, "learning_rate": 1.0366215574548908e-05, "loss": 0.0189, "step": 107340 }, { "epoch": 0.7935158629254013, "grad_norm": 0.08032820373773575, "learning_rate": 1.0362505935422602e-05, "loss": 0.0165, "step": 107350 }, { "epoch": 0.7935897814967032, "grad_norm": 0.07046546041965485, "learning_rate": 1.0358796296296296e-05, "loss": 0.0169, "step": 107360 }, { "epoch": 0.7936637000680051, "grad_norm": 0.10293319076299667, "learning_rate": 1.035508665716999e-05, "loss": 0.0164, "step": 107370 }, { "epoch": 0.7937376186393069, "grad_norm": 0.08239208161830902, "learning_rate": 1.0351377018043685e-05, "loss": 0.0175, "step": 107380 }, { "epoch": 0.7938115372106088, "grad_norm": 0.06712669134140015, "learning_rate": 1.0347667378917379e-05, "loss": 0.0163, "step": 107390 }, { "epoch": 0.7938854557819106, "grad_norm": 0.09521905332803726, "learning_rate": 1.0343957739791073e-05, "loss": 0.0161, "step": 107400 }, { "epoch": 0.7939593743532125, "grad_norm": 0.08448793739080429, "learning_rate": 1.0340248100664768e-05, "loss": 0.015, "step": 107410 }, { "epoch": 0.7940332929245143, "grad_norm": 0.09332919865846634, "learning_rate": 1.0336538461538462e-05, "loss": 0.0191, "step": 107420 }, { "epoch": 0.7941072114958162, "grad_norm": 0.0625322014093399, "learning_rate": 1.0332828822412156e-05, "loss": 0.0155, "step": 107430 }, { "epoch": 0.7941811300671181, "grad_norm": 0.06839042901992798, "learning_rate": 1.032911918328585e-05, "loss": 0.0169, "step": 107440 }, { "epoch": 0.7942550486384199, "grad_norm": 0.09588529914617538, "learning_rate": 1.0325409544159545e-05, "loss": 0.0171, "step": 107450 }, { "epoch": 0.7943289672097218, "grad_norm": 0.09027482569217682, "learning_rate": 1.0321699905033239e-05, "loss": 0.0165, "step": 107460 }, { "epoch": 0.7944028857810236, "grad_norm": 0.07311699539422989, "learning_rate": 1.0317990265906933e-05, "loss": 0.0162, "step": 107470 }, { "epoch": 0.7944768043523255, "grad_norm": 0.07894931733608246, "learning_rate": 1.0314280626780628e-05, "loss": 0.0177, "step": 107480 }, { "epoch": 0.7945507229236274, "grad_norm": 0.07096471637487411, "learning_rate": 1.0310570987654322e-05, "loss": 0.0174, "step": 107490 }, { "epoch": 0.7946246414949292, "grad_norm": 0.07074417918920517, "learning_rate": 1.0306861348528016e-05, "loss": 0.0155, "step": 107500 }, { "epoch": 0.7946985600662311, "grad_norm": 0.087070032954216, "learning_rate": 1.0303151709401709e-05, "loss": 0.0194, "step": 107510 }, { "epoch": 0.7947724786375329, "grad_norm": 0.0836467370390892, "learning_rate": 1.0299442070275405e-05, "loss": 0.0163, "step": 107520 }, { "epoch": 0.7948463972088348, "grad_norm": 0.058517493307590485, "learning_rate": 1.0295732431149097e-05, "loss": 0.0152, "step": 107530 }, { "epoch": 0.7949203157801366, "grad_norm": 0.09755102545022964, "learning_rate": 1.0292022792022793e-05, "loss": 0.0181, "step": 107540 }, { "epoch": 0.7949942343514385, "grad_norm": 0.0626494511961937, "learning_rate": 1.0288313152896486e-05, "loss": 0.0166, "step": 107550 }, { "epoch": 0.7950681529227404, "grad_norm": 0.06871600449085236, "learning_rate": 1.0284603513770182e-05, "loss": 0.0162, "step": 107560 }, { "epoch": 0.7951420714940421, "grad_norm": 0.0932798832654953, "learning_rate": 1.0280893874643874e-05, "loss": 0.0178, "step": 107570 }, { "epoch": 0.795215990065344, "grad_norm": 0.0795370489358902, "learning_rate": 1.0277184235517569e-05, "loss": 0.0184, "step": 107580 }, { "epoch": 0.7952899086366458, "grad_norm": 0.09961181879043579, "learning_rate": 1.0273474596391265e-05, "loss": 0.0177, "step": 107590 }, { "epoch": 0.7953638272079477, "grad_norm": 0.07366261631250381, "learning_rate": 1.0269764957264957e-05, "loss": 0.0159, "step": 107600 }, { "epoch": 0.7954377457792495, "grad_norm": 0.08025870472192764, "learning_rate": 1.0266055318138653e-05, "loss": 0.0162, "step": 107610 }, { "epoch": 0.7955116643505514, "grad_norm": 0.06702578067779541, "learning_rate": 1.0262345679012346e-05, "loss": 0.0155, "step": 107620 }, { "epoch": 0.7955855829218533, "grad_norm": 0.0553353913128376, "learning_rate": 1.0258636039886042e-05, "loss": 0.0171, "step": 107630 }, { "epoch": 0.7956595014931551, "grad_norm": 0.088180311024189, "learning_rate": 1.0254926400759734e-05, "loss": 0.0179, "step": 107640 }, { "epoch": 0.795733420064457, "grad_norm": 0.05583290383219719, "learning_rate": 1.0251216761633429e-05, "loss": 0.0172, "step": 107650 }, { "epoch": 0.7958073386357588, "grad_norm": 0.09140726178884506, "learning_rate": 1.0247507122507123e-05, "loss": 0.015, "step": 107660 }, { "epoch": 0.7958812572070607, "grad_norm": 0.0885956883430481, "learning_rate": 1.0243797483380817e-05, "loss": 0.0161, "step": 107670 }, { "epoch": 0.7959551757783625, "grad_norm": 0.06041378155350685, "learning_rate": 1.0240087844254511e-05, "loss": 0.0156, "step": 107680 }, { "epoch": 0.7960290943496644, "grad_norm": 0.052953656762838364, "learning_rate": 1.0236378205128206e-05, "loss": 0.0145, "step": 107690 }, { "epoch": 0.7961030129209663, "grad_norm": 0.05720691755414009, "learning_rate": 1.02326685660019e-05, "loss": 0.0176, "step": 107700 }, { "epoch": 0.7961769314922681, "grad_norm": 0.07044905424118042, "learning_rate": 1.0228958926875594e-05, "loss": 0.0166, "step": 107710 }, { "epoch": 0.79625085006357, "grad_norm": 0.07559455186128616, "learning_rate": 1.0225249287749288e-05, "loss": 0.0173, "step": 107720 }, { "epoch": 0.7963247686348718, "grad_norm": 0.11719612777233124, "learning_rate": 1.0221539648622983e-05, "loss": 0.019, "step": 107730 }, { "epoch": 0.7963986872061737, "grad_norm": 0.07006300985813141, "learning_rate": 1.0217830009496675e-05, "loss": 0.0191, "step": 107740 }, { "epoch": 0.7964726057774756, "grad_norm": 0.09146808832883835, "learning_rate": 1.0214120370370371e-05, "loss": 0.0179, "step": 107750 }, { "epoch": 0.7965465243487774, "grad_norm": 0.05993087589740753, "learning_rate": 1.0210410731244066e-05, "loss": 0.0163, "step": 107760 }, { "epoch": 0.7966204429200793, "grad_norm": 0.09870940446853638, "learning_rate": 1.020670109211776e-05, "loss": 0.0148, "step": 107770 }, { "epoch": 0.7966943614913811, "grad_norm": 0.07571858912706375, "learning_rate": 1.0202991452991454e-05, "loss": 0.0173, "step": 107780 }, { "epoch": 0.796768280062683, "grad_norm": 0.0883622094988823, "learning_rate": 1.0199281813865148e-05, "loss": 0.0155, "step": 107790 }, { "epoch": 0.7968421986339848, "grad_norm": 0.0919145941734314, "learning_rate": 1.0195572174738843e-05, "loss": 0.0178, "step": 107800 }, { "epoch": 0.7969161172052867, "grad_norm": 0.0809524804353714, "learning_rate": 1.0191862535612535e-05, "loss": 0.0157, "step": 107810 }, { "epoch": 0.7969900357765886, "grad_norm": 0.07559646666049957, "learning_rate": 1.0188152896486231e-05, "loss": 0.0161, "step": 107820 }, { "epoch": 0.7970639543478903, "grad_norm": 0.09900683909654617, "learning_rate": 1.0184443257359924e-05, "loss": 0.0187, "step": 107830 }, { "epoch": 0.7971378729191922, "grad_norm": 0.09665405005216599, "learning_rate": 1.018073361823362e-05, "loss": 0.02, "step": 107840 }, { "epoch": 0.797211791490494, "grad_norm": 0.08485321700572968, "learning_rate": 1.0177023979107312e-05, "loss": 0.0168, "step": 107850 }, { "epoch": 0.7972857100617959, "grad_norm": 0.09139339625835419, "learning_rate": 1.0173314339981008e-05, "loss": 0.0168, "step": 107860 }, { "epoch": 0.7973596286330977, "grad_norm": 0.05562750995159149, "learning_rate": 1.01696047008547e-05, "loss": 0.0159, "step": 107870 }, { "epoch": 0.7974335472043996, "grad_norm": 0.0958489254117012, "learning_rate": 1.0165895061728395e-05, "loss": 0.0154, "step": 107880 }, { "epoch": 0.7975074657757015, "grad_norm": 0.10791540890932083, "learning_rate": 1.016218542260209e-05, "loss": 0.0159, "step": 107890 }, { "epoch": 0.7975813843470033, "grad_norm": 0.07161233574151993, "learning_rate": 1.0158475783475784e-05, "loss": 0.0157, "step": 107900 }, { "epoch": 0.7976553029183052, "grad_norm": 0.08552657812833786, "learning_rate": 1.0154766144349478e-05, "loss": 0.0169, "step": 107910 }, { "epoch": 0.797729221489607, "grad_norm": 0.06651432812213898, "learning_rate": 1.0151056505223172e-05, "loss": 0.0178, "step": 107920 }, { "epoch": 0.7978031400609089, "grad_norm": 0.0792415663599968, "learning_rate": 1.0147346866096866e-05, "loss": 0.0177, "step": 107930 }, { "epoch": 0.7978770586322108, "grad_norm": 0.09755530953407288, "learning_rate": 1.014363722697056e-05, "loss": 0.0166, "step": 107940 }, { "epoch": 0.7979509772035126, "grad_norm": 0.0528554804623127, "learning_rate": 1.0139927587844255e-05, "loss": 0.0157, "step": 107950 }, { "epoch": 0.7980248957748145, "grad_norm": 0.0647616758942604, "learning_rate": 1.013621794871795e-05, "loss": 0.0142, "step": 107960 }, { "epoch": 0.7980988143461163, "grad_norm": 0.0763440877199173, "learning_rate": 1.0132508309591644e-05, "loss": 0.0162, "step": 107970 }, { "epoch": 0.7981727329174182, "grad_norm": 0.0879247710108757, "learning_rate": 1.0128798670465338e-05, "loss": 0.0177, "step": 107980 }, { "epoch": 0.79824665148872, "grad_norm": 0.05888223648071289, "learning_rate": 1.0125089031339032e-05, "loss": 0.0138, "step": 107990 }, { "epoch": 0.7983205700600219, "grad_norm": 0.09061430394649506, "learning_rate": 1.0121379392212726e-05, "loss": 0.0173, "step": 108000 }, { "epoch": 0.7983944886313238, "grad_norm": 0.07279080152511597, "learning_rate": 1.011766975308642e-05, "loss": 0.0169, "step": 108010 }, { "epoch": 0.7984684072026256, "grad_norm": 0.08346417546272278, "learning_rate": 1.0113960113960115e-05, "loss": 0.0195, "step": 108020 }, { "epoch": 0.7985423257739275, "grad_norm": 0.0859362855553627, "learning_rate": 1.011025047483381e-05, "loss": 0.0203, "step": 108030 }, { "epoch": 0.7986162443452293, "grad_norm": 0.059124864637851715, "learning_rate": 1.0106540835707502e-05, "loss": 0.017, "step": 108040 }, { "epoch": 0.7986901629165312, "grad_norm": 0.09157084673643112, "learning_rate": 1.0102831196581198e-05, "loss": 0.0182, "step": 108050 }, { "epoch": 0.798764081487833, "grad_norm": 0.08674320578575134, "learning_rate": 1.009912155745489e-05, "loss": 0.0204, "step": 108060 }, { "epoch": 0.7988380000591349, "grad_norm": 0.08345095813274384, "learning_rate": 1.0095411918328586e-05, "loss": 0.0163, "step": 108070 }, { "epoch": 0.7989119186304368, "grad_norm": 0.08811159431934357, "learning_rate": 1.0091702279202279e-05, "loss": 0.0191, "step": 108080 }, { "epoch": 0.7989858372017385, "grad_norm": 0.06645365804433823, "learning_rate": 1.0087992640075975e-05, "loss": 0.0157, "step": 108090 }, { "epoch": 0.7990597557730404, "grad_norm": 0.1114099770784378, "learning_rate": 1.0084283000949667e-05, "loss": 0.0194, "step": 108100 }, { "epoch": 0.7991336743443422, "grad_norm": 0.048499658703804016, "learning_rate": 1.0080573361823362e-05, "loss": 0.0156, "step": 108110 }, { "epoch": 0.7992075929156441, "grad_norm": 0.08671610802412033, "learning_rate": 1.0076863722697056e-05, "loss": 0.0171, "step": 108120 }, { "epoch": 0.7992815114869459, "grad_norm": 0.081625796854496, "learning_rate": 1.007315408357075e-05, "loss": 0.0163, "step": 108130 }, { "epoch": 0.7993554300582478, "grad_norm": 0.09771347790956497, "learning_rate": 1.0069444444444445e-05, "loss": 0.0168, "step": 108140 }, { "epoch": 0.7994293486295497, "grad_norm": 0.07416078448295593, "learning_rate": 1.0065734805318139e-05, "loss": 0.0164, "step": 108150 }, { "epoch": 0.7995032672008515, "grad_norm": 0.08635777980089188, "learning_rate": 1.0062025166191833e-05, "loss": 0.0183, "step": 108160 }, { "epoch": 0.7995771857721534, "grad_norm": 0.08301952481269836, "learning_rate": 1.0058315527065527e-05, "loss": 0.0197, "step": 108170 }, { "epoch": 0.7996511043434552, "grad_norm": 0.09106559306383133, "learning_rate": 1.0054605887939222e-05, "loss": 0.0167, "step": 108180 }, { "epoch": 0.7997250229147571, "grad_norm": 0.099912628531456, "learning_rate": 1.0050896248812916e-05, "loss": 0.0186, "step": 108190 }, { "epoch": 0.799798941486059, "grad_norm": 0.06465443223714828, "learning_rate": 1.004718660968661e-05, "loss": 0.0184, "step": 108200 }, { "epoch": 0.7998728600573608, "grad_norm": 0.05838299170136452, "learning_rate": 1.0043476970560304e-05, "loss": 0.0188, "step": 108210 }, { "epoch": 0.7999467786286627, "grad_norm": 0.11985216289758682, "learning_rate": 1.0039767331433999e-05, "loss": 0.0171, "step": 108220 }, { "epoch": 0.8000206971999645, "grad_norm": 0.0747854933142662, "learning_rate": 1.0036057692307693e-05, "loss": 0.0184, "step": 108230 }, { "epoch": 0.8000946157712664, "grad_norm": 0.09232344478368759, "learning_rate": 1.0032348053181387e-05, "loss": 0.0185, "step": 108240 }, { "epoch": 0.8001685343425682, "grad_norm": 0.07320012152194977, "learning_rate": 1.0028638414055082e-05, "loss": 0.0145, "step": 108250 }, { "epoch": 0.8002424529138701, "grad_norm": 0.04968751594424248, "learning_rate": 1.0024928774928776e-05, "loss": 0.0165, "step": 108260 }, { "epoch": 0.800316371485172, "grad_norm": 0.06803929060697556, "learning_rate": 1.0021219135802468e-05, "loss": 0.0195, "step": 108270 }, { "epoch": 0.8003902900564738, "grad_norm": 0.09313097596168518, "learning_rate": 1.0017509496676164e-05, "loss": 0.0186, "step": 108280 }, { "epoch": 0.8004642086277757, "grad_norm": 0.08711668848991394, "learning_rate": 1.0013799857549857e-05, "loss": 0.0185, "step": 108290 }, { "epoch": 0.8005381271990775, "grad_norm": 0.06377244740724564, "learning_rate": 1.0010090218423553e-05, "loss": 0.0162, "step": 108300 }, { "epoch": 0.8006120457703794, "grad_norm": 0.07629870623350143, "learning_rate": 1.0006380579297245e-05, "loss": 0.017, "step": 108310 }, { "epoch": 0.8006859643416812, "grad_norm": 0.10412617027759552, "learning_rate": 1.0002670940170941e-05, "loss": 0.0151, "step": 108320 }, { "epoch": 0.8007598829129831, "grad_norm": 0.07674720883369446, "learning_rate": 9.998961301044634e-06, "loss": 0.0176, "step": 108330 }, { "epoch": 0.800833801484285, "grad_norm": 0.07292506843805313, "learning_rate": 9.995251661918328e-06, "loss": 0.0151, "step": 108340 }, { "epoch": 0.8009077200555867, "grad_norm": 0.07875438779592514, "learning_rate": 9.991542022792024e-06, "loss": 0.0178, "step": 108350 }, { "epoch": 0.8009816386268886, "grad_norm": 0.07804681360721588, "learning_rate": 9.987832383665717e-06, "loss": 0.0194, "step": 108360 }, { "epoch": 0.8010555571981904, "grad_norm": 0.07799090445041656, "learning_rate": 9.984122744539413e-06, "loss": 0.0159, "step": 108370 }, { "epoch": 0.8011294757694923, "grad_norm": 0.061510853469371796, "learning_rate": 9.980413105413105e-06, "loss": 0.0161, "step": 108380 }, { "epoch": 0.8012033943407941, "grad_norm": 0.10582411289215088, "learning_rate": 9.976703466286801e-06, "loss": 0.0192, "step": 108390 }, { "epoch": 0.801277312912096, "grad_norm": 0.06720519065856934, "learning_rate": 9.972993827160494e-06, "loss": 0.0149, "step": 108400 }, { "epoch": 0.8013512314833979, "grad_norm": 0.08093603700399399, "learning_rate": 9.96928418803419e-06, "loss": 0.0161, "step": 108410 }, { "epoch": 0.8014251500546997, "grad_norm": 0.08576540648937225, "learning_rate": 9.965574548907882e-06, "loss": 0.0157, "step": 108420 }, { "epoch": 0.8014990686260016, "grad_norm": 0.06301461905241013, "learning_rate": 9.961864909781577e-06, "loss": 0.0144, "step": 108430 }, { "epoch": 0.8015729871973034, "grad_norm": 0.07179810851812363, "learning_rate": 9.958155270655271e-06, "loss": 0.0151, "step": 108440 }, { "epoch": 0.8016469057686053, "grad_norm": 0.07506982237100601, "learning_rate": 9.954445631528965e-06, "loss": 0.0192, "step": 108450 }, { "epoch": 0.8017208243399072, "grad_norm": 0.10046809166669846, "learning_rate": 9.95073599240266e-06, "loss": 0.0179, "step": 108460 }, { "epoch": 0.801794742911209, "grad_norm": 0.06781169772148132, "learning_rate": 9.947026353276354e-06, "loss": 0.0186, "step": 108470 }, { "epoch": 0.8018686614825109, "grad_norm": 0.06781306862831116, "learning_rate": 9.943316714150048e-06, "loss": 0.0163, "step": 108480 }, { "epoch": 0.8019425800538127, "grad_norm": 0.05869549140334129, "learning_rate": 9.939607075023742e-06, "loss": 0.0175, "step": 108490 }, { "epoch": 0.8020164986251146, "grad_norm": 0.06874193996191025, "learning_rate": 9.935897435897435e-06, "loss": 0.0176, "step": 108500 }, { "epoch": 0.8020904171964164, "grad_norm": 0.0705341100692749, "learning_rate": 9.932187796771131e-06, "loss": 0.015, "step": 108510 }, { "epoch": 0.8021643357677183, "grad_norm": 0.07271615415811539, "learning_rate": 9.928478157644825e-06, "loss": 0.0166, "step": 108520 }, { "epoch": 0.8022382543390202, "grad_norm": 0.09714877605438232, "learning_rate": 9.92476851851852e-06, "loss": 0.0171, "step": 108530 }, { "epoch": 0.802312172910322, "grad_norm": 0.08740437030792236, "learning_rate": 9.921058879392214e-06, "loss": 0.0156, "step": 108540 }, { "epoch": 0.8023860914816239, "grad_norm": 0.07252812385559082, "learning_rate": 9.917349240265908e-06, "loss": 0.0176, "step": 108550 }, { "epoch": 0.8024600100529257, "grad_norm": 0.05812780186533928, "learning_rate": 9.913639601139602e-06, "loss": 0.0194, "step": 108560 }, { "epoch": 0.8025339286242276, "grad_norm": 0.08060342073440552, "learning_rate": 9.909929962013295e-06, "loss": 0.0175, "step": 108570 }, { "epoch": 0.8026078471955294, "grad_norm": 0.09011170268058777, "learning_rate": 9.90622032288699e-06, "loss": 0.0176, "step": 108580 }, { "epoch": 0.8026817657668313, "grad_norm": 0.08751154690980911, "learning_rate": 9.902510683760683e-06, "loss": 0.0171, "step": 108590 }, { "epoch": 0.8027556843381332, "grad_norm": 0.08538112044334412, "learning_rate": 9.89880104463438e-06, "loss": 0.0148, "step": 108600 }, { "epoch": 0.802829602909435, "grad_norm": 0.07628414034843445, "learning_rate": 9.895091405508072e-06, "loss": 0.0181, "step": 108610 }, { "epoch": 0.8029035214807368, "grad_norm": 0.07035236805677414, "learning_rate": 9.891381766381768e-06, "loss": 0.0167, "step": 108620 }, { "epoch": 0.8029774400520386, "grad_norm": 0.10552142560482025, "learning_rate": 9.88767212725546e-06, "loss": 0.0189, "step": 108630 }, { "epoch": 0.8030513586233405, "grad_norm": 0.06383045017719269, "learning_rate": 9.883962488129156e-06, "loss": 0.016, "step": 108640 }, { "epoch": 0.8031252771946423, "grad_norm": 0.06668663769960403, "learning_rate": 9.880252849002849e-06, "loss": 0.015, "step": 108650 }, { "epoch": 0.8031991957659442, "grad_norm": 0.09065516293048859, "learning_rate": 9.876543209876543e-06, "loss": 0.018, "step": 108660 }, { "epoch": 0.8032731143372461, "grad_norm": 0.08935684710741043, "learning_rate": 9.872833570750238e-06, "loss": 0.0195, "step": 108670 }, { "epoch": 0.8033470329085479, "grad_norm": 0.09216105192899704, "learning_rate": 9.869123931623932e-06, "loss": 0.0184, "step": 108680 }, { "epoch": 0.8034209514798498, "grad_norm": 0.09943180531263351, "learning_rate": 9.865414292497626e-06, "loss": 0.0207, "step": 108690 }, { "epoch": 0.8034948700511516, "grad_norm": 0.08068699389696121, "learning_rate": 9.86170465337132e-06, "loss": 0.0188, "step": 108700 }, { "epoch": 0.8035687886224535, "grad_norm": 0.07922651618719101, "learning_rate": 9.857995014245015e-06, "loss": 0.0186, "step": 108710 }, { "epoch": 0.8036427071937554, "grad_norm": 0.12515637278556824, "learning_rate": 9.854285375118709e-06, "loss": 0.0176, "step": 108720 }, { "epoch": 0.8037166257650572, "grad_norm": 0.07942978292703629, "learning_rate": 9.850575735992403e-06, "loss": 0.0175, "step": 108730 }, { "epoch": 0.8037905443363591, "grad_norm": 0.09532030671834946, "learning_rate": 9.846866096866097e-06, "loss": 0.0192, "step": 108740 }, { "epoch": 0.8038644629076609, "grad_norm": 0.07980860769748688, "learning_rate": 9.843156457739792e-06, "loss": 0.0184, "step": 108750 }, { "epoch": 0.8039383814789628, "grad_norm": 0.0913219228386879, "learning_rate": 9.839446818613486e-06, "loss": 0.0164, "step": 108760 }, { "epoch": 0.8040123000502646, "grad_norm": 0.07639884203672409, "learning_rate": 9.83573717948718e-06, "loss": 0.0179, "step": 108770 }, { "epoch": 0.8040862186215665, "grad_norm": 0.06440051645040512, "learning_rate": 9.832027540360875e-06, "loss": 0.0161, "step": 108780 }, { "epoch": 0.8041601371928684, "grad_norm": 0.08487991243600845, "learning_rate": 9.828317901234569e-06, "loss": 0.0165, "step": 108790 }, { "epoch": 0.8042340557641702, "grad_norm": 0.06237079203128815, "learning_rate": 9.824608262108261e-06, "loss": 0.0184, "step": 108800 }, { "epoch": 0.8043079743354721, "grad_norm": 0.08246572315692902, "learning_rate": 9.820898622981957e-06, "loss": 0.0164, "step": 108810 }, { "epoch": 0.8043818929067739, "grad_norm": 0.07118767499923706, "learning_rate": 9.81718898385565e-06, "loss": 0.0173, "step": 108820 }, { "epoch": 0.8044558114780758, "grad_norm": 0.09393595904111862, "learning_rate": 9.813479344729346e-06, "loss": 0.0166, "step": 108830 }, { "epoch": 0.8045297300493776, "grad_norm": 0.098183773458004, "learning_rate": 9.809769705603039e-06, "loss": 0.0173, "step": 108840 }, { "epoch": 0.8046036486206795, "grad_norm": 0.0713919922709465, "learning_rate": 9.806060066476734e-06, "loss": 0.0172, "step": 108850 }, { "epoch": 0.8046775671919814, "grad_norm": 0.05390486493706703, "learning_rate": 9.802350427350427e-06, "loss": 0.0178, "step": 108860 }, { "epoch": 0.8047514857632831, "grad_norm": 0.09047302603721619, "learning_rate": 9.798640788224123e-06, "loss": 0.0163, "step": 108870 }, { "epoch": 0.804825404334585, "grad_norm": 0.08637501299381256, "learning_rate": 9.794931149097816e-06, "loss": 0.0174, "step": 108880 }, { "epoch": 0.8048993229058868, "grad_norm": 0.07068019360303879, "learning_rate": 9.79122150997151e-06, "loss": 0.0157, "step": 108890 }, { "epoch": 0.8049732414771887, "grad_norm": 0.06551461666822433, "learning_rate": 9.787511870845204e-06, "loss": 0.0178, "step": 108900 }, { "epoch": 0.8050471600484905, "grad_norm": 0.05838355794548988, "learning_rate": 9.783802231718898e-06, "loss": 0.0185, "step": 108910 }, { "epoch": 0.8051210786197924, "grad_norm": 0.10512775182723999, "learning_rate": 9.780092592592593e-06, "loss": 0.0195, "step": 108920 }, { "epoch": 0.8051949971910943, "grad_norm": 0.08655641227960587, "learning_rate": 9.776382953466287e-06, "loss": 0.016, "step": 108930 }, { "epoch": 0.8052689157623961, "grad_norm": 0.10874267667531967, "learning_rate": 9.772673314339981e-06, "loss": 0.0167, "step": 108940 }, { "epoch": 0.805342834333698, "grad_norm": 0.09654273092746735, "learning_rate": 9.768963675213676e-06, "loss": 0.0168, "step": 108950 }, { "epoch": 0.8054167529049998, "grad_norm": 0.08570707589387894, "learning_rate": 9.76525403608737e-06, "loss": 0.0176, "step": 108960 }, { "epoch": 0.8054906714763017, "grad_norm": 0.1013251543045044, "learning_rate": 9.761544396961064e-06, "loss": 0.0174, "step": 108970 }, { "epoch": 0.8055645900476036, "grad_norm": 0.07357745617628098, "learning_rate": 9.757834757834758e-06, "loss": 0.0169, "step": 108980 }, { "epoch": 0.8056385086189054, "grad_norm": 0.1125309094786644, "learning_rate": 9.754125118708453e-06, "loss": 0.0173, "step": 108990 }, { "epoch": 0.8057124271902073, "grad_norm": 0.10419237613677979, "learning_rate": 9.750415479582147e-06, "loss": 0.0156, "step": 109000 }, { "epoch": 0.8057863457615091, "grad_norm": 0.06879635900259018, "learning_rate": 9.746705840455841e-06, "loss": 0.0172, "step": 109010 }, { "epoch": 0.805860264332811, "grad_norm": 0.07967539876699448, "learning_rate": 9.742996201329535e-06, "loss": 0.0163, "step": 109020 }, { "epoch": 0.8059341829041128, "grad_norm": 0.06467405706644058, "learning_rate": 9.739286562203228e-06, "loss": 0.0171, "step": 109030 }, { "epoch": 0.8060081014754147, "grad_norm": 0.08744856715202332, "learning_rate": 9.735576923076924e-06, "loss": 0.0172, "step": 109040 }, { "epoch": 0.8060820200467166, "grad_norm": 0.06352761387825012, "learning_rate": 9.731867283950617e-06, "loss": 0.0176, "step": 109050 }, { "epoch": 0.8061559386180184, "grad_norm": 0.09876241534948349, "learning_rate": 9.728157644824313e-06, "loss": 0.0194, "step": 109060 }, { "epoch": 0.8062298571893203, "grad_norm": 0.09139581769704819, "learning_rate": 9.724448005698005e-06, "loss": 0.0181, "step": 109070 }, { "epoch": 0.8063037757606221, "grad_norm": 0.06005903705954552, "learning_rate": 9.720738366571701e-06, "loss": 0.0177, "step": 109080 }, { "epoch": 0.806377694331924, "grad_norm": 0.06951243430376053, "learning_rate": 9.717028727445394e-06, "loss": 0.0178, "step": 109090 }, { "epoch": 0.8064516129032258, "grad_norm": 0.05802584066987038, "learning_rate": 9.71331908831909e-06, "loss": 0.0137, "step": 109100 }, { "epoch": 0.8065255314745277, "grad_norm": 0.0676131471991539, "learning_rate": 9.709609449192782e-06, "loss": 0.0171, "step": 109110 }, { "epoch": 0.8065994500458296, "grad_norm": 0.06124149635434151, "learning_rate": 9.705899810066476e-06, "loss": 0.0178, "step": 109120 }, { "epoch": 0.8066733686171313, "grad_norm": 0.0544743649661541, "learning_rate": 9.702190170940172e-06, "loss": 0.0164, "step": 109130 }, { "epoch": 0.8067472871884332, "grad_norm": 0.0752519741654396, "learning_rate": 9.698480531813865e-06, "loss": 0.0178, "step": 109140 }, { "epoch": 0.806821205759735, "grad_norm": 0.07265692949295044, "learning_rate": 9.694770892687561e-06, "loss": 0.0173, "step": 109150 }, { "epoch": 0.8068951243310369, "grad_norm": 0.08553270995616913, "learning_rate": 9.691061253561254e-06, "loss": 0.0175, "step": 109160 }, { "epoch": 0.8069690429023387, "grad_norm": 0.06974239647388458, "learning_rate": 9.68735161443495e-06, "loss": 0.0181, "step": 109170 }, { "epoch": 0.8070429614736406, "grad_norm": 0.08455439656972885, "learning_rate": 9.683641975308642e-06, "loss": 0.0164, "step": 109180 }, { "epoch": 0.8071168800449425, "grad_norm": 0.08338673412799835, "learning_rate": 9.679932336182336e-06, "loss": 0.0169, "step": 109190 }, { "epoch": 0.8071907986162443, "grad_norm": 0.06884989887475967, "learning_rate": 9.67622269705603e-06, "loss": 0.0169, "step": 109200 }, { "epoch": 0.8072647171875462, "grad_norm": 0.08037258684635162, "learning_rate": 9.672513057929725e-06, "loss": 0.0162, "step": 109210 }, { "epoch": 0.807338635758848, "grad_norm": 0.07604662328958511, "learning_rate": 9.66880341880342e-06, "loss": 0.0189, "step": 109220 }, { "epoch": 0.8074125543301499, "grad_norm": 0.07522002607584, "learning_rate": 9.665093779677113e-06, "loss": 0.015, "step": 109230 }, { "epoch": 0.8074864729014518, "grad_norm": 0.06911370903253555, "learning_rate": 9.661384140550808e-06, "loss": 0.0169, "step": 109240 }, { "epoch": 0.8075603914727536, "grad_norm": 0.05914295092225075, "learning_rate": 9.657674501424502e-06, "loss": 0.0178, "step": 109250 }, { "epoch": 0.8076343100440555, "grad_norm": 0.06423388421535492, "learning_rate": 9.653964862298195e-06, "loss": 0.0172, "step": 109260 }, { "epoch": 0.8077082286153573, "grad_norm": 0.08206699043512344, "learning_rate": 9.65025522317189e-06, "loss": 0.0134, "step": 109270 }, { "epoch": 0.8077821471866592, "grad_norm": 0.0941147580742836, "learning_rate": 9.646545584045585e-06, "loss": 0.0153, "step": 109280 }, { "epoch": 0.807856065757961, "grad_norm": 0.07409139722585678, "learning_rate": 9.642835944919279e-06, "loss": 0.0178, "step": 109290 }, { "epoch": 0.8079299843292629, "grad_norm": 0.12559185922145844, "learning_rate": 9.639126305792973e-06, "loss": 0.0169, "step": 109300 }, { "epoch": 0.8080039029005648, "grad_norm": 0.06743061542510986, "learning_rate": 9.635416666666668e-06, "loss": 0.0182, "step": 109310 }, { "epoch": 0.8080778214718666, "grad_norm": 0.0892493724822998, "learning_rate": 9.631707027540362e-06, "loss": 0.0168, "step": 109320 }, { "epoch": 0.8081517400431685, "grad_norm": 0.06646522134542465, "learning_rate": 9.627997388414056e-06, "loss": 0.0159, "step": 109330 }, { "epoch": 0.8082256586144703, "grad_norm": 0.0646686926484108, "learning_rate": 9.62428774928775e-06, "loss": 0.0148, "step": 109340 }, { "epoch": 0.8082995771857722, "grad_norm": 0.06950532644987106, "learning_rate": 9.620578110161443e-06, "loss": 0.0179, "step": 109350 }, { "epoch": 0.808373495757074, "grad_norm": 0.08026842027902603, "learning_rate": 9.616868471035139e-06, "loss": 0.0153, "step": 109360 }, { "epoch": 0.8084474143283759, "grad_norm": 0.0855989009141922, "learning_rate": 9.613158831908832e-06, "loss": 0.0197, "step": 109370 }, { "epoch": 0.8085213328996778, "grad_norm": 0.07575526088476181, "learning_rate": 9.609449192782528e-06, "loss": 0.0148, "step": 109380 }, { "epoch": 0.8085952514709795, "grad_norm": 0.06633900105953217, "learning_rate": 9.60573955365622e-06, "loss": 0.0167, "step": 109390 }, { "epoch": 0.8086691700422814, "grad_norm": 0.09347169101238251, "learning_rate": 9.602029914529916e-06, "loss": 0.0152, "step": 109400 }, { "epoch": 0.8087430886135832, "grad_norm": 0.08023204654455185, "learning_rate": 9.598320275403609e-06, "loss": 0.0168, "step": 109410 }, { "epoch": 0.8088170071848851, "grad_norm": 0.11619100719690323, "learning_rate": 9.594610636277303e-06, "loss": 0.018, "step": 109420 }, { "epoch": 0.8088909257561869, "grad_norm": 0.06467882543802261, "learning_rate": 9.590900997150997e-06, "loss": 0.0178, "step": 109430 }, { "epoch": 0.8089648443274888, "grad_norm": 0.0748329907655716, "learning_rate": 9.587191358024692e-06, "loss": 0.0187, "step": 109440 }, { "epoch": 0.8090387628987907, "grad_norm": 0.08651736378669739, "learning_rate": 9.583481718898386e-06, "loss": 0.0166, "step": 109450 }, { "epoch": 0.8091126814700925, "grad_norm": 0.06881321966648102, "learning_rate": 9.57977207977208e-06, "loss": 0.018, "step": 109460 }, { "epoch": 0.8091866000413944, "grad_norm": 0.09105443954467773, "learning_rate": 9.576062440645774e-06, "loss": 0.0193, "step": 109470 }, { "epoch": 0.8092605186126962, "grad_norm": 0.0879446268081665, "learning_rate": 9.572352801519469e-06, "loss": 0.0159, "step": 109480 }, { "epoch": 0.8093344371839981, "grad_norm": 0.0992131382226944, "learning_rate": 9.568643162393163e-06, "loss": 0.0176, "step": 109490 }, { "epoch": 0.8094083557553, "grad_norm": 0.0872669368982315, "learning_rate": 9.564933523266857e-06, "loss": 0.0148, "step": 109500 }, { "epoch": 0.8094822743266018, "grad_norm": 0.08526704460382462, "learning_rate": 9.561223884140551e-06, "loss": 0.0177, "step": 109510 }, { "epoch": 0.8095561928979037, "grad_norm": 0.10563500970602036, "learning_rate": 9.557514245014246e-06, "loss": 0.0162, "step": 109520 }, { "epoch": 0.8096301114692055, "grad_norm": 0.09399376064538956, "learning_rate": 9.55380460588794e-06, "loss": 0.0179, "step": 109530 }, { "epoch": 0.8097040300405074, "grad_norm": 0.07743006944656372, "learning_rate": 9.550094966761634e-06, "loss": 0.0166, "step": 109540 }, { "epoch": 0.8097779486118092, "grad_norm": 0.08263615518808365, "learning_rate": 9.546385327635329e-06, "loss": 0.0171, "step": 109550 }, { "epoch": 0.8098518671831111, "grad_norm": 0.10280684381723404, "learning_rate": 9.542675688509023e-06, "loss": 0.017, "step": 109560 }, { "epoch": 0.809925785754413, "grad_norm": 0.07834198325872421, "learning_rate": 9.538966049382717e-06, "loss": 0.0165, "step": 109570 }, { "epoch": 0.8099997043257148, "grad_norm": 0.08863791823387146, "learning_rate": 9.53525641025641e-06, "loss": 0.0159, "step": 109580 }, { "epoch": 0.8100736228970167, "grad_norm": 0.07828589528799057, "learning_rate": 9.531546771130106e-06, "loss": 0.0191, "step": 109590 }, { "epoch": 0.8101475414683185, "grad_norm": 0.07181723415851593, "learning_rate": 9.527837132003798e-06, "loss": 0.0187, "step": 109600 }, { "epoch": 0.8102214600396204, "grad_norm": 0.06643036007881165, "learning_rate": 9.524127492877494e-06, "loss": 0.016, "step": 109610 }, { "epoch": 0.8102953786109222, "grad_norm": 0.04769090190529823, "learning_rate": 9.520417853751187e-06, "loss": 0.0198, "step": 109620 }, { "epoch": 0.8103692971822241, "grad_norm": 0.0632459968328476, "learning_rate": 9.516708214624883e-06, "loss": 0.017, "step": 109630 }, { "epoch": 0.810443215753526, "grad_norm": 0.05598078668117523, "learning_rate": 9.512998575498575e-06, "loss": 0.0194, "step": 109640 }, { "epoch": 0.8105171343248277, "grad_norm": 0.08820049464702606, "learning_rate": 9.50928893637227e-06, "loss": 0.0171, "step": 109650 }, { "epoch": 0.8105910528961296, "grad_norm": 0.07200802862644196, "learning_rate": 9.505579297245964e-06, "loss": 0.0174, "step": 109660 }, { "epoch": 0.8106649714674314, "grad_norm": 0.06750143319368362, "learning_rate": 9.501869658119658e-06, "loss": 0.0177, "step": 109670 }, { "epoch": 0.8107388900387333, "grad_norm": 0.061461541801691055, "learning_rate": 9.498160018993352e-06, "loss": 0.0176, "step": 109680 }, { "epoch": 0.8108128086100352, "grad_norm": 0.056660935282707214, "learning_rate": 9.494450379867047e-06, "loss": 0.0177, "step": 109690 }, { "epoch": 0.810886727181337, "grad_norm": 0.06250043213367462, "learning_rate": 9.490740740740741e-06, "loss": 0.0176, "step": 109700 }, { "epoch": 0.8109606457526389, "grad_norm": 0.06084416061639786, "learning_rate": 9.487031101614435e-06, "loss": 0.017, "step": 109710 }, { "epoch": 0.8110345643239407, "grad_norm": 0.08368322998285294, "learning_rate": 9.48332146248813e-06, "loss": 0.0174, "step": 109720 }, { "epoch": 0.8111084828952426, "grad_norm": 0.06921650469303131, "learning_rate": 9.479611823361824e-06, "loss": 0.0158, "step": 109730 }, { "epoch": 0.8111824014665444, "grad_norm": 0.08464595675468445, "learning_rate": 9.475902184235518e-06, "loss": 0.0184, "step": 109740 }, { "epoch": 0.8112563200378463, "grad_norm": 0.09766136109828949, "learning_rate": 9.472192545109212e-06, "loss": 0.0157, "step": 109750 }, { "epoch": 0.8113302386091482, "grad_norm": 0.10288077592849731, "learning_rate": 9.468482905982907e-06, "loss": 0.0169, "step": 109760 }, { "epoch": 0.81140415718045, "grad_norm": 0.07717662304639816, "learning_rate": 9.4647732668566e-06, "loss": 0.0152, "step": 109770 }, { "epoch": 0.8114780757517519, "grad_norm": 0.07156575471162796, "learning_rate": 9.461063627730295e-06, "loss": 0.0162, "step": 109780 }, { "epoch": 0.8115519943230537, "grad_norm": 0.08129802346229553, "learning_rate": 9.45735398860399e-06, "loss": 0.0185, "step": 109790 }, { "epoch": 0.8116259128943556, "grad_norm": 0.08766285330057144, "learning_rate": 9.453644349477684e-06, "loss": 0.0172, "step": 109800 }, { "epoch": 0.8116998314656574, "grad_norm": 0.07962168753147125, "learning_rate": 9.449934710351376e-06, "loss": 0.0189, "step": 109810 }, { "epoch": 0.8117737500369593, "grad_norm": 0.04780622199177742, "learning_rate": 9.446225071225072e-06, "loss": 0.0173, "step": 109820 }, { "epoch": 0.8118476686082612, "grad_norm": 0.05502776801586151, "learning_rate": 9.442515432098765e-06, "loss": 0.0139, "step": 109830 }, { "epoch": 0.811921587179563, "grad_norm": 0.07169415056705475, "learning_rate": 9.43880579297246e-06, "loss": 0.0189, "step": 109840 }, { "epoch": 0.8119955057508649, "grad_norm": 0.08810874819755554, "learning_rate": 9.435096153846153e-06, "loss": 0.0191, "step": 109850 }, { "epoch": 0.8120694243221667, "grad_norm": 0.07159397751092911, "learning_rate": 9.43138651471985e-06, "loss": 0.0187, "step": 109860 }, { "epoch": 0.8121433428934686, "grad_norm": 0.11870425194501877, "learning_rate": 9.427676875593542e-06, "loss": 0.0191, "step": 109870 }, { "epoch": 0.8122172614647704, "grad_norm": 0.0735914409160614, "learning_rate": 9.423967236467236e-06, "loss": 0.0161, "step": 109880 }, { "epoch": 0.8122911800360723, "grad_norm": 0.06938380748033524, "learning_rate": 9.420257597340932e-06, "loss": 0.0173, "step": 109890 }, { "epoch": 0.8123650986073742, "grad_norm": 0.0808085948228836, "learning_rate": 9.416547958214625e-06, "loss": 0.0173, "step": 109900 }, { "epoch": 0.812439017178676, "grad_norm": 0.0733335018157959, "learning_rate": 9.41283831908832e-06, "loss": 0.0181, "step": 109910 }, { "epoch": 0.8125129357499778, "grad_norm": 0.09480886906385422, "learning_rate": 9.409128679962013e-06, "loss": 0.0188, "step": 109920 }, { "epoch": 0.8125868543212796, "grad_norm": 0.05744516849517822, "learning_rate": 9.40541904083571e-06, "loss": 0.0171, "step": 109930 }, { "epoch": 0.8126607728925815, "grad_norm": 0.08588145673274994, "learning_rate": 9.401709401709402e-06, "loss": 0.0162, "step": 109940 }, { "epoch": 0.8127346914638834, "grad_norm": 0.08707545697689056, "learning_rate": 9.397999762583098e-06, "loss": 0.0172, "step": 109950 }, { "epoch": 0.8128086100351852, "grad_norm": 0.06720160692930222, "learning_rate": 9.39429012345679e-06, "loss": 0.018, "step": 109960 }, { "epoch": 0.8128825286064871, "grad_norm": 0.0780353993177414, "learning_rate": 9.390580484330485e-06, "loss": 0.0176, "step": 109970 }, { "epoch": 0.8129564471777889, "grad_norm": 0.062965989112854, "learning_rate": 9.386870845204179e-06, "loss": 0.0172, "step": 109980 }, { "epoch": 0.8130303657490908, "grad_norm": 0.10180020332336426, "learning_rate": 9.383161206077873e-06, "loss": 0.0166, "step": 109990 }, { "epoch": 0.8131042843203926, "grad_norm": 0.089419424533844, "learning_rate": 9.379451566951567e-06, "loss": 0.02, "step": 110000 }, { "epoch": 0.8131042843203926, "eval_f1": 0.6308062269462349, "eval_loss": 0.016855139285326004, "eval_precision": 0.5022165389113012, "eval_recall": 0.8479083322773926, "eval_runtime": 2929.1275, "eval_samples_per_second": 184.742, "eval_steps_per_second": 2.887, "step": 110000 }, { "epoch": 0.8131782028916945, "grad_norm": 0.09093558043241501, "learning_rate": 9.375741927825262e-06, "loss": 0.0173, "step": 110010 }, { "epoch": 0.8132521214629964, "grad_norm": 0.09518486261367798, "learning_rate": 9.372032288698956e-06, "loss": 0.0192, "step": 110020 }, { "epoch": 0.8133260400342982, "grad_norm": 0.07745856046676636, "learning_rate": 9.36832264957265e-06, "loss": 0.0156, "step": 110030 }, { "epoch": 0.8133999586056001, "grad_norm": 0.07020919770002365, "learning_rate": 9.364613010446343e-06, "loss": 0.0175, "step": 110040 }, { "epoch": 0.8134738771769019, "grad_norm": 0.06830445677042007, "learning_rate": 9.360903371320039e-06, "loss": 0.0163, "step": 110050 }, { "epoch": 0.8135477957482038, "grad_norm": 0.09864474833011627, "learning_rate": 9.357193732193733e-06, "loss": 0.0172, "step": 110060 }, { "epoch": 0.8136217143195056, "grad_norm": 0.08051852881908417, "learning_rate": 9.353484093067427e-06, "loss": 0.018, "step": 110070 }, { "epoch": 0.8136956328908075, "grad_norm": 0.06373199075460434, "learning_rate": 9.349774453941122e-06, "loss": 0.0182, "step": 110080 }, { "epoch": 0.8137695514621094, "grad_norm": 0.07208918035030365, "learning_rate": 9.346064814814816e-06, "loss": 0.0176, "step": 110090 }, { "epoch": 0.8138434700334112, "grad_norm": 0.06866229325532913, "learning_rate": 9.34235517568851e-06, "loss": 0.0152, "step": 110100 }, { "epoch": 0.8139173886047131, "grad_norm": 0.09499530494213104, "learning_rate": 9.338645536562203e-06, "loss": 0.0186, "step": 110110 }, { "epoch": 0.8139913071760149, "grad_norm": 0.08893073350191116, "learning_rate": 9.334935897435899e-06, "loss": 0.0206, "step": 110120 }, { "epoch": 0.8140652257473168, "grad_norm": 0.06883365660905838, "learning_rate": 9.331226258309591e-06, "loss": 0.0137, "step": 110130 }, { "epoch": 0.8141391443186186, "grad_norm": 0.08844020217657089, "learning_rate": 9.327516619183287e-06, "loss": 0.0179, "step": 110140 }, { "epoch": 0.8142130628899205, "grad_norm": 0.10410746932029724, "learning_rate": 9.32380698005698e-06, "loss": 0.019, "step": 110150 }, { "epoch": 0.8142869814612224, "grad_norm": 0.07305875420570374, "learning_rate": 9.320097340930676e-06, "loss": 0.0154, "step": 110160 }, { "epoch": 0.8143609000325241, "grad_norm": 0.09578864276409149, "learning_rate": 9.316387701804368e-06, "loss": 0.0176, "step": 110170 }, { "epoch": 0.814434818603826, "grad_norm": 0.10740100592374802, "learning_rate": 9.312678062678064e-06, "loss": 0.02, "step": 110180 }, { "epoch": 0.8145087371751278, "grad_norm": 0.1025090143084526, "learning_rate": 9.308968423551757e-06, "loss": 0.0191, "step": 110190 }, { "epoch": 0.8145826557464297, "grad_norm": 0.07519301027059555, "learning_rate": 9.305258784425451e-06, "loss": 0.0189, "step": 110200 }, { "epoch": 0.8146565743177316, "grad_norm": 0.08736584335565567, "learning_rate": 9.301549145299145e-06, "loss": 0.0174, "step": 110210 }, { "epoch": 0.8147304928890334, "grad_norm": 0.08328347653150558, "learning_rate": 9.29783950617284e-06, "loss": 0.0165, "step": 110220 }, { "epoch": 0.8148044114603353, "grad_norm": 0.0822676569223404, "learning_rate": 9.294129867046534e-06, "loss": 0.0175, "step": 110230 }, { "epoch": 0.8148783300316371, "grad_norm": 0.08251149207353592, "learning_rate": 9.290420227920228e-06, "loss": 0.0178, "step": 110240 }, { "epoch": 0.814952248602939, "grad_norm": 0.0925745889544487, "learning_rate": 9.286710588793923e-06, "loss": 0.0158, "step": 110250 }, { "epoch": 0.8150261671742408, "grad_norm": 0.09380180388689041, "learning_rate": 9.283000949667617e-06, "loss": 0.0161, "step": 110260 }, { "epoch": 0.8151000857455427, "grad_norm": 0.05887442082166672, "learning_rate": 9.279291310541311e-06, "loss": 0.0153, "step": 110270 }, { "epoch": 0.8151740043168446, "grad_norm": 0.0812305212020874, "learning_rate": 9.275581671415005e-06, "loss": 0.018, "step": 110280 }, { "epoch": 0.8152479228881464, "grad_norm": 0.06080586463212967, "learning_rate": 9.2718720322887e-06, "loss": 0.018, "step": 110290 }, { "epoch": 0.8153218414594483, "grad_norm": 0.0710943415760994, "learning_rate": 9.268162393162394e-06, "loss": 0.0171, "step": 110300 }, { "epoch": 0.8153957600307501, "grad_norm": 0.06379541009664536, "learning_rate": 9.264452754036088e-06, "loss": 0.0157, "step": 110310 }, { "epoch": 0.815469678602052, "grad_norm": 0.11508116126060486, "learning_rate": 9.260743114909782e-06, "loss": 0.0167, "step": 110320 }, { "epoch": 0.8155435971733538, "grad_norm": 0.09485437721014023, "learning_rate": 9.257033475783477e-06, "loss": 0.0165, "step": 110330 }, { "epoch": 0.8156175157446557, "grad_norm": 0.062455859035253525, "learning_rate": 9.25332383665717e-06, "loss": 0.0167, "step": 110340 }, { "epoch": 0.8156914343159576, "grad_norm": 0.0841631069779396, "learning_rate": 9.249614197530865e-06, "loss": 0.0152, "step": 110350 }, { "epoch": 0.8157653528872594, "grad_norm": 0.07254151254892349, "learning_rate": 9.245904558404558e-06, "loss": 0.0187, "step": 110360 }, { "epoch": 0.8158392714585613, "grad_norm": 0.07702041417360306, "learning_rate": 9.242194919278254e-06, "loss": 0.0184, "step": 110370 }, { "epoch": 0.8159131900298631, "grad_norm": 0.06007539853453636, "learning_rate": 9.238485280151946e-06, "loss": 0.0167, "step": 110380 }, { "epoch": 0.815987108601165, "grad_norm": 0.07187853008508682, "learning_rate": 9.234775641025642e-06, "loss": 0.019, "step": 110390 }, { "epoch": 0.8160610271724668, "grad_norm": 0.07426824420690536, "learning_rate": 9.231066001899335e-06, "loss": 0.0169, "step": 110400 }, { "epoch": 0.8161349457437687, "grad_norm": 0.081462062895298, "learning_rate": 9.227356362773031e-06, "loss": 0.0179, "step": 110410 }, { "epoch": 0.8162088643150706, "grad_norm": 0.08284270763397217, "learning_rate": 9.223646723646723e-06, "loss": 0.0152, "step": 110420 }, { "epoch": 0.8162827828863723, "grad_norm": 0.09823929518461227, "learning_rate": 9.219937084520418e-06, "loss": 0.0178, "step": 110430 }, { "epoch": 0.8163567014576742, "grad_norm": 0.09018296003341675, "learning_rate": 9.216227445394112e-06, "loss": 0.0168, "step": 110440 }, { "epoch": 0.816430620028976, "grad_norm": 0.07779600471258163, "learning_rate": 9.212517806267806e-06, "loss": 0.0179, "step": 110450 }, { "epoch": 0.8165045386002779, "grad_norm": 0.09467915445566177, "learning_rate": 9.2088081671415e-06, "loss": 0.0189, "step": 110460 }, { "epoch": 0.8165784571715798, "grad_norm": 0.06862441450357437, "learning_rate": 9.205098528015195e-06, "loss": 0.0171, "step": 110470 }, { "epoch": 0.8166523757428816, "grad_norm": 0.0598343126475811, "learning_rate": 9.201388888888889e-06, "loss": 0.0168, "step": 110480 }, { "epoch": 0.8167262943141835, "grad_norm": 0.06925109028816223, "learning_rate": 9.197679249762583e-06, "loss": 0.0162, "step": 110490 }, { "epoch": 0.8168002128854853, "grad_norm": 0.10528044402599335, "learning_rate": 9.193969610636278e-06, "loss": 0.0185, "step": 110500 }, { "epoch": 0.8168741314567872, "grad_norm": 0.09137672930955887, "learning_rate": 9.190259971509972e-06, "loss": 0.019, "step": 110510 }, { "epoch": 0.816948050028089, "grad_norm": 0.07503055036067963, "learning_rate": 9.186550332383666e-06, "loss": 0.0162, "step": 110520 }, { "epoch": 0.8170219685993909, "grad_norm": 0.08323784172534943, "learning_rate": 9.18284069325736e-06, "loss": 0.0194, "step": 110530 }, { "epoch": 0.8170958871706928, "grad_norm": 0.07209733873605728, "learning_rate": 9.179131054131055e-06, "loss": 0.0181, "step": 110540 }, { "epoch": 0.8171698057419946, "grad_norm": 0.07893183827400208, "learning_rate": 9.175421415004749e-06, "loss": 0.0166, "step": 110550 }, { "epoch": 0.8172437243132965, "grad_norm": 0.05670410022139549, "learning_rate": 9.171711775878443e-06, "loss": 0.0162, "step": 110560 }, { "epoch": 0.8173176428845983, "grad_norm": 0.09070245921611786, "learning_rate": 9.168002136752136e-06, "loss": 0.0173, "step": 110570 }, { "epoch": 0.8173915614559002, "grad_norm": 0.10759375989437103, "learning_rate": 9.164292497625832e-06, "loss": 0.019, "step": 110580 }, { "epoch": 0.817465480027202, "grad_norm": 0.06987611204385757, "learning_rate": 9.160582858499524e-06, "loss": 0.0165, "step": 110590 }, { "epoch": 0.8175393985985039, "grad_norm": 0.06290466338396072, "learning_rate": 9.15687321937322e-06, "loss": 0.0174, "step": 110600 }, { "epoch": 0.8176133171698058, "grad_norm": 0.05969390273094177, "learning_rate": 9.153163580246913e-06, "loss": 0.0181, "step": 110610 }, { "epoch": 0.8176872357411076, "grad_norm": 0.06636475771665573, "learning_rate": 9.149453941120609e-06, "loss": 0.0191, "step": 110620 }, { "epoch": 0.8177611543124095, "grad_norm": 0.0508534200489521, "learning_rate": 9.145744301994302e-06, "loss": 0.0185, "step": 110630 }, { "epoch": 0.8178350728837113, "grad_norm": 0.08707664906978607, "learning_rate": 9.142034662867997e-06, "loss": 0.0174, "step": 110640 }, { "epoch": 0.8179089914550132, "grad_norm": 0.0794239416718483, "learning_rate": 9.138325023741692e-06, "loss": 0.0181, "step": 110650 }, { "epoch": 0.817982910026315, "grad_norm": 0.07165955752134323, "learning_rate": 9.134615384615384e-06, "loss": 0.0196, "step": 110660 }, { "epoch": 0.8180568285976169, "grad_norm": 0.07645541429519653, "learning_rate": 9.13090574548908e-06, "loss": 0.0176, "step": 110670 }, { "epoch": 0.8181307471689188, "grad_norm": 0.08750036358833313, "learning_rate": 9.127196106362773e-06, "loss": 0.0161, "step": 110680 }, { "epoch": 0.8182046657402205, "grad_norm": 0.10095136612653732, "learning_rate": 9.123486467236469e-06, "loss": 0.0189, "step": 110690 }, { "epoch": 0.8182785843115225, "grad_norm": 0.07623224705457687, "learning_rate": 9.119776828110161e-06, "loss": 0.0151, "step": 110700 }, { "epoch": 0.8183525028828242, "grad_norm": 0.09414133429527283, "learning_rate": 9.116067188983857e-06, "loss": 0.0176, "step": 110710 }, { "epoch": 0.8184264214541261, "grad_norm": 0.062059711664915085, "learning_rate": 9.11235754985755e-06, "loss": 0.0158, "step": 110720 }, { "epoch": 0.818500340025428, "grad_norm": 0.11147674918174744, "learning_rate": 9.108647910731244e-06, "loss": 0.0185, "step": 110730 }, { "epoch": 0.8185742585967298, "grad_norm": 0.0802476778626442, "learning_rate": 9.104938271604939e-06, "loss": 0.0165, "step": 110740 }, { "epoch": 0.8186481771680317, "grad_norm": 0.056705329567193985, "learning_rate": 9.101228632478633e-06, "loss": 0.0166, "step": 110750 }, { "epoch": 0.8187220957393335, "grad_norm": 0.10482749342918396, "learning_rate": 9.097518993352327e-06, "loss": 0.0169, "step": 110760 }, { "epoch": 0.8187960143106354, "grad_norm": 0.07661043107509613, "learning_rate": 9.093809354226021e-06, "loss": 0.0171, "step": 110770 }, { "epoch": 0.8188699328819372, "grad_norm": 0.04403127729892731, "learning_rate": 9.090099715099716e-06, "loss": 0.0158, "step": 110780 }, { "epoch": 0.8189438514532391, "grad_norm": 0.10330238193273544, "learning_rate": 9.08639007597341e-06, "loss": 0.0179, "step": 110790 }, { "epoch": 0.819017770024541, "grad_norm": 0.07194700092077255, "learning_rate": 9.082680436847102e-06, "loss": 0.0166, "step": 110800 }, { "epoch": 0.8190916885958428, "grad_norm": 0.07015722990036011, "learning_rate": 9.078970797720798e-06, "loss": 0.0165, "step": 110810 }, { "epoch": 0.8191656071671447, "grad_norm": 0.09940315037965775, "learning_rate": 9.075261158594493e-06, "loss": 0.0186, "step": 110820 }, { "epoch": 0.8192395257384465, "grad_norm": 0.08396787196397781, "learning_rate": 9.071551519468187e-06, "loss": 0.0155, "step": 110830 }, { "epoch": 0.8193134443097484, "grad_norm": 0.09233757108449936, "learning_rate": 9.067841880341881e-06, "loss": 0.0168, "step": 110840 }, { "epoch": 0.8193873628810502, "grad_norm": 0.0720495656132698, "learning_rate": 9.064132241215576e-06, "loss": 0.0164, "step": 110850 }, { "epoch": 0.8194612814523521, "grad_norm": 0.08925806730985641, "learning_rate": 9.06042260208927e-06, "loss": 0.017, "step": 110860 }, { "epoch": 0.819535200023654, "grad_norm": 0.08104828745126724, "learning_rate": 9.056712962962964e-06, "loss": 0.0188, "step": 110870 }, { "epoch": 0.8196091185949558, "grad_norm": 0.07082411646842957, "learning_rate": 9.053003323836658e-06, "loss": 0.0197, "step": 110880 }, { "epoch": 0.8196830371662577, "grad_norm": 0.08721831440925598, "learning_rate": 9.049293684710351e-06, "loss": 0.0153, "step": 110890 }, { "epoch": 0.8197569557375595, "grad_norm": 0.11455877870321274, "learning_rate": 9.045584045584047e-06, "loss": 0.0176, "step": 110900 }, { "epoch": 0.8198308743088614, "grad_norm": 0.08649104833602905, "learning_rate": 9.04187440645774e-06, "loss": 0.0171, "step": 110910 }, { "epoch": 0.8199047928801632, "grad_norm": 0.08350533246994019, "learning_rate": 9.038164767331435e-06, "loss": 0.0167, "step": 110920 }, { "epoch": 0.8199787114514651, "grad_norm": 0.0666193887591362, "learning_rate": 9.034455128205128e-06, "loss": 0.0162, "step": 110930 }, { "epoch": 0.820052630022767, "grad_norm": 0.08098326623439789, "learning_rate": 9.030745489078824e-06, "loss": 0.0157, "step": 110940 }, { "epoch": 0.8201265485940687, "grad_norm": 0.06648987531661987, "learning_rate": 9.027035849952517e-06, "loss": 0.0163, "step": 110950 }, { "epoch": 0.8202004671653707, "grad_norm": 0.09723273664712906, "learning_rate": 9.02332621082621e-06, "loss": 0.017, "step": 110960 }, { "epoch": 0.8202743857366724, "grad_norm": 0.09789856523275375, "learning_rate": 9.019616571699905e-06, "loss": 0.0159, "step": 110970 }, { "epoch": 0.8203483043079743, "grad_norm": 0.07456418126821518, "learning_rate": 9.0159069325736e-06, "loss": 0.0147, "step": 110980 }, { "epoch": 0.8204222228792762, "grad_norm": 0.08231586217880249, "learning_rate": 9.012197293447294e-06, "loss": 0.0183, "step": 110990 }, { "epoch": 0.820496141450578, "grad_norm": 0.0645674616098404, "learning_rate": 9.008487654320988e-06, "loss": 0.0158, "step": 111000 }, { "epoch": 0.8205700600218799, "grad_norm": 0.07256720215082169, "learning_rate": 9.004778015194682e-06, "loss": 0.0199, "step": 111010 }, { "epoch": 0.8206439785931817, "grad_norm": 0.09442053735256195, "learning_rate": 9.001068376068376e-06, "loss": 0.0171, "step": 111020 }, { "epoch": 0.8207178971644836, "grad_norm": 0.08105292916297913, "learning_rate": 8.99735873694207e-06, "loss": 0.0162, "step": 111030 }, { "epoch": 0.8207918157357854, "grad_norm": 0.06316964328289032, "learning_rate": 8.993649097815765e-06, "loss": 0.015, "step": 111040 }, { "epoch": 0.8208657343070873, "grad_norm": 0.1590401977300644, "learning_rate": 8.98993945868946e-06, "loss": 0.016, "step": 111050 }, { "epoch": 0.8209396528783892, "grad_norm": 0.1914466768503189, "learning_rate": 8.986229819563154e-06, "loss": 0.0171, "step": 111060 }, { "epoch": 0.821013571449691, "grad_norm": 0.062346745282411575, "learning_rate": 8.982520180436848e-06, "loss": 0.0186, "step": 111070 }, { "epoch": 0.8210874900209929, "grad_norm": 0.06994392722845078, "learning_rate": 8.978810541310542e-06, "loss": 0.018, "step": 111080 }, { "epoch": 0.8211614085922947, "grad_norm": 0.05640175938606262, "learning_rate": 8.975100902184236e-06, "loss": 0.0151, "step": 111090 }, { "epoch": 0.8212353271635966, "grad_norm": 0.08227109163999557, "learning_rate": 8.97139126305793e-06, "loss": 0.018, "step": 111100 }, { "epoch": 0.8213092457348984, "grad_norm": 0.09539218246936798, "learning_rate": 8.967681623931625e-06, "loss": 0.0163, "step": 111110 }, { "epoch": 0.8213831643062003, "grad_norm": 0.07617813348770142, "learning_rate": 8.963971984805318e-06, "loss": 0.0189, "step": 111120 }, { "epoch": 0.8214570828775022, "grad_norm": 0.07727860659360886, "learning_rate": 8.960262345679013e-06, "loss": 0.0177, "step": 111130 }, { "epoch": 0.821531001448804, "grad_norm": 0.08091080188751221, "learning_rate": 8.956552706552706e-06, "loss": 0.0169, "step": 111140 }, { "epoch": 0.8216049200201059, "grad_norm": 0.07870712131261826, "learning_rate": 8.952843067426402e-06, "loss": 0.0177, "step": 111150 }, { "epoch": 0.8216788385914077, "grad_norm": 0.07519575208425522, "learning_rate": 8.949133428300095e-06, "loss": 0.0181, "step": 111160 }, { "epoch": 0.8217527571627096, "grad_norm": 0.09125541150569916, "learning_rate": 8.94542378917379e-06, "loss": 0.0165, "step": 111170 }, { "epoch": 0.8218266757340114, "grad_norm": 0.07214830815792084, "learning_rate": 8.941714150047483e-06, "loss": 0.0163, "step": 111180 }, { "epoch": 0.8219005943053133, "grad_norm": 0.07173985987901688, "learning_rate": 8.938004510921177e-06, "loss": 0.0184, "step": 111190 }, { "epoch": 0.8219745128766152, "grad_norm": 0.06702403724193573, "learning_rate": 8.934294871794872e-06, "loss": 0.0185, "step": 111200 }, { "epoch": 0.822048431447917, "grad_norm": 0.08879362046718597, "learning_rate": 8.930585232668566e-06, "loss": 0.0161, "step": 111210 }, { "epoch": 0.8221223500192189, "grad_norm": 0.10882284492254257, "learning_rate": 8.92687559354226e-06, "loss": 0.0192, "step": 111220 }, { "epoch": 0.8221962685905206, "grad_norm": 0.08666688948869705, "learning_rate": 8.923165954415955e-06, "loss": 0.0189, "step": 111230 }, { "epoch": 0.8222701871618225, "grad_norm": 0.08701031655073166, "learning_rate": 8.919456315289649e-06, "loss": 0.0175, "step": 111240 }, { "epoch": 0.8223441057331244, "grad_norm": 0.08602787554264069, "learning_rate": 8.915746676163343e-06, "loss": 0.0189, "step": 111250 }, { "epoch": 0.8224180243044262, "grad_norm": 0.0800880491733551, "learning_rate": 8.912037037037037e-06, "loss": 0.0164, "step": 111260 }, { "epoch": 0.8224919428757281, "grad_norm": 0.08936553448438644, "learning_rate": 8.908327397910732e-06, "loss": 0.0168, "step": 111270 }, { "epoch": 0.8225658614470299, "grad_norm": 0.06623522192239761, "learning_rate": 8.904617758784426e-06, "loss": 0.0152, "step": 111280 }, { "epoch": 0.8226397800183318, "grad_norm": 0.07130187749862671, "learning_rate": 8.90090811965812e-06, "loss": 0.02, "step": 111290 }, { "epoch": 0.8227136985896336, "grad_norm": 0.04986598715186119, "learning_rate": 8.897198480531814e-06, "loss": 0.0166, "step": 111300 }, { "epoch": 0.8227876171609355, "grad_norm": 0.07575799524784088, "learning_rate": 8.893488841405509e-06, "loss": 0.0173, "step": 111310 }, { "epoch": 0.8228615357322374, "grad_norm": 0.0706147775053978, "learning_rate": 8.889779202279203e-06, "loss": 0.0169, "step": 111320 }, { "epoch": 0.8229354543035392, "grad_norm": 0.07670772075653076, "learning_rate": 8.886069563152897e-06, "loss": 0.0187, "step": 111330 }, { "epoch": 0.8230093728748411, "grad_norm": 0.09012507647275925, "learning_rate": 8.882359924026592e-06, "loss": 0.0178, "step": 111340 }, { "epoch": 0.8230832914461429, "grad_norm": 0.06751397997140884, "learning_rate": 8.878650284900284e-06, "loss": 0.0188, "step": 111350 }, { "epoch": 0.8231572100174448, "grad_norm": 0.06585299223661423, "learning_rate": 8.87494064577398e-06, "loss": 0.0175, "step": 111360 }, { "epoch": 0.8232311285887466, "grad_norm": 0.08249194175004959, "learning_rate": 8.871231006647673e-06, "loss": 0.0193, "step": 111370 }, { "epoch": 0.8233050471600485, "grad_norm": 0.06290542334318161, "learning_rate": 8.867521367521369e-06, "loss": 0.0157, "step": 111380 }, { "epoch": 0.8233789657313504, "grad_norm": 0.09153600037097931, "learning_rate": 8.863811728395061e-06, "loss": 0.0198, "step": 111390 }, { "epoch": 0.8234528843026522, "grad_norm": 0.07733996957540512, "learning_rate": 8.860102089268757e-06, "loss": 0.0162, "step": 111400 }, { "epoch": 0.8235268028739541, "grad_norm": 0.0745389461517334, "learning_rate": 8.85639245014245e-06, "loss": 0.0188, "step": 111410 }, { "epoch": 0.8236007214452559, "grad_norm": 0.06475479900836945, "learning_rate": 8.852682811016144e-06, "loss": 0.0176, "step": 111420 }, { "epoch": 0.8236746400165578, "grad_norm": 0.05812176316976547, "learning_rate": 8.84897317188984e-06, "loss": 0.0172, "step": 111430 }, { "epoch": 0.8237485585878596, "grad_norm": 0.1021883636713028, "learning_rate": 8.845263532763533e-06, "loss": 0.0186, "step": 111440 }, { "epoch": 0.8238224771591615, "grad_norm": 0.06590006500482559, "learning_rate": 8.841553893637229e-06, "loss": 0.019, "step": 111450 }, { "epoch": 0.8238963957304634, "grad_norm": 0.0770881250500679, "learning_rate": 8.837844254510921e-06, "loss": 0.0159, "step": 111460 }, { "epoch": 0.8239703143017651, "grad_norm": 0.05062444135546684, "learning_rate": 8.834134615384617e-06, "loss": 0.0161, "step": 111470 }, { "epoch": 0.824044232873067, "grad_norm": 0.08017632365226746, "learning_rate": 8.83042497625831e-06, "loss": 0.017, "step": 111480 }, { "epoch": 0.8241181514443688, "grad_norm": 0.08347149938344955, "learning_rate": 8.826715337132004e-06, "loss": 0.0201, "step": 111490 }, { "epoch": 0.8241920700156707, "grad_norm": 0.08138225227594376, "learning_rate": 8.823005698005698e-06, "loss": 0.0184, "step": 111500 }, { "epoch": 0.8242659885869726, "grad_norm": 0.09080573171377182, "learning_rate": 8.819296058879392e-06, "loss": 0.017, "step": 111510 }, { "epoch": 0.8243399071582744, "grad_norm": 0.09142883867025375, "learning_rate": 8.815586419753087e-06, "loss": 0.0189, "step": 111520 }, { "epoch": 0.8244138257295763, "grad_norm": 0.08484750241041183, "learning_rate": 8.811876780626781e-06, "loss": 0.0146, "step": 111530 }, { "epoch": 0.8244877443008781, "grad_norm": 0.08886722475290298, "learning_rate": 8.808167141500475e-06, "loss": 0.0166, "step": 111540 }, { "epoch": 0.82456166287218, "grad_norm": 0.05006714537739754, "learning_rate": 8.80445750237417e-06, "loss": 0.0148, "step": 111550 }, { "epoch": 0.8246355814434818, "grad_norm": 0.0701959878206253, "learning_rate": 8.800747863247864e-06, "loss": 0.015, "step": 111560 }, { "epoch": 0.8247095000147837, "grad_norm": 0.06328166276216507, "learning_rate": 8.797038224121558e-06, "loss": 0.0169, "step": 111570 }, { "epoch": 0.8247834185860856, "grad_norm": 0.10973607003688812, "learning_rate": 8.793328584995252e-06, "loss": 0.0186, "step": 111580 }, { "epoch": 0.8248573371573874, "grad_norm": 0.05845305323600769, "learning_rate": 8.789618945868947e-06, "loss": 0.0163, "step": 111590 }, { "epoch": 0.8249312557286893, "grad_norm": 0.0932711809873581, "learning_rate": 8.785909306742641e-06, "loss": 0.019, "step": 111600 }, { "epoch": 0.8250051742999911, "grad_norm": 0.07008972018957138, "learning_rate": 8.782199667616335e-06, "loss": 0.0165, "step": 111610 }, { "epoch": 0.825079092871293, "grad_norm": 0.1511513590812683, "learning_rate": 8.77849002849003e-06, "loss": 0.0185, "step": 111620 }, { "epoch": 0.8251530114425948, "grad_norm": 0.08395200967788696, "learning_rate": 8.774780389363724e-06, "loss": 0.0155, "step": 111630 }, { "epoch": 0.8252269300138967, "grad_norm": 0.06920085102319717, "learning_rate": 8.771070750237418e-06, "loss": 0.0199, "step": 111640 }, { "epoch": 0.8253008485851986, "grad_norm": 0.06494973599910736, "learning_rate": 8.76736111111111e-06, "loss": 0.0157, "step": 111650 }, { "epoch": 0.8253747671565004, "grad_norm": 0.09502291679382324, "learning_rate": 8.763651471984807e-06, "loss": 0.016, "step": 111660 }, { "epoch": 0.8254486857278023, "grad_norm": 0.06811536103487015, "learning_rate": 8.759941832858499e-06, "loss": 0.0157, "step": 111670 }, { "epoch": 0.8255226042991041, "grad_norm": 0.051820866763591766, "learning_rate": 8.756232193732195e-06, "loss": 0.0165, "step": 111680 }, { "epoch": 0.825596522870406, "grad_norm": 0.08794544637203217, "learning_rate": 8.752522554605888e-06, "loss": 0.0168, "step": 111690 }, { "epoch": 0.8256704414417079, "grad_norm": 0.08136577159166336, "learning_rate": 8.748812915479584e-06, "loss": 0.0166, "step": 111700 }, { "epoch": 0.8257443600130097, "grad_norm": 0.06894486397504807, "learning_rate": 8.745103276353276e-06, "loss": 0.0159, "step": 111710 }, { "epoch": 0.8258182785843116, "grad_norm": 0.07653698325157166, "learning_rate": 8.74139363722697e-06, "loss": 0.0179, "step": 111720 }, { "epoch": 0.8258921971556134, "grad_norm": 0.0793883353471756, "learning_rate": 8.737683998100665e-06, "loss": 0.0166, "step": 111730 }, { "epoch": 0.8259661157269153, "grad_norm": 0.06822627782821655, "learning_rate": 8.733974358974359e-06, "loss": 0.0163, "step": 111740 }, { "epoch": 0.826040034298217, "grad_norm": 0.0709918662905693, "learning_rate": 8.730264719848053e-06, "loss": 0.0179, "step": 111750 }, { "epoch": 0.8261139528695189, "grad_norm": 0.09269371628761292, "learning_rate": 8.726555080721748e-06, "loss": 0.019, "step": 111760 }, { "epoch": 0.8261878714408208, "grad_norm": 0.09144306927919388, "learning_rate": 8.722845441595442e-06, "loss": 0.0173, "step": 111770 }, { "epoch": 0.8262617900121226, "grad_norm": 0.06571599096059799, "learning_rate": 8.719135802469136e-06, "loss": 0.0165, "step": 111780 }, { "epoch": 0.8263357085834245, "grad_norm": 0.0643790140748024, "learning_rate": 8.71542616334283e-06, "loss": 0.0165, "step": 111790 }, { "epoch": 0.8264096271547263, "grad_norm": 0.06768159568309784, "learning_rate": 8.711716524216525e-06, "loss": 0.0177, "step": 111800 }, { "epoch": 0.8264835457260282, "grad_norm": 0.08055918663740158, "learning_rate": 8.708006885090219e-06, "loss": 0.0172, "step": 111810 }, { "epoch": 0.82655746429733, "grad_norm": 0.06265651434659958, "learning_rate": 8.704297245963913e-06, "loss": 0.0157, "step": 111820 }, { "epoch": 0.8266313828686319, "grad_norm": 0.08364095538854599, "learning_rate": 8.700587606837607e-06, "loss": 0.0158, "step": 111830 }, { "epoch": 0.8267053014399338, "grad_norm": 0.08273705095052719, "learning_rate": 8.696877967711302e-06, "loss": 0.0152, "step": 111840 }, { "epoch": 0.8267792200112356, "grad_norm": 0.09029737114906311, "learning_rate": 8.693168328584996e-06, "loss": 0.0174, "step": 111850 }, { "epoch": 0.8268531385825375, "grad_norm": 0.0713384598493576, "learning_rate": 8.68945868945869e-06, "loss": 0.0165, "step": 111860 }, { "epoch": 0.8269270571538393, "grad_norm": 0.08109275251626968, "learning_rate": 8.685749050332385e-06, "loss": 0.0181, "step": 111870 }, { "epoch": 0.8270009757251412, "grad_norm": 0.0740184634923935, "learning_rate": 8.682039411206077e-06, "loss": 0.0181, "step": 111880 }, { "epoch": 0.827074894296443, "grad_norm": 0.07795722037553787, "learning_rate": 8.678329772079773e-06, "loss": 0.0159, "step": 111890 }, { "epoch": 0.8271488128677449, "grad_norm": 0.06488138437271118, "learning_rate": 8.674620132953466e-06, "loss": 0.0163, "step": 111900 }, { "epoch": 0.8272227314390468, "grad_norm": 0.07889160513877869, "learning_rate": 8.670910493827162e-06, "loss": 0.0164, "step": 111910 }, { "epoch": 0.8272966500103486, "grad_norm": 0.07143231481313705, "learning_rate": 8.667200854700854e-06, "loss": 0.0149, "step": 111920 }, { "epoch": 0.8273705685816505, "grad_norm": 0.07042443007230759, "learning_rate": 8.66349121557455e-06, "loss": 0.0163, "step": 111930 }, { "epoch": 0.8274444871529523, "grad_norm": 0.08917763829231262, "learning_rate": 8.659781576448243e-06, "loss": 0.0179, "step": 111940 }, { "epoch": 0.8275184057242542, "grad_norm": 0.06888213753700256, "learning_rate": 8.656071937321937e-06, "loss": 0.015, "step": 111950 }, { "epoch": 0.8275923242955561, "grad_norm": 0.07403473556041718, "learning_rate": 8.652362298195631e-06, "loss": 0.0167, "step": 111960 }, { "epoch": 0.8276662428668579, "grad_norm": 0.06935565918684006, "learning_rate": 8.648652659069326e-06, "loss": 0.0189, "step": 111970 }, { "epoch": 0.8277401614381598, "grad_norm": 0.07324974238872528, "learning_rate": 8.64494301994302e-06, "loss": 0.0167, "step": 111980 }, { "epoch": 0.8278140800094616, "grad_norm": 0.06783628463745117, "learning_rate": 8.641233380816714e-06, "loss": 0.0177, "step": 111990 }, { "epoch": 0.8278879985807635, "grad_norm": 0.07136841118335724, "learning_rate": 8.637523741690408e-06, "loss": 0.0169, "step": 112000 }, { "epoch": 0.8279619171520652, "grad_norm": 0.058445774018764496, "learning_rate": 8.633814102564103e-06, "loss": 0.0187, "step": 112010 }, { "epoch": 0.8280358357233671, "grad_norm": 0.07820957899093628, "learning_rate": 8.630104463437797e-06, "loss": 0.0164, "step": 112020 }, { "epoch": 0.828109754294669, "grad_norm": 0.0695708617568016, "learning_rate": 8.626394824311491e-06, "loss": 0.0179, "step": 112030 }, { "epoch": 0.8281836728659708, "grad_norm": 0.10643965005874634, "learning_rate": 8.622685185185186e-06, "loss": 0.0176, "step": 112040 }, { "epoch": 0.8282575914372727, "grad_norm": 0.06634803116321564, "learning_rate": 8.61897554605888e-06, "loss": 0.0144, "step": 112050 }, { "epoch": 0.8283315100085745, "grad_norm": 0.09435508400201797, "learning_rate": 8.615265906932574e-06, "loss": 0.018, "step": 112060 }, { "epoch": 0.8284054285798764, "grad_norm": 0.08138860762119293, "learning_rate": 8.611556267806268e-06, "loss": 0.0169, "step": 112070 }, { "epoch": 0.8284793471511782, "grad_norm": 0.07405047863721848, "learning_rate": 8.607846628679963e-06, "loss": 0.018, "step": 112080 }, { "epoch": 0.8285532657224801, "grad_norm": 0.08439838141202927, "learning_rate": 8.604136989553657e-06, "loss": 0.0183, "step": 112090 }, { "epoch": 0.828627184293782, "grad_norm": 0.09000565856695175, "learning_rate": 8.600427350427351e-06, "loss": 0.0182, "step": 112100 }, { "epoch": 0.8287011028650838, "grad_norm": 0.11695726960897446, "learning_rate": 8.596717711301044e-06, "loss": 0.0151, "step": 112110 }, { "epoch": 0.8287750214363857, "grad_norm": 0.07910315692424774, "learning_rate": 8.59300807217474e-06, "loss": 0.0174, "step": 112120 }, { "epoch": 0.8288489400076875, "grad_norm": 0.07704861462116241, "learning_rate": 8.589298433048432e-06, "loss": 0.0166, "step": 112130 }, { "epoch": 0.8289228585789894, "grad_norm": 0.05719153210520744, "learning_rate": 8.585588793922128e-06, "loss": 0.0181, "step": 112140 }, { "epoch": 0.8289967771502912, "grad_norm": 0.0725087821483612, "learning_rate": 8.58187915479582e-06, "loss": 0.0165, "step": 112150 }, { "epoch": 0.8290706957215931, "grad_norm": 0.10840688645839691, "learning_rate": 8.578169515669517e-06, "loss": 0.0165, "step": 112160 }, { "epoch": 0.829144614292895, "grad_norm": 0.10662669688463211, "learning_rate": 8.57445987654321e-06, "loss": 0.0185, "step": 112170 }, { "epoch": 0.8292185328641968, "grad_norm": 0.07385453581809998, "learning_rate": 8.570750237416904e-06, "loss": 0.018, "step": 112180 }, { "epoch": 0.8292924514354987, "grad_norm": 0.06940528750419617, "learning_rate": 8.5670405982906e-06, "loss": 0.0162, "step": 112190 }, { "epoch": 0.8293663700068005, "grad_norm": 0.08754272758960724, "learning_rate": 8.563330959164292e-06, "loss": 0.0178, "step": 112200 }, { "epoch": 0.8294402885781024, "grad_norm": 0.09364213049411774, "learning_rate": 8.559621320037988e-06, "loss": 0.0163, "step": 112210 }, { "epoch": 0.8295142071494043, "grad_norm": 0.050510212779045105, "learning_rate": 8.55591168091168e-06, "loss": 0.0163, "step": 112220 }, { "epoch": 0.8295881257207061, "grad_norm": 0.07437706738710403, "learning_rate": 8.552202041785377e-06, "loss": 0.0168, "step": 112230 }, { "epoch": 0.829662044292008, "grad_norm": 0.0602116733789444, "learning_rate": 8.54849240265907e-06, "loss": 0.0152, "step": 112240 }, { "epoch": 0.8297359628633098, "grad_norm": 0.08567924797534943, "learning_rate": 8.544782763532765e-06, "loss": 0.0193, "step": 112250 }, { "epoch": 0.8298098814346117, "grad_norm": 0.07743331044912338, "learning_rate": 8.541073124406458e-06, "loss": 0.0143, "step": 112260 }, { "epoch": 0.8298838000059134, "grad_norm": 0.10275121033191681, "learning_rate": 8.537363485280152e-06, "loss": 0.0198, "step": 112270 }, { "epoch": 0.8299577185772153, "grad_norm": 0.09203140437602997, "learning_rate": 8.533653846153846e-06, "loss": 0.0159, "step": 112280 }, { "epoch": 0.8300316371485172, "grad_norm": 0.09986036270856857, "learning_rate": 8.52994420702754e-06, "loss": 0.0179, "step": 112290 }, { "epoch": 0.830105555719819, "grad_norm": 0.08891689032316208, "learning_rate": 8.526234567901235e-06, "loss": 0.0179, "step": 112300 }, { "epoch": 0.8301794742911209, "grad_norm": 0.0643787607550621, "learning_rate": 8.52252492877493e-06, "loss": 0.0159, "step": 112310 }, { "epoch": 0.8302533928624227, "grad_norm": 0.08382133394479752, "learning_rate": 8.518815289648623e-06, "loss": 0.0169, "step": 112320 }, { "epoch": 0.8303273114337246, "grad_norm": 0.06668587774038315, "learning_rate": 8.515105650522318e-06, "loss": 0.0168, "step": 112330 }, { "epoch": 0.8304012300050264, "grad_norm": 0.04851803183555603, "learning_rate": 8.51139601139601e-06, "loss": 0.0154, "step": 112340 }, { "epoch": 0.8304751485763283, "grad_norm": 0.0806727260351181, "learning_rate": 8.507686372269706e-06, "loss": 0.0156, "step": 112350 }, { "epoch": 0.8305490671476302, "grad_norm": 0.09491311013698578, "learning_rate": 8.5039767331434e-06, "loss": 0.0188, "step": 112360 }, { "epoch": 0.830622985718932, "grad_norm": 0.08105544745922089, "learning_rate": 8.500267094017095e-06, "loss": 0.0191, "step": 112370 }, { "epoch": 0.8306969042902339, "grad_norm": 0.10776547342538834, "learning_rate": 8.496557454890789e-06, "loss": 0.0159, "step": 112380 }, { "epoch": 0.8307708228615357, "grad_norm": 0.06740918010473251, "learning_rate": 8.492847815764483e-06, "loss": 0.0165, "step": 112390 }, { "epoch": 0.8308447414328376, "grad_norm": 0.07434792071580887, "learning_rate": 8.489138176638178e-06, "loss": 0.0168, "step": 112400 }, { "epoch": 0.8309186600041394, "grad_norm": 0.097455695271492, "learning_rate": 8.485428537511872e-06, "loss": 0.0176, "step": 112410 }, { "epoch": 0.8309925785754413, "grad_norm": 0.09368898719549179, "learning_rate": 8.481718898385566e-06, "loss": 0.0186, "step": 112420 }, { "epoch": 0.8310664971467432, "grad_norm": 0.07678214460611343, "learning_rate": 8.478009259259259e-06, "loss": 0.0192, "step": 112430 }, { "epoch": 0.831140415718045, "grad_norm": 0.0795578882098198, "learning_rate": 8.474299620132955e-06, "loss": 0.0177, "step": 112440 }, { "epoch": 0.8312143342893469, "grad_norm": 0.08029112219810486, "learning_rate": 8.470589981006647e-06, "loss": 0.0175, "step": 112450 }, { "epoch": 0.8312882528606487, "grad_norm": 0.09500591456890106, "learning_rate": 8.466880341880343e-06, "loss": 0.0193, "step": 112460 }, { "epoch": 0.8313621714319506, "grad_norm": 0.06177762895822525, "learning_rate": 8.463170702754036e-06, "loss": 0.0169, "step": 112470 }, { "epoch": 0.8314360900032525, "grad_norm": 0.0490424744784832, "learning_rate": 8.459461063627732e-06, "loss": 0.0189, "step": 112480 }, { "epoch": 0.8315100085745543, "grad_norm": 0.06845315545797348, "learning_rate": 8.455751424501424e-06, "loss": 0.0163, "step": 112490 }, { "epoch": 0.8315839271458562, "grad_norm": 0.0934458076953888, "learning_rate": 8.452041785375119e-06, "loss": 0.0179, "step": 112500 }, { "epoch": 0.831657845717158, "grad_norm": 0.07834131270647049, "learning_rate": 8.448332146248813e-06, "loss": 0.0175, "step": 112510 }, { "epoch": 0.8317317642884599, "grad_norm": 0.08731988817453384, "learning_rate": 8.444622507122507e-06, "loss": 0.0177, "step": 112520 }, { "epoch": 0.8318056828597616, "grad_norm": 0.10199026763439178, "learning_rate": 8.440912867996202e-06, "loss": 0.0173, "step": 112530 }, { "epoch": 0.8318796014310635, "grad_norm": 0.06616607308387756, "learning_rate": 8.437203228869896e-06, "loss": 0.018, "step": 112540 }, { "epoch": 0.8319535200023654, "grad_norm": 0.07040773332118988, "learning_rate": 8.43349358974359e-06, "loss": 0.0159, "step": 112550 }, { "epoch": 0.8320274385736672, "grad_norm": 0.07667144387960434, "learning_rate": 8.429783950617284e-06, "loss": 0.0156, "step": 112560 }, { "epoch": 0.8321013571449691, "grad_norm": 0.06196437403559685, "learning_rate": 8.426074311490979e-06, "loss": 0.0165, "step": 112570 }, { "epoch": 0.8321752757162709, "grad_norm": 0.07039918005466461, "learning_rate": 8.422364672364673e-06, "loss": 0.0163, "step": 112580 }, { "epoch": 0.8322491942875728, "grad_norm": 0.11908307671546936, "learning_rate": 8.418655033238367e-06, "loss": 0.0159, "step": 112590 }, { "epoch": 0.8323231128588746, "grad_norm": 0.07255138456821442, "learning_rate": 8.414945394112061e-06, "loss": 0.0178, "step": 112600 }, { "epoch": 0.8323970314301765, "grad_norm": 0.07477567344903946, "learning_rate": 8.411235754985756e-06, "loss": 0.0164, "step": 112610 }, { "epoch": 0.8324709500014784, "grad_norm": 0.0650930404663086, "learning_rate": 8.40752611585945e-06, "loss": 0.0189, "step": 112620 }, { "epoch": 0.8325448685727802, "grad_norm": 0.09028556942939758, "learning_rate": 8.403816476733144e-06, "loss": 0.0174, "step": 112630 }, { "epoch": 0.8326187871440821, "grad_norm": 0.08841590583324432, "learning_rate": 8.400106837606839e-06, "loss": 0.0214, "step": 112640 }, { "epoch": 0.8326927057153839, "grad_norm": 0.06070448085665703, "learning_rate": 8.396397198480533e-06, "loss": 0.0169, "step": 112650 }, { "epoch": 0.8327666242866858, "grad_norm": 0.0896754264831543, "learning_rate": 8.392687559354225e-06, "loss": 0.0157, "step": 112660 }, { "epoch": 0.8328405428579876, "grad_norm": 0.09884040802717209, "learning_rate": 8.388977920227921e-06, "loss": 0.0173, "step": 112670 }, { "epoch": 0.8329144614292895, "grad_norm": 0.06297778338193893, "learning_rate": 8.385268281101614e-06, "loss": 0.0187, "step": 112680 }, { "epoch": 0.8329883800005914, "grad_norm": 0.07070671766996384, "learning_rate": 8.38155864197531e-06, "loss": 0.0193, "step": 112690 }, { "epoch": 0.8330622985718932, "grad_norm": 0.06912417709827423, "learning_rate": 8.377849002849002e-06, "loss": 0.0187, "step": 112700 }, { "epoch": 0.8331362171431951, "grad_norm": 0.05659592151641846, "learning_rate": 8.374139363722698e-06, "loss": 0.0166, "step": 112710 }, { "epoch": 0.8332101357144969, "grad_norm": 0.06409688293933868, "learning_rate": 8.370429724596391e-06, "loss": 0.0154, "step": 112720 }, { "epoch": 0.8332840542857988, "grad_norm": 0.05440429225564003, "learning_rate": 8.366720085470085e-06, "loss": 0.0159, "step": 112730 }, { "epoch": 0.8333579728571007, "grad_norm": 0.07336640357971191, "learning_rate": 8.36301044634378e-06, "loss": 0.0179, "step": 112740 }, { "epoch": 0.8334318914284025, "grad_norm": 0.08996220678091049, "learning_rate": 8.359300807217474e-06, "loss": 0.0165, "step": 112750 }, { "epoch": 0.8335058099997044, "grad_norm": 0.05664195865392685, "learning_rate": 8.355591168091168e-06, "loss": 0.0162, "step": 112760 }, { "epoch": 0.8335797285710062, "grad_norm": 0.07927602529525757, "learning_rate": 8.351881528964862e-06, "loss": 0.0181, "step": 112770 }, { "epoch": 0.833653647142308, "grad_norm": 0.08353345841169357, "learning_rate": 8.348171889838557e-06, "loss": 0.0162, "step": 112780 }, { "epoch": 0.8337275657136098, "grad_norm": 0.07481536269187927, "learning_rate": 8.344462250712251e-06, "loss": 0.0179, "step": 112790 }, { "epoch": 0.8338014842849117, "grad_norm": 0.07783810794353485, "learning_rate": 8.340752611585945e-06, "loss": 0.0164, "step": 112800 }, { "epoch": 0.8338754028562136, "grad_norm": 0.0775987058877945, "learning_rate": 8.33704297245964e-06, "loss": 0.0181, "step": 112810 }, { "epoch": 0.8339493214275154, "grad_norm": 0.07131469249725342, "learning_rate": 8.333333333333334e-06, "loss": 0.018, "step": 112820 }, { "epoch": 0.8340232399988173, "grad_norm": 0.088409923017025, "learning_rate": 8.329623694207028e-06, "loss": 0.0185, "step": 112830 }, { "epoch": 0.8340971585701191, "grad_norm": 0.09589328616857529, "learning_rate": 8.325914055080722e-06, "loss": 0.0154, "step": 112840 }, { "epoch": 0.834171077141421, "grad_norm": 0.06863109767436981, "learning_rate": 8.322204415954417e-06, "loss": 0.0167, "step": 112850 }, { "epoch": 0.8342449957127228, "grad_norm": 0.08470222353935242, "learning_rate": 8.31849477682811e-06, "loss": 0.0143, "step": 112860 }, { "epoch": 0.8343189142840247, "grad_norm": 0.07842252403497696, "learning_rate": 8.314785137701805e-06, "loss": 0.0182, "step": 112870 }, { "epoch": 0.8343928328553266, "grad_norm": 0.08063441514968872, "learning_rate": 8.3110754985755e-06, "loss": 0.0193, "step": 112880 }, { "epoch": 0.8344667514266284, "grad_norm": 0.05889301747083664, "learning_rate": 8.307365859449192e-06, "loss": 0.0186, "step": 112890 }, { "epoch": 0.8345406699979303, "grad_norm": 0.06008196622133255, "learning_rate": 8.303656220322888e-06, "loss": 0.0187, "step": 112900 }, { "epoch": 0.8346145885692321, "grad_norm": 0.07193055003881454, "learning_rate": 8.29994658119658e-06, "loss": 0.0203, "step": 112910 }, { "epoch": 0.834688507140534, "grad_norm": 0.1248464584350586, "learning_rate": 8.296236942070276e-06, "loss": 0.0156, "step": 112920 }, { "epoch": 0.8347624257118358, "grad_norm": 0.06551370769739151, "learning_rate": 8.292527302943969e-06, "loss": 0.0189, "step": 112930 }, { "epoch": 0.8348363442831377, "grad_norm": 0.08450902998447418, "learning_rate": 8.288817663817665e-06, "loss": 0.0194, "step": 112940 }, { "epoch": 0.8349102628544396, "grad_norm": 0.07757827639579773, "learning_rate": 8.28510802469136e-06, "loss": 0.0182, "step": 112950 }, { "epoch": 0.8349841814257414, "grad_norm": 0.08068808168172836, "learning_rate": 8.281398385565052e-06, "loss": 0.0172, "step": 112960 }, { "epoch": 0.8350580999970433, "grad_norm": 0.07426097244024277, "learning_rate": 8.277688746438748e-06, "loss": 0.0185, "step": 112970 }, { "epoch": 0.8351320185683451, "grad_norm": 0.07921002060174942, "learning_rate": 8.27397910731244e-06, "loss": 0.019, "step": 112980 }, { "epoch": 0.835205937139647, "grad_norm": 0.06329020857810974, "learning_rate": 8.270269468186136e-06, "loss": 0.0171, "step": 112990 }, { "epoch": 0.8352798557109489, "grad_norm": 0.062443338334560394, "learning_rate": 8.266559829059829e-06, "loss": 0.0142, "step": 113000 }, { "epoch": 0.8353537742822507, "grad_norm": 0.10196477174758911, "learning_rate": 8.262850189933525e-06, "loss": 0.0195, "step": 113010 }, { "epoch": 0.8354276928535526, "grad_norm": 0.0638945996761322, "learning_rate": 8.259140550807217e-06, "loss": 0.0175, "step": 113020 }, { "epoch": 0.8355016114248544, "grad_norm": 0.0810115784406662, "learning_rate": 8.255430911680912e-06, "loss": 0.0151, "step": 113030 }, { "epoch": 0.8355755299961563, "grad_norm": 0.07323488593101501, "learning_rate": 8.251721272554606e-06, "loss": 0.0189, "step": 113040 }, { "epoch": 0.835649448567458, "grad_norm": 0.06533502042293549, "learning_rate": 8.2480116334283e-06, "loss": 0.0162, "step": 113050 }, { "epoch": 0.8357233671387599, "grad_norm": 0.08269846439361572, "learning_rate": 8.244301994301995e-06, "loss": 0.0189, "step": 113060 }, { "epoch": 0.8357972857100618, "grad_norm": 0.05778762325644493, "learning_rate": 8.240592355175689e-06, "loss": 0.016, "step": 113070 }, { "epoch": 0.8358712042813636, "grad_norm": 0.0773656889796257, "learning_rate": 8.236882716049383e-06, "loss": 0.0168, "step": 113080 }, { "epoch": 0.8359451228526655, "grad_norm": 0.08560159057378769, "learning_rate": 8.233173076923077e-06, "loss": 0.0179, "step": 113090 }, { "epoch": 0.8360190414239673, "grad_norm": 0.060995034873485565, "learning_rate": 8.229463437796772e-06, "loss": 0.0185, "step": 113100 }, { "epoch": 0.8360929599952692, "grad_norm": 0.10020701587200165, "learning_rate": 8.225753798670466e-06, "loss": 0.0171, "step": 113110 }, { "epoch": 0.836166878566571, "grad_norm": 0.06637522578239441, "learning_rate": 8.22204415954416e-06, "loss": 0.0157, "step": 113120 }, { "epoch": 0.8362407971378729, "grad_norm": 0.07538586109876633, "learning_rate": 8.218334520417854e-06, "loss": 0.0171, "step": 113130 }, { "epoch": 0.8363147157091748, "grad_norm": 0.061674199998378754, "learning_rate": 8.214624881291549e-06, "loss": 0.0137, "step": 113140 }, { "epoch": 0.8363886342804766, "grad_norm": 0.10180699080228806, "learning_rate": 8.210915242165243e-06, "loss": 0.0157, "step": 113150 }, { "epoch": 0.8364625528517785, "grad_norm": 0.07494241744279861, "learning_rate": 8.207205603038937e-06, "loss": 0.0165, "step": 113160 }, { "epoch": 0.8365364714230803, "grad_norm": 0.07682108879089355, "learning_rate": 8.203495963912632e-06, "loss": 0.0158, "step": 113170 }, { "epoch": 0.8366103899943822, "grad_norm": 0.09721241891384125, "learning_rate": 8.199786324786326e-06, "loss": 0.0157, "step": 113180 }, { "epoch": 0.836684308565684, "grad_norm": 0.11956478655338287, "learning_rate": 8.196076685660018e-06, "loss": 0.0177, "step": 113190 }, { "epoch": 0.8367582271369859, "grad_norm": 0.08993244171142578, "learning_rate": 8.192367046533714e-06, "loss": 0.0167, "step": 113200 }, { "epoch": 0.8368321457082878, "grad_norm": 0.08010384440422058, "learning_rate": 8.188657407407407e-06, "loss": 0.0182, "step": 113210 }, { "epoch": 0.8369060642795896, "grad_norm": 0.06815112382173538, "learning_rate": 8.184947768281103e-06, "loss": 0.014, "step": 113220 }, { "epoch": 0.8369799828508915, "grad_norm": 0.06534769386053085, "learning_rate": 8.181238129154796e-06, "loss": 0.0165, "step": 113230 }, { "epoch": 0.8370539014221933, "grad_norm": 0.0753227025270462, "learning_rate": 8.177528490028492e-06, "loss": 0.0182, "step": 113240 }, { "epoch": 0.8371278199934952, "grad_norm": 0.10362353920936584, "learning_rate": 8.173818850902184e-06, "loss": 0.018, "step": 113250 }, { "epoch": 0.8372017385647971, "grad_norm": 0.08055713027715683, "learning_rate": 8.170109211775878e-06, "loss": 0.0168, "step": 113260 }, { "epoch": 0.8372756571360989, "grad_norm": 0.07950981706380844, "learning_rate": 8.166399572649573e-06, "loss": 0.0168, "step": 113270 }, { "epoch": 0.8373495757074008, "grad_norm": 0.07329950481653214, "learning_rate": 8.162689933523267e-06, "loss": 0.0159, "step": 113280 }, { "epoch": 0.8374234942787026, "grad_norm": 0.08244430273771286, "learning_rate": 8.158980294396961e-06, "loss": 0.0189, "step": 113290 }, { "epoch": 0.8374974128500045, "grad_norm": 0.08308271318674088, "learning_rate": 8.155270655270655e-06, "loss": 0.0162, "step": 113300 }, { "epoch": 0.8375713314213062, "grad_norm": 0.08598320931196213, "learning_rate": 8.15156101614435e-06, "loss": 0.0169, "step": 113310 }, { "epoch": 0.8376452499926081, "grad_norm": 0.06367569416761398, "learning_rate": 8.147851377018044e-06, "loss": 0.0162, "step": 113320 }, { "epoch": 0.83771916856391, "grad_norm": 0.058816634118556976, "learning_rate": 8.144141737891738e-06, "loss": 0.0183, "step": 113330 }, { "epoch": 0.8377930871352118, "grad_norm": 0.0877440795302391, "learning_rate": 8.140432098765433e-06, "loss": 0.0161, "step": 113340 }, { "epoch": 0.8378670057065137, "grad_norm": 0.054381802678108215, "learning_rate": 8.136722459639127e-06, "loss": 0.0167, "step": 113350 }, { "epoch": 0.8379409242778155, "grad_norm": 0.05709408223628998, "learning_rate": 8.133012820512821e-06, "loss": 0.0197, "step": 113360 }, { "epoch": 0.8380148428491174, "grad_norm": 0.058122724294662476, "learning_rate": 8.129303181386515e-06, "loss": 0.0169, "step": 113370 }, { "epoch": 0.8380887614204192, "grad_norm": 0.07336383312940598, "learning_rate": 8.12559354226021e-06, "loss": 0.0178, "step": 113380 }, { "epoch": 0.8381626799917211, "grad_norm": 0.07232995331287384, "learning_rate": 8.121883903133904e-06, "loss": 0.0174, "step": 113390 }, { "epoch": 0.838236598563023, "grad_norm": 0.08827503025531769, "learning_rate": 8.118174264007598e-06, "loss": 0.0192, "step": 113400 }, { "epoch": 0.8383105171343248, "grad_norm": 0.059943169355392456, "learning_rate": 8.114464624881292e-06, "loss": 0.0168, "step": 113410 }, { "epoch": 0.8383844357056267, "grad_norm": 0.07064764946699142, "learning_rate": 8.110754985754985e-06, "loss": 0.0161, "step": 113420 }, { "epoch": 0.8384583542769285, "grad_norm": 0.08875328302383423, "learning_rate": 8.107045346628681e-06, "loss": 0.0172, "step": 113430 }, { "epoch": 0.8385322728482304, "grad_norm": 0.08653685450553894, "learning_rate": 8.103335707502374e-06, "loss": 0.0184, "step": 113440 }, { "epoch": 0.8386061914195323, "grad_norm": 0.0637197494506836, "learning_rate": 8.09962606837607e-06, "loss": 0.0154, "step": 113450 }, { "epoch": 0.8386801099908341, "grad_norm": 0.09255864471197128, "learning_rate": 8.095916429249762e-06, "loss": 0.0195, "step": 113460 }, { "epoch": 0.838754028562136, "grad_norm": 0.06862996518611908, "learning_rate": 8.092206790123458e-06, "loss": 0.0159, "step": 113470 }, { "epoch": 0.8388279471334378, "grad_norm": 0.08368046581745148, "learning_rate": 8.08849715099715e-06, "loss": 0.0171, "step": 113480 }, { "epoch": 0.8389018657047397, "grad_norm": 0.07304484397172928, "learning_rate": 8.084787511870845e-06, "loss": 0.0167, "step": 113490 }, { "epoch": 0.8389757842760415, "grad_norm": 0.07190733402967453, "learning_rate": 8.08107787274454e-06, "loss": 0.0157, "step": 113500 }, { "epoch": 0.8390497028473434, "grad_norm": 0.08199194818735123, "learning_rate": 8.077368233618233e-06, "loss": 0.0168, "step": 113510 }, { "epoch": 0.8391236214186453, "grad_norm": 0.050676293671131134, "learning_rate": 8.073658594491928e-06, "loss": 0.0187, "step": 113520 }, { "epoch": 0.8391975399899471, "grad_norm": 0.0652499794960022, "learning_rate": 8.069948955365622e-06, "loss": 0.016, "step": 113530 }, { "epoch": 0.839271458561249, "grad_norm": 0.09222967177629471, "learning_rate": 8.066239316239316e-06, "loss": 0.0161, "step": 113540 }, { "epoch": 0.8393453771325508, "grad_norm": 0.10128255933523178, "learning_rate": 8.06252967711301e-06, "loss": 0.0154, "step": 113550 }, { "epoch": 0.8394192957038527, "grad_norm": 0.06996087729930878, "learning_rate": 8.058820037986707e-06, "loss": 0.0186, "step": 113560 }, { "epoch": 0.8394932142751544, "grad_norm": 0.06131841987371445, "learning_rate": 8.055110398860399e-06, "loss": 0.0178, "step": 113570 }, { "epoch": 0.8395671328464563, "grad_norm": 0.08808259665966034, "learning_rate": 8.051400759734093e-06, "loss": 0.0175, "step": 113580 }, { "epoch": 0.8396410514177582, "grad_norm": 0.050261400640010834, "learning_rate": 8.047691120607788e-06, "loss": 0.0153, "step": 113590 }, { "epoch": 0.83971496998906, "grad_norm": 0.0644235759973526, "learning_rate": 8.043981481481482e-06, "loss": 0.0163, "step": 113600 }, { "epoch": 0.8397888885603619, "grad_norm": 0.07216489315032959, "learning_rate": 8.040271842355176e-06, "loss": 0.0164, "step": 113610 }, { "epoch": 0.8398628071316637, "grad_norm": 0.08070338517427444, "learning_rate": 8.03656220322887e-06, "loss": 0.0183, "step": 113620 }, { "epoch": 0.8399367257029656, "grad_norm": 0.06366539746522903, "learning_rate": 8.032852564102565e-06, "loss": 0.0139, "step": 113630 }, { "epoch": 0.8400106442742674, "grad_norm": 0.08025801181793213, "learning_rate": 8.029142924976259e-06, "loss": 0.0162, "step": 113640 }, { "epoch": 0.8400845628455693, "grad_norm": 0.0852746069431305, "learning_rate": 8.025433285849952e-06, "loss": 0.0178, "step": 113650 }, { "epoch": 0.8401584814168712, "grad_norm": 0.07929253578186035, "learning_rate": 8.021723646723648e-06, "loss": 0.0188, "step": 113660 }, { "epoch": 0.840232399988173, "grad_norm": 0.06703963130712509, "learning_rate": 8.01801400759734e-06, "loss": 0.0181, "step": 113670 }, { "epoch": 0.8403063185594749, "grad_norm": 0.08296431601047516, "learning_rate": 8.014304368471036e-06, "loss": 0.0159, "step": 113680 }, { "epoch": 0.8403802371307767, "grad_norm": 0.07311347126960754, "learning_rate": 8.010594729344729e-06, "loss": 0.0172, "step": 113690 }, { "epoch": 0.8404541557020786, "grad_norm": 0.0729018971323967, "learning_rate": 8.006885090218425e-06, "loss": 0.0172, "step": 113700 }, { "epoch": 0.8405280742733805, "grad_norm": 0.07265544682741165, "learning_rate": 8.003175451092117e-06, "loss": 0.0166, "step": 113710 }, { "epoch": 0.8406019928446823, "grad_norm": 0.057415880262851715, "learning_rate": 7.999465811965812e-06, "loss": 0.0162, "step": 113720 }, { "epoch": 0.8406759114159842, "grad_norm": 0.062344666570425034, "learning_rate": 7.995756172839507e-06, "loss": 0.0188, "step": 113730 }, { "epoch": 0.840749829987286, "grad_norm": 0.08408903330564499, "learning_rate": 7.9920465337132e-06, "loss": 0.015, "step": 113740 }, { "epoch": 0.8408237485585879, "grad_norm": 0.07730511575937271, "learning_rate": 7.988336894586896e-06, "loss": 0.0167, "step": 113750 }, { "epoch": 0.8408976671298897, "grad_norm": 0.10464189946651459, "learning_rate": 7.984627255460589e-06, "loss": 0.0176, "step": 113760 }, { "epoch": 0.8409715857011916, "grad_norm": 0.09843303263187408, "learning_rate": 7.980917616334285e-06, "loss": 0.0179, "step": 113770 }, { "epoch": 0.8410455042724935, "grad_norm": 0.07486625015735626, "learning_rate": 7.977207977207977e-06, "loss": 0.0155, "step": 113780 }, { "epoch": 0.8411194228437953, "grad_norm": 0.07983004301786423, "learning_rate": 7.973498338081673e-06, "loss": 0.017, "step": 113790 }, { "epoch": 0.8411933414150972, "grad_norm": 0.07761627435684204, "learning_rate": 7.969788698955366e-06, "loss": 0.0163, "step": 113800 }, { "epoch": 0.841267259986399, "grad_norm": 0.06571881473064423, "learning_rate": 7.96607905982906e-06, "loss": 0.0157, "step": 113810 }, { "epoch": 0.8413411785577009, "grad_norm": 0.08808450400829315, "learning_rate": 7.962369420702754e-06, "loss": 0.0181, "step": 113820 }, { "epoch": 0.8414150971290026, "grad_norm": 0.09128312021493912, "learning_rate": 7.958659781576449e-06, "loss": 0.0166, "step": 113830 }, { "epoch": 0.8414890157003045, "grad_norm": 0.054315246641635895, "learning_rate": 7.954950142450143e-06, "loss": 0.0174, "step": 113840 }, { "epoch": 0.8415629342716064, "grad_norm": 0.08689109981060028, "learning_rate": 7.951240503323837e-06, "loss": 0.0174, "step": 113850 }, { "epoch": 0.8416368528429082, "grad_norm": 0.06928656995296478, "learning_rate": 7.947530864197531e-06, "loss": 0.0175, "step": 113860 }, { "epoch": 0.8417107714142101, "grad_norm": 0.0875593051314354, "learning_rate": 7.943821225071226e-06, "loss": 0.0177, "step": 113870 }, { "epoch": 0.8417846899855119, "grad_norm": 0.07347662001848221, "learning_rate": 7.94011158594492e-06, "loss": 0.0158, "step": 113880 }, { "epoch": 0.8418586085568138, "grad_norm": 0.08090215176343918, "learning_rate": 7.936401946818614e-06, "loss": 0.019, "step": 113890 }, { "epoch": 0.8419325271281156, "grad_norm": 0.06561324000358582, "learning_rate": 7.932692307692308e-06, "loss": 0.0187, "step": 113900 }, { "epoch": 0.8420064456994175, "grad_norm": 0.0978066697716713, "learning_rate": 7.928982668566003e-06, "loss": 0.0154, "step": 113910 }, { "epoch": 0.8420803642707194, "grad_norm": 0.06843017041683197, "learning_rate": 7.925273029439697e-06, "loss": 0.0155, "step": 113920 }, { "epoch": 0.8421542828420212, "grad_norm": 0.051617275923490524, "learning_rate": 7.921563390313391e-06, "loss": 0.0163, "step": 113930 }, { "epoch": 0.8422282014133231, "grad_norm": 0.07422637194395065, "learning_rate": 7.917853751187086e-06, "loss": 0.0187, "step": 113940 }, { "epoch": 0.8423021199846249, "grad_norm": 0.06312904506921768, "learning_rate": 7.914144112060778e-06, "loss": 0.0182, "step": 113950 }, { "epoch": 0.8423760385559268, "grad_norm": 0.056918200105428696, "learning_rate": 7.910434472934474e-06, "loss": 0.0149, "step": 113960 }, { "epoch": 0.8424499571272287, "grad_norm": 0.08495678007602692, "learning_rate": 7.906724833808167e-06, "loss": 0.0206, "step": 113970 }, { "epoch": 0.8425238756985305, "grad_norm": 0.07521885633468628, "learning_rate": 7.903015194681863e-06, "loss": 0.0168, "step": 113980 }, { "epoch": 0.8425977942698324, "grad_norm": 0.06670526415109634, "learning_rate": 7.899305555555555e-06, "loss": 0.0184, "step": 113990 }, { "epoch": 0.8426717128411342, "grad_norm": 0.08714757114648819, "learning_rate": 7.895595916429251e-06, "loss": 0.0205, "step": 114000 }, { "epoch": 0.8427456314124361, "grad_norm": 0.07835818082094193, "learning_rate": 7.891886277302944e-06, "loss": 0.0182, "step": 114010 }, { "epoch": 0.8428195499837379, "grad_norm": 0.0721108689904213, "learning_rate": 7.88817663817664e-06, "loss": 0.0178, "step": 114020 }, { "epoch": 0.8428934685550398, "grad_norm": 0.07580531388521194, "learning_rate": 7.884466999050332e-06, "loss": 0.0195, "step": 114030 }, { "epoch": 0.8429673871263417, "grad_norm": 0.06839949637651443, "learning_rate": 7.880757359924027e-06, "loss": 0.0174, "step": 114040 }, { "epoch": 0.8430413056976435, "grad_norm": 0.07175683975219727, "learning_rate": 7.87704772079772e-06, "loss": 0.0197, "step": 114050 }, { "epoch": 0.8431152242689454, "grad_norm": 0.0730174258351326, "learning_rate": 7.873338081671415e-06, "loss": 0.0154, "step": 114060 }, { "epoch": 0.8431891428402472, "grad_norm": 0.06311491131782532, "learning_rate": 7.86962844254511e-06, "loss": 0.0169, "step": 114070 }, { "epoch": 0.843263061411549, "grad_norm": 0.11842292547225952, "learning_rate": 7.865918803418804e-06, "loss": 0.0215, "step": 114080 }, { "epoch": 0.8433369799828508, "grad_norm": 0.06746391952037811, "learning_rate": 7.862209164292498e-06, "loss": 0.0168, "step": 114090 }, { "epoch": 0.8434108985541527, "grad_norm": 0.0839524045586586, "learning_rate": 7.858499525166192e-06, "loss": 0.0203, "step": 114100 }, { "epoch": 0.8434848171254546, "grad_norm": 0.07548878341913223, "learning_rate": 7.854789886039886e-06, "loss": 0.0172, "step": 114110 }, { "epoch": 0.8435587356967564, "grad_norm": 0.07536860555410385, "learning_rate": 7.85108024691358e-06, "loss": 0.0155, "step": 114120 }, { "epoch": 0.8436326542680583, "grad_norm": 0.06918346881866455, "learning_rate": 7.847370607787275e-06, "loss": 0.0182, "step": 114130 }, { "epoch": 0.8437065728393601, "grad_norm": 0.06039072573184967, "learning_rate": 7.84366096866097e-06, "loss": 0.0162, "step": 114140 }, { "epoch": 0.843780491410662, "grad_norm": 0.06949473172426224, "learning_rate": 7.839951329534664e-06, "loss": 0.0159, "step": 114150 }, { "epoch": 0.8438544099819638, "grad_norm": 0.07249496132135391, "learning_rate": 7.836241690408358e-06, "loss": 0.0187, "step": 114160 }, { "epoch": 0.8439283285532657, "grad_norm": 0.05909942835569382, "learning_rate": 7.832532051282052e-06, "loss": 0.0156, "step": 114170 }, { "epoch": 0.8440022471245676, "grad_norm": 0.08391299843788147, "learning_rate": 7.828822412155745e-06, "loss": 0.0171, "step": 114180 }, { "epoch": 0.8440761656958694, "grad_norm": 0.07751631736755371, "learning_rate": 7.82511277302944e-06, "loss": 0.016, "step": 114190 }, { "epoch": 0.8441500842671713, "grad_norm": 0.07122786343097687, "learning_rate": 7.821403133903133e-06, "loss": 0.016, "step": 114200 }, { "epoch": 0.8442240028384731, "grad_norm": 0.07838122546672821, "learning_rate": 7.81769349477683e-06, "loss": 0.0172, "step": 114210 }, { "epoch": 0.844297921409775, "grad_norm": 0.06461747735738754, "learning_rate": 7.813983855650522e-06, "loss": 0.0169, "step": 114220 }, { "epoch": 0.8443718399810769, "grad_norm": 0.0487394705414772, "learning_rate": 7.810274216524218e-06, "loss": 0.0173, "step": 114230 }, { "epoch": 0.8444457585523787, "grad_norm": 0.07009081542491913, "learning_rate": 7.80656457739791e-06, "loss": 0.0168, "step": 114240 }, { "epoch": 0.8445196771236806, "grad_norm": 0.06905744224786758, "learning_rate": 7.802854938271606e-06, "loss": 0.016, "step": 114250 }, { "epoch": 0.8445935956949824, "grad_norm": 0.09879184514284134, "learning_rate": 7.799145299145299e-06, "loss": 0.0168, "step": 114260 }, { "epoch": 0.8446675142662843, "grad_norm": 0.0833684578537941, "learning_rate": 7.795435660018993e-06, "loss": 0.0182, "step": 114270 }, { "epoch": 0.8447414328375861, "grad_norm": 0.16463328897953033, "learning_rate": 7.791726020892687e-06, "loss": 0.0156, "step": 114280 }, { "epoch": 0.844815351408888, "grad_norm": 0.07162051647901535, "learning_rate": 7.788016381766382e-06, "loss": 0.0172, "step": 114290 }, { "epoch": 0.8448892699801899, "grad_norm": 0.06895631551742554, "learning_rate": 7.784306742640076e-06, "loss": 0.0169, "step": 114300 }, { "epoch": 0.8449631885514917, "grad_norm": 0.0861455574631691, "learning_rate": 7.78059710351377e-06, "loss": 0.0155, "step": 114310 }, { "epoch": 0.8450371071227936, "grad_norm": 0.06159092113375664, "learning_rate": 7.776887464387465e-06, "loss": 0.0143, "step": 114320 }, { "epoch": 0.8451110256940954, "grad_norm": 0.06688378751277924, "learning_rate": 7.773177825261159e-06, "loss": 0.0159, "step": 114330 }, { "epoch": 0.8451849442653973, "grad_norm": 0.05591675639152527, "learning_rate": 7.769468186134853e-06, "loss": 0.0153, "step": 114340 }, { "epoch": 0.845258862836699, "grad_norm": 0.07992582023143768, "learning_rate": 7.765758547008547e-06, "loss": 0.0149, "step": 114350 }, { "epoch": 0.8453327814080009, "grad_norm": 0.07913650572299957, "learning_rate": 7.762048907882242e-06, "loss": 0.0152, "step": 114360 }, { "epoch": 0.8454066999793028, "grad_norm": 0.09735318273305893, "learning_rate": 7.758339268755936e-06, "loss": 0.0202, "step": 114370 }, { "epoch": 0.8454806185506046, "grad_norm": 0.10346204042434692, "learning_rate": 7.75462962962963e-06, "loss": 0.018, "step": 114380 }, { "epoch": 0.8455545371219065, "grad_norm": 0.06874750554561615, "learning_rate": 7.750919990503324e-06, "loss": 0.0178, "step": 114390 }, { "epoch": 0.8456284556932083, "grad_norm": 0.06304837018251419, "learning_rate": 7.747210351377019e-06, "loss": 0.0168, "step": 114400 }, { "epoch": 0.8457023742645102, "grad_norm": 0.09782323241233826, "learning_rate": 7.743500712250711e-06, "loss": 0.0171, "step": 114410 }, { "epoch": 0.845776292835812, "grad_norm": 0.07600420713424683, "learning_rate": 7.739791073124407e-06, "loss": 0.0175, "step": 114420 }, { "epoch": 0.8458502114071139, "grad_norm": 0.06809110939502716, "learning_rate": 7.7360814339981e-06, "loss": 0.0185, "step": 114430 }, { "epoch": 0.8459241299784158, "grad_norm": 0.09009858220815659, "learning_rate": 7.732371794871796e-06, "loss": 0.0132, "step": 114440 }, { "epoch": 0.8459980485497176, "grad_norm": 0.0723966658115387, "learning_rate": 7.728662155745488e-06, "loss": 0.0167, "step": 114450 }, { "epoch": 0.8460719671210195, "grad_norm": 0.08759031444787979, "learning_rate": 7.724952516619184e-06, "loss": 0.0158, "step": 114460 }, { "epoch": 0.8461458856923213, "grad_norm": 0.070323146879673, "learning_rate": 7.721242877492877e-06, "loss": 0.0161, "step": 114470 }, { "epoch": 0.8462198042636232, "grad_norm": 0.07351279258728027, "learning_rate": 7.717533238366573e-06, "loss": 0.0162, "step": 114480 }, { "epoch": 0.8462937228349251, "grad_norm": 0.057759840041399, "learning_rate": 7.713823599240267e-06, "loss": 0.0161, "step": 114490 }, { "epoch": 0.8463676414062269, "grad_norm": 0.08374207466840744, "learning_rate": 7.71011396011396e-06, "loss": 0.0179, "step": 114500 }, { "epoch": 0.8464415599775288, "grad_norm": 0.09012964367866516, "learning_rate": 7.706404320987656e-06, "loss": 0.0176, "step": 114510 }, { "epoch": 0.8465154785488306, "grad_norm": 0.06130051985383034, "learning_rate": 7.702694681861348e-06, "loss": 0.0156, "step": 114520 }, { "epoch": 0.8465893971201325, "grad_norm": 0.07411500811576843, "learning_rate": 7.698985042735044e-06, "loss": 0.0158, "step": 114530 }, { "epoch": 0.8466633156914343, "grad_norm": 0.06992583721876144, "learning_rate": 7.695275403608737e-06, "loss": 0.0164, "step": 114540 }, { "epoch": 0.8467372342627362, "grad_norm": 0.07129481434822083, "learning_rate": 7.691565764482433e-06, "loss": 0.0175, "step": 114550 }, { "epoch": 0.8468111528340381, "grad_norm": 0.08502853661775589, "learning_rate": 7.687856125356125e-06, "loss": 0.0144, "step": 114560 }, { "epoch": 0.8468850714053399, "grad_norm": 0.09787731617689133, "learning_rate": 7.68414648622982e-06, "loss": 0.0183, "step": 114570 }, { "epoch": 0.8469589899766418, "grad_norm": 0.06659726053476334, "learning_rate": 7.680436847103514e-06, "loss": 0.0159, "step": 114580 }, { "epoch": 0.8470329085479436, "grad_norm": 0.11812327802181244, "learning_rate": 7.676727207977208e-06, "loss": 0.0174, "step": 114590 }, { "epoch": 0.8471068271192455, "grad_norm": 0.08446000516414642, "learning_rate": 7.673017568850902e-06, "loss": 0.0155, "step": 114600 }, { "epoch": 0.8471807456905472, "grad_norm": 0.08022765070199966, "learning_rate": 7.669307929724597e-06, "loss": 0.017, "step": 114610 }, { "epoch": 0.8472546642618491, "grad_norm": 0.06662660837173462, "learning_rate": 7.665598290598291e-06, "loss": 0.0166, "step": 114620 }, { "epoch": 0.847328582833151, "grad_norm": 0.09599727392196655, "learning_rate": 7.661888651471985e-06, "loss": 0.0174, "step": 114630 }, { "epoch": 0.8474025014044528, "grad_norm": 0.059651460498571396, "learning_rate": 7.658179012345678e-06, "loss": 0.0197, "step": 114640 }, { "epoch": 0.8474764199757547, "grad_norm": 0.08370509743690491, "learning_rate": 7.654469373219374e-06, "loss": 0.0161, "step": 114650 }, { "epoch": 0.8475503385470565, "grad_norm": 0.09393087029457092, "learning_rate": 7.650759734093068e-06, "loss": 0.019, "step": 114660 }, { "epoch": 0.8476242571183584, "grad_norm": 0.09295108169317245, "learning_rate": 7.647050094966762e-06, "loss": 0.0168, "step": 114670 }, { "epoch": 0.8476981756896602, "grad_norm": 0.06544492393732071, "learning_rate": 7.643340455840457e-06, "loss": 0.0174, "step": 114680 }, { "epoch": 0.8477720942609621, "grad_norm": 0.06274139136075974, "learning_rate": 7.639630816714151e-06, "loss": 0.0188, "step": 114690 }, { "epoch": 0.847846012832264, "grad_norm": 0.06864412128925323, "learning_rate": 7.635921177587845e-06, "loss": 0.0177, "step": 114700 }, { "epoch": 0.8479199314035658, "grad_norm": 0.06328914314508438, "learning_rate": 7.63221153846154e-06, "loss": 0.0168, "step": 114710 }, { "epoch": 0.8479938499748677, "grad_norm": 0.07179394364356995, "learning_rate": 7.628501899335233e-06, "loss": 0.0171, "step": 114720 }, { "epoch": 0.8480677685461695, "grad_norm": 0.07428892701864243, "learning_rate": 7.624792260208926e-06, "loss": 0.0162, "step": 114730 }, { "epoch": 0.8481416871174714, "grad_norm": 0.08414027839899063, "learning_rate": 7.6210826210826214e-06, "loss": 0.017, "step": 114740 }, { "epoch": 0.8482156056887733, "grad_norm": 0.06821225583553314, "learning_rate": 7.617372981956315e-06, "loss": 0.018, "step": 114750 }, { "epoch": 0.8482895242600751, "grad_norm": 0.08133241534233093, "learning_rate": 7.61366334283001e-06, "loss": 0.0171, "step": 114760 }, { "epoch": 0.848363442831377, "grad_norm": 0.075186587870121, "learning_rate": 7.609953703703704e-06, "loss": 0.017, "step": 114770 }, { "epoch": 0.8484373614026788, "grad_norm": 0.10400836914777756, "learning_rate": 7.6062440645773985e-06, "loss": 0.017, "step": 114780 }, { "epoch": 0.8485112799739807, "grad_norm": 0.07053770869970322, "learning_rate": 7.602534425451093e-06, "loss": 0.0165, "step": 114790 }, { "epoch": 0.8485851985452825, "grad_norm": 0.08867017924785614, "learning_rate": 7.598824786324786e-06, "loss": 0.0179, "step": 114800 }, { "epoch": 0.8486591171165844, "grad_norm": 0.07490748167037964, "learning_rate": 7.595115147198481e-06, "loss": 0.0187, "step": 114810 }, { "epoch": 0.8487330356878863, "grad_norm": 0.06685304641723633, "learning_rate": 7.591405508072175e-06, "loss": 0.0169, "step": 114820 }, { "epoch": 0.8488069542591881, "grad_norm": 0.07209019362926483, "learning_rate": 7.58769586894587e-06, "loss": 0.0165, "step": 114830 }, { "epoch": 0.84888087283049, "grad_norm": 0.057875972241163254, "learning_rate": 7.583986229819563e-06, "loss": 0.0168, "step": 114840 }, { "epoch": 0.8489547914017918, "grad_norm": 0.06630287319421768, "learning_rate": 7.5802765906932584e-06, "loss": 0.0159, "step": 114850 }, { "epoch": 0.8490287099730937, "grad_norm": 0.06411104649305344, "learning_rate": 7.576566951566952e-06, "loss": 0.0159, "step": 114860 }, { "epoch": 0.8491026285443954, "grad_norm": 0.07323815673589706, "learning_rate": 7.572857312440645e-06, "loss": 0.0169, "step": 114870 }, { "epoch": 0.8491765471156973, "grad_norm": 0.11291278153657913, "learning_rate": 7.56914767331434e-06, "loss": 0.0185, "step": 114880 }, { "epoch": 0.8492504656869992, "grad_norm": 0.1187766045331955, "learning_rate": 7.565438034188034e-06, "loss": 0.0203, "step": 114890 }, { "epoch": 0.849324384258301, "grad_norm": 0.0863991230726242, "learning_rate": 7.561728395061729e-06, "loss": 0.0174, "step": 114900 }, { "epoch": 0.8493983028296029, "grad_norm": 0.10659705102443695, "learning_rate": 7.558018755935422e-06, "loss": 0.0158, "step": 114910 }, { "epoch": 0.8494722214009047, "grad_norm": 0.10216405987739563, "learning_rate": 7.5543091168091175e-06, "loss": 0.0176, "step": 114920 }, { "epoch": 0.8495461399722066, "grad_norm": 0.10233411192893982, "learning_rate": 7.550599477682811e-06, "loss": 0.0183, "step": 114930 }, { "epoch": 0.8496200585435084, "grad_norm": 0.07432316243648529, "learning_rate": 7.546889838556506e-06, "loss": 0.0173, "step": 114940 }, { "epoch": 0.8496939771148103, "grad_norm": 0.06906505674123764, "learning_rate": 7.5431801994301995e-06, "loss": 0.0168, "step": 114950 }, { "epoch": 0.8497678956861122, "grad_norm": 0.060751430690288544, "learning_rate": 7.539470560303894e-06, "loss": 0.0158, "step": 114960 }, { "epoch": 0.849841814257414, "grad_norm": 0.08791016787290573, "learning_rate": 7.535760921177588e-06, "loss": 0.0168, "step": 114970 }, { "epoch": 0.8499157328287159, "grad_norm": 0.08157749474048615, "learning_rate": 7.532051282051282e-06, "loss": 0.0186, "step": 114980 }, { "epoch": 0.8499896514000177, "grad_norm": 0.07384663820266724, "learning_rate": 7.528341642924977e-06, "loss": 0.018, "step": 114990 }, { "epoch": 0.8500635699713196, "grad_norm": 0.05789195001125336, "learning_rate": 7.524632003798671e-06, "loss": 0.0173, "step": 115000 }, { "epoch": 0.8501374885426215, "grad_norm": 0.07224296778440475, "learning_rate": 7.520922364672366e-06, "loss": 0.0174, "step": 115010 }, { "epoch": 0.8502114071139233, "grad_norm": 0.0817876011133194, "learning_rate": 7.517212725546059e-06, "loss": 0.0203, "step": 115020 }, { "epoch": 0.8502853256852252, "grad_norm": 0.10659945011138916, "learning_rate": 7.513503086419753e-06, "loss": 0.0158, "step": 115030 }, { "epoch": 0.850359244256527, "grad_norm": 0.06013638898730278, "learning_rate": 7.509793447293448e-06, "loss": 0.0139, "step": 115040 }, { "epoch": 0.8504331628278289, "grad_norm": 0.12041322141885757, "learning_rate": 7.506083808167141e-06, "loss": 0.0154, "step": 115050 }, { "epoch": 0.8505070813991307, "grad_norm": 0.08451178669929504, "learning_rate": 7.5023741690408365e-06, "loss": 0.0181, "step": 115060 }, { "epoch": 0.8505809999704326, "grad_norm": 0.05861854925751686, "learning_rate": 7.49866452991453e-06, "loss": 0.0162, "step": 115070 }, { "epoch": 0.8506549185417345, "grad_norm": 0.05970502272248268, "learning_rate": 7.494954890788225e-06, "loss": 0.0173, "step": 115080 }, { "epoch": 0.8507288371130363, "grad_norm": 0.06429281830787659, "learning_rate": 7.4912452516619184e-06, "loss": 0.019, "step": 115090 }, { "epoch": 0.8508027556843382, "grad_norm": 0.05995741859078407, "learning_rate": 7.4875356125356136e-06, "loss": 0.0196, "step": 115100 }, { "epoch": 0.85087667425564, "grad_norm": 0.13881725072860718, "learning_rate": 7.483825973409307e-06, "loss": 0.0185, "step": 115110 }, { "epoch": 0.8509505928269419, "grad_norm": 0.08064279705286026, "learning_rate": 7.480116334283e-06, "loss": 0.0167, "step": 115120 }, { "epoch": 0.8510245113982436, "grad_norm": 0.10625021904706955, "learning_rate": 7.4764066951566955e-06, "loss": 0.0165, "step": 115130 }, { "epoch": 0.8510984299695455, "grad_norm": 0.09444019198417664, "learning_rate": 7.47269705603039e-06, "loss": 0.0186, "step": 115140 }, { "epoch": 0.8511723485408474, "grad_norm": 0.07402213662862778, "learning_rate": 7.468987416904084e-06, "loss": 0.0172, "step": 115150 }, { "epoch": 0.8512462671121492, "grad_norm": 0.07309866696596146, "learning_rate": 7.465277777777778e-06, "loss": 0.0181, "step": 115160 }, { "epoch": 0.8513201856834511, "grad_norm": 0.08333403617143631, "learning_rate": 7.461568138651473e-06, "loss": 0.017, "step": 115170 }, { "epoch": 0.8513941042547529, "grad_norm": 0.06300780922174454, "learning_rate": 7.457858499525167e-06, "loss": 0.0162, "step": 115180 }, { "epoch": 0.8514680228260548, "grad_norm": 0.06013835594058037, "learning_rate": 7.45414886039886e-06, "loss": 0.0173, "step": 115190 }, { "epoch": 0.8515419413973566, "grad_norm": 0.06843134760856628, "learning_rate": 7.4504392212725554e-06, "loss": 0.0154, "step": 115200 }, { "epoch": 0.8516158599686585, "grad_norm": 0.0664408728480339, "learning_rate": 7.446729582146249e-06, "loss": 0.0177, "step": 115210 }, { "epoch": 0.8516897785399604, "grad_norm": 0.08137643337249756, "learning_rate": 7.443019943019944e-06, "loss": 0.015, "step": 115220 }, { "epoch": 0.8517636971112622, "grad_norm": 0.07489970326423645, "learning_rate": 7.439310303893637e-06, "loss": 0.0156, "step": 115230 }, { "epoch": 0.8518376156825641, "grad_norm": 0.08225582540035248, "learning_rate": 7.4356006647673325e-06, "loss": 0.0179, "step": 115240 }, { "epoch": 0.8519115342538659, "grad_norm": 0.09738589823246002, "learning_rate": 7.431891025641026e-06, "loss": 0.0166, "step": 115250 }, { "epoch": 0.8519854528251678, "grad_norm": 0.11694802343845367, "learning_rate": 7.428181386514719e-06, "loss": 0.0179, "step": 115260 }, { "epoch": 0.8520593713964697, "grad_norm": 0.08086628466844559, "learning_rate": 7.4244717473884145e-06, "loss": 0.0187, "step": 115270 }, { "epoch": 0.8521332899677715, "grad_norm": 0.08938246220350266, "learning_rate": 7.420762108262108e-06, "loss": 0.0148, "step": 115280 }, { "epoch": 0.8522072085390734, "grad_norm": 0.06674882769584656, "learning_rate": 7.417052469135803e-06, "loss": 0.0145, "step": 115290 }, { "epoch": 0.8522811271103752, "grad_norm": 0.08584784716367722, "learning_rate": 7.4133428300094965e-06, "loss": 0.016, "step": 115300 }, { "epoch": 0.8523550456816771, "grad_norm": 0.09939748048782349, "learning_rate": 7.409633190883192e-06, "loss": 0.0155, "step": 115310 }, { "epoch": 0.8524289642529789, "grad_norm": 0.0794292464852333, "learning_rate": 7.405923551756885e-06, "loss": 0.0172, "step": 115320 }, { "epoch": 0.8525028828242808, "grad_norm": 0.08377964794635773, "learning_rate": 7.40221391263058e-06, "loss": 0.0163, "step": 115330 }, { "epoch": 0.8525768013955827, "grad_norm": 0.06709586083889008, "learning_rate": 7.3985042735042736e-06, "loss": 0.0192, "step": 115340 }, { "epoch": 0.8526507199668845, "grad_norm": 0.05004974827170372, "learning_rate": 7.394794634377968e-06, "loss": 0.0178, "step": 115350 }, { "epoch": 0.8527246385381864, "grad_norm": 0.0567706897854805, "learning_rate": 7.391084995251663e-06, "loss": 0.0136, "step": 115360 }, { "epoch": 0.8527985571094882, "grad_norm": 0.07427140325307846, "learning_rate": 7.387375356125356e-06, "loss": 0.0169, "step": 115370 }, { "epoch": 0.85287247568079, "grad_norm": 0.08540726453065872, "learning_rate": 7.3836657169990515e-06, "loss": 0.0175, "step": 115380 }, { "epoch": 0.8529463942520918, "grad_norm": 0.07391873747110367, "learning_rate": 7.379956077872745e-06, "loss": 0.0176, "step": 115390 }, { "epoch": 0.8530203128233937, "grad_norm": 0.09952632337808609, "learning_rate": 7.37624643874644e-06, "loss": 0.0207, "step": 115400 }, { "epoch": 0.8530942313946956, "grad_norm": 0.08218296617269516, "learning_rate": 7.3725367996201335e-06, "loss": 0.016, "step": 115410 }, { "epoch": 0.8531681499659974, "grad_norm": 0.07376095652580261, "learning_rate": 7.368827160493827e-06, "loss": 0.017, "step": 115420 }, { "epoch": 0.8532420685372993, "grad_norm": 0.07043536007404327, "learning_rate": 7.365117521367522e-06, "loss": 0.0175, "step": 115430 }, { "epoch": 0.8533159871086011, "grad_norm": 0.1102387085556984, "learning_rate": 7.3614078822412154e-06, "loss": 0.0178, "step": 115440 }, { "epoch": 0.853389905679903, "grad_norm": 0.06748242676258087, "learning_rate": 7.3576982431149106e-06, "loss": 0.0177, "step": 115450 }, { "epoch": 0.8534638242512049, "grad_norm": 0.11292647570371628, "learning_rate": 7.353988603988604e-06, "loss": 0.0196, "step": 115460 }, { "epoch": 0.8535377428225067, "grad_norm": 0.05069030448794365, "learning_rate": 7.350278964862299e-06, "loss": 0.0145, "step": 115470 }, { "epoch": 0.8536116613938086, "grad_norm": 0.09397978335618973, "learning_rate": 7.3465693257359925e-06, "loss": 0.0163, "step": 115480 }, { "epoch": 0.8536855799651104, "grad_norm": 0.07535222917795181, "learning_rate": 7.342859686609686e-06, "loss": 0.0156, "step": 115490 }, { "epoch": 0.8537594985364123, "grad_norm": 0.0793633908033371, "learning_rate": 7.339150047483381e-06, "loss": 0.0155, "step": 115500 }, { "epoch": 0.8538334171077141, "grad_norm": 0.06356097757816315, "learning_rate": 7.3354404083570745e-06, "loss": 0.0158, "step": 115510 }, { "epoch": 0.853907335679016, "grad_norm": 0.07921180874109268, "learning_rate": 7.33173076923077e-06, "loss": 0.0171, "step": 115520 }, { "epoch": 0.8539812542503179, "grad_norm": 0.08289998024702072, "learning_rate": 7.328021130104464e-06, "loss": 0.0175, "step": 115530 }, { "epoch": 0.8540551728216197, "grad_norm": 0.09399142861366272, "learning_rate": 7.324311490978158e-06, "loss": 0.0181, "step": 115540 }, { "epoch": 0.8541290913929216, "grad_norm": 0.10622493177652359, "learning_rate": 7.3206018518518524e-06, "loss": 0.0162, "step": 115550 }, { "epoch": 0.8542030099642234, "grad_norm": 0.09257388859987259, "learning_rate": 7.316892212725547e-06, "loss": 0.016, "step": 115560 }, { "epoch": 0.8542769285355253, "grad_norm": 0.06779754906892776, "learning_rate": 7.313182573599241e-06, "loss": 0.0167, "step": 115570 }, { "epoch": 0.8543508471068271, "grad_norm": 0.050333622843027115, "learning_rate": 7.309472934472934e-06, "loss": 0.0159, "step": 115580 }, { "epoch": 0.854424765678129, "grad_norm": 0.0706227645277977, "learning_rate": 7.3057632953466295e-06, "loss": 0.0177, "step": 115590 }, { "epoch": 0.8544986842494309, "grad_norm": 0.06861665844917297, "learning_rate": 7.302053656220323e-06, "loss": 0.0167, "step": 115600 }, { "epoch": 0.8545726028207327, "grad_norm": 0.08953040838241577, "learning_rate": 7.298344017094018e-06, "loss": 0.0152, "step": 115610 }, { "epoch": 0.8546465213920346, "grad_norm": 0.08950774371623993, "learning_rate": 7.2946343779677115e-06, "loss": 0.0195, "step": 115620 }, { "epoch": 0.8547204399633364, "grad_norm": 0.08176089078187943, "learning_rate": 7.290924738841407e-06, "loss": 0.0184, "step": 115630 }, { "epoch": 0.8547943585346383, "grad_norm": 0.07711353898048401, "learning_rate": 7.2872150997151e-06, "loss": 0.0178, "step": 115640 }, { "epoch": 0.85486827710594, "grad_norm": 0.0611676424741745, "learning_rate": 7.2835054605887935e-06, "loss": 0.0152, "step": 115650 }, { "epoch": 0.854942195677242, "grad_norm": 0.06824205815792084, "learning_rate": 7.279795821462489e-06, "loss": 0.015, "step": 115660 }, { "epoch": 0.8550161142485438, "grad_norm": 0.10189816355705261, "learning_rate": 7.276086182336182e-06, "loss": 0.0187, "step": 115670 }, { "epoch": 0.8550900328198456, "grad_norm": 0.05837101861834526, "learning_rate": 7.272376543209877e-06, "loss": 0.0198, "step": 115680 }, { "epoch": 0.8551639513911475, "grad_norm": 0.060416217893362045, "learning_rate": 7.2686669040835706e-06, "loss": 0.0182, "step": 115690 }, { "epoch": 0.8552378699624493, "grad_norm": 0.081768698990345, "learning_rate": 7.264957264957266e-06, "loss": 0.0167, "step": 115700 }, { "epoch": 0.8553117885337512, "grad_norm": 0.07554604113101959, "learning_rate": 7.261247625830959e-06, "loss": 0.0173, "step": 115710 }, { "epoch": 0.8553857071050531, "grad_norm": 0.0834031030535698, "learning_rate": 7.257537986704653e-06, "loss": 0.0173, "step": 115720 }, { "epoch": 0.8554596256763549, "grad_norm": 0.08713842928409576, "learning_rate": 7.253828347578348e-06, "loss": 0.0173, "step": 115730 }, { "epoch": 0.8555335442476568, "grad_norm": 0.06996472924947739, "learning_rate": 7.250118708452042e-06, "loss": 0.015, "step": 115740 }, { "epoch": 0.8556074628189586, "grad_norm": 0.08058460801839828, "learning_rate": 7.246409069325737e-06, "loss": 0.0171, "step": 115750 }, { "epoch": 0.8556813813902605, "grad_norm": 0.12176497280597687, "learning_rate": 7.2426994301994305e-06, "loss": 0.0183, "step": 115760 }, { "epoch": 0.8557552999615623, "grad_norm": 0.07371469587087631, "learning_rate": 7.238989791073126e-06, "loss": 0.0178, "step": 115770 }, { "epoch": 0.8558292185328642, "grad_norm": 0.09114787727594376, "learning_rate": 7.235280151946819e-06, "loss": 0.0203, "step": 115780 }, { "epoch": 0.8559031371041661, "grad_norm": 0.07811260223388672, "learning_rate": 7.231570512820514e-06, "loss": 0.0154, "step": 115790 }, { "epoch": 0.8559770556754679, "grad_norm": 0.07033334672451019, "learning_rate": 7.227860873694208e-06, "loss": 0.0187, "step": 115800 }, { "epoch": 0.8560509742467698, "grad_norm": 0.08371198922395706, "learning_rate": 7.224151234567901e-06, "loss": 0.0185, "step": 115810 }, { "epoch": 0.8561248928180716, "grad_norm": 0.07226774841547012, "learning_rate": 7.220441595441596e-06, "loss": 0.016, "step": 115820 }, { "epoch": 0.8561988113893735, "grad_norm": 0.05450865626335144, "learning_rate": 7.2167319563152896e-06, "loss": 0.0179, "step": 115830 }, { "epoch": 0.8562727299606753, "grad_norm": 0.08719339966773987, "learning_rate": 7.213022317188985e-06, "loss": 0.0187, "step": 115840 }, { "epoch": 0.8563466485319772, "grad_norm": 0.08201997727155685, "learning_rate": 7.209312678062678e-06, "loss": 0.0155, "step": 115850 }, { "epoch": 0.8564205671032791, "grad_norm": 0.07213526964187622, "learning_rate": 7.205603038936373e-06, "loss": 0.018, "step": 115860 }, { "epoch": 0.8564944856745809, "grad_norm": 0.08951819688081741, "learning_rate": 7.201893399810067e-06, "loss": 0.0182, "step": 115870 }, { "epoch": 0.8565684042458828, "grad_norm": 0.10166043043136597, "learning_rate": 7.19818376068376e-06, "loss": 0.0182, "step": 115880 }, { "epoch": 0.8566423228171846, "grad_norm": 0.07484026253223419, "learning_rate": 7.194474121557455e-06, "loss": 0.0178, "step": 115890 }, { "epoch": 0.8567162413884865, "grad_norm": 0.07420425862073898, "learning_rate": 7.190764482431149e-06, "loss": 0.0173, "step": 115900 }, { "epoch": 0.8567901599597882, "grad_norm": 0.06019234657287598, "learning_rate": 7.187054843304844e-06, "loss": 0.0181, "step": 115910 }, { "epoch": 0.8568640785310901, "grad_norm": 0.08211581408977509, "learning_rate": 7.183345204178538e-06, "loss": 0.0175, "step": 115920 }, { "epoch": 0.856937997102392, "grad_norm": 0.08052533864974976, "learning_rate": 7.179635565052232e-06, "loss": 0.0164, "step": 115930 }, { "epoch": 0.8570119156736938, "grad_norm": 0.07633931189775467, "learning_rate": 7.1759259259259266e-06, "loss": 0.0166, "step": 115940 }, { "epoch": 0.8570858342449957, "grad_norm": 0.06322506070137024, "learning_rate": 7.17221628679962e-06, "loss": 0.0168, "step": 115950 }, { "epoch": 0.8571597528162975, "grad_norm": 0.11195466667413712, "learning_rate": 7.168506647673315e-06, "loss": 0.0174, "step": 115960 }, { "epoch": 0.8572336713875994, "grad_norm": 0.07432009279727936, "learning_rate": 7.1647970085470085e-06, "loss": 0.0178, "step": 115970 }, { "epoch": 0.8573075899589013, "grad_norm": 0.06407511979341507, "learning_rate": 7.161087369420704e-06, "loss": 0.0159, "step": 115980 }, { "epoch": 0.8573815085302031, "grad_norm": 0.07257718592882156, "learning_rate": 7.157377730294397e-06, "loss": 0.017, "step": 115990 }, { "epoch": 0.857455427101505, "grad_norm": 0.06976402550935745, "learning_rate": 7.153668091168092e-06, "loss": 0.0153, "step": 116000 }, { "epoch": 0.8575293456728068, "grad_norm": 0.06721027195453644, "learning_rate": 7.149958452041786e-06, "loss": 0.0167, "step": 116010 }, { "epoch": 0.8576032642441087, "grad_norm": 0.16653594374656677, "learning_rate": 7.146248812915481e-06, "loss": 0.0178, "step": 116020 }, { "epoch": 0.8576771828154105, "grad_norm": 0.08466430008411407, "learning_rate": 7.142539173789174e-06, "loss": 0.0177, "step": 116030 }, { "epoch": 0.8577511013867124, "grad_norm": 0.06317166984081268, "learning_rate": 7.138829534662868e-06, "loss": 0.0184, "step": 116040 }, { "epoch": 0.8578250199580143, "grad_norm": 0.06002514809370041, "learning_rate": 7.135119895536563e-06, "loss": 0.0171, "step": 116050 }, { "epoch": 0.8578989385293161, "grad_norm": 0.07541421055793762, "learning_rate": 7.131410256410256e-06, "loss": 0.0194, "step": 116060 }, { "epoch": 0.857972857100618, "grad_norm": 0.07419611513614655, "learning_rate": 7.127700617283951e-06, "loss": 0.0154, "step": 116070 }, { "epoch": 0.8580467756719198, "grad_norm": 0.07317450642585754, "learning_rate": 7.123990978157645e-06, "loss": 0.017, "step": 116080 }, { "epoch": 0.8581206942432217, "grad_norm": 0.08183526247739792, "learning_rate": 7.12028133903134e-06, "loss": 0.0194, "step": 116090 }, { "epoch": 0.8581946128145235, "grad_norm": 0.08646831661462784, "learning_rate": 7.116571699905033e-06, "loss": 0.0164, "step": 116100 }, { "epoch": 0.8582685313858254, "grad_norm": 0.11067859828472137, "learning_rate": 7.1128620607787275e-06, "loss": 0.0196, "step": 116110 }, { "epoch": 0.8583424499571273, "grad_norm": 0.0796673446893692, "learning_rate": 7.109152421652422e-06, "loss": 0.0156, "step": 116120 }, { "epoch": 0.8584163685284291, "grad_norm": 0.08841119706630707, "learning_rate": 7.105442782526116e-06, "loss": 0.0187, "step": 116130 }, { "epoch": 0.858490287099731, "grad_norm": 0.08489008247852325, "learning_rate": 7.101733143399811e-06, "loss": 0.0174, "step": 116140 }, { "epoch": 0.8585642056710328, "grad_norm": 0.06625782698392868, "learning_rate": 7.098023504273505e-06, "loss": 0.0175, "step": 116150 }, { "epoch": 0.8586381242423347, "grad_norm": 0.06949188560247421, "learning_rate": 7.0943138651472e-06, "loss": 0.0174, "step": 116160 }, { "epoch": 0.8587120428136364, "grad_norm": 0.07019592821598053, "learning_rate": 7.090604226020893e-06, "loss": 0.0165, "step": 116170 }, { "epoch": 0.8587859613849383, "grad_norm": 0.06393100321292877, "learning_rate": 7.0868945868945866e-06, "loss": 0.0165, "step": 116180 }, { "epoch": 0.8588598799562402, "grad_norm": 0.07292962074279785, "learning_rate": 7.083184947768282e-06, "loss": 0.0176, "step": 116190 }, { "epoch": 0.858933798527542, "grad_norm": 0.0762207955121994, "learning_rate": 7.079475308641975e-06, "loss": 0.0159, "step": 116200 }, { "epoch": 0.8590077170988439, "grad_norm": 0.0796966627240181, "learning_rate": 7.07576566951567e-06, "loss": 0.0163, "step": 116210 }, { "epoch": 0.8590816356701457, "grad_norm": 0.0938892513513565, "learning_rate": 7.072056030389364e-06, "loss": 0.017, "step": 116220 }, { "epoch": 0.8591555542414476, "grad_norm": 0.0776246041059494, "learning_rate": 7.068346391263059e-06, "loss": 0.0162, "step": 116230 }, { "epoch": 0.8592294728127495, "grad_norm": 0.05977245792746544, "learning_rate": 7.064636752136752e-06, "loss": 0.0171, "step": 116240 }, { "epoch": 0.8593033913840513, "grad_norm": 0.080894835293293, "learning_rate": 7.060927113010447e-06, "loss": 0.0175, "step": 116250 }, { "epoch": 0.8593773099553532, "grad_norm": 0.0629294291138649, "learning_rate": 7.057217473884141e-06, "loss": 0.0187, "step": 116260 }, { "epoch": 0.859451228526655, "grad_norm": 0.07322567701339722, "learning_rate": 7.053507834757834e-06, "loss": 0.0174, "step": 116270 }, { "epoch": 0.8595251470979569, "grad_norm": 0.09472065418958664, "learning_rate": 7.049798195631529e-06, "loss": 0.0202, "step": 116280 }, { "epoch": 0.8595990656692587, "grad_norm": 0.06628761440515518, "learning_rate": 7.0460885565052236e-06, "loss": 0.0172, "step": 116290 }, { "epoch": 0.8596729842405606, "grad_norm": 0.0796431228518486, "learning_rate": 7.042378917378918e-06, "loss": 0.0165, "step": 116300 }, { "epoch": 0.8597469028118625, "grad_norm": 0.10444629937410355, "learning_rate": 7.038669278252612e-06, "loss": 0.0175, "step": 116310 }, { "epoch": 0.8598208213831643, "grad_norm": 0.09770618379116058, "learning_rate": 7.034959639126306e-06, "loss": 0.0184, "step": 116320 }, { "epoch": 0.8598947399544662, "grad_norm": 0.072440005838871, "learning_rate": 7.031250000000001e-06, "loss": 0.0155, "step": 116330 }, { "epoch": 0.859968658525768, "grad_norm": 0.1179017499089241, "learning_rate": 7.027540360873694e-06, "loss": 0.0174, "step": 116340 }, { "epoch": 0.8600425770970699, "grad_norm": 0.0868251696228981, "learning_rate": 7.023830721747389e-06, "loss": 0.0182, "step": 116350 }, { "epoch": 0.8601164956683717, "grad_norm": 0.09221919625997543, "learning_rate": 7.020121082621083e-06, "loss": 0.017, "step": 116360 }, { "epoch": 0.8601904142396736, "grad_norm": 0.06533008813858032, "learning_rate": 7.016411443494778e-06, "loss": 0.0162, "step": 116370 }, { "epoch": 0.8602643328109755, "grad_norm": 0.0642094761133194, "learning_rate": 7.012701804368471e-06, "loss": 0.0169, "step": 116380 }, { "epoch": 0.8603382513822773, "grad_norm": 0.0782715305685997, "learning_rate": 7.008992165242166e-06, "loss": 0.0163, "step": 116390 }, { "epoch": 0.8604121699535792, "grad_norm": 0.07866599410772324, "learning_rate": 7.00528252611586e-06, "loss": 0.0164, "step": 116400 }, { "epoch": 0.860486088524881, "grad_norm": 0.057049378752708435, "learning_rate": 7.001572886989553e-06, "loss": 0.0173, "step": 116410 }, { "epoch": 0.8605600070961829, "grad_norm": 0.07760420441627502, "learning_rate": 6.997863247863248e-06, "loss": 0.0196, "step": 116420 }, { "epoch": 0.8606339256674846, "grad_norm": 0.07585640251636505, "learning_rate": 6.994153608736942e-06, "loss": 0.0148, "step": 116430 }, { "epoch": 0.8607078442387865, "grad_norm": 0.0873769074678421, "learning_rate": 6.990443969610637e-06, "loss": 0.0181, "step": 116440 }, { "epoch": 0.8607817628100884, "grad_norm": 0.07950405776500702, "learning_rate": 6.98673433048433e-06, "loss": 0.0188, "step": 116450 }, { "epoch": 0.8608556813813902, "grad_norm": 0.054485294967889786, "learning_rate": 6.983024691358025e-06, "loss": 0.0158, "step": 116460 }, { "epoch": 0.8609295999526921, "grad_norm": 0.10393580794334412, "learning_rate": 6.979315052231719e-06, "loss": 0.0188, "step": 116470 }, { "epoch": 0.8610035185239939, "grad_norm": 0.0881158709526062, "learning_rate": 6.975605413105414e-06, "loss": 0.0162, "step": 116480 }, { "epoch": 0.8610774370952958, "grad_norm": 0.07192538678646088, "learning_rate": 6.971895773979107e-06, "loss": 0.0157, "step": 116490 }, { "epoch": 0.8611513556665977, "grad_norm": 0.05255180597305298, "learning_rate": 6.968186134852802e-06, "loss": 0.0192, "step": 116500 }, { "epoch": 0.8612252742378995, "grad_norm": 0.063807412981987, "learning_rate": 6.964476495726497e-06, "loss": 0.0171, "step": 116510 }, { "epoch": 0.8612991928092014, "grad_norm": 0.08546123653650284, "learning_rate": 6.96076685660019e-06, "loss": 0.0169, "step": 116520 }, { "epoch": 0.8613731113805032, "grad_norm": 0.11296427994966507, "learning_rate": 6.957057217473885e-06, "loss": 0.0174, "step": 116530 }, { "epoch": 0.8614470299518051, "grad_norm": 0.07347305119037628, "learning_rate": 6.953347578347579e-06, "loss": 0.0164, "step": 116540 }, { "epoch": 0.8615209485231069, "grad_norm": 0.06195492297410965, "learning_rate": 6.949637939221274e-06, "loss": 0.017, "step": 116550 }, { "epoch": 0.8615948670944088, "grad_norm": 0.05554427579045296, "learning_rate": 6.945928300094967e-06, "loss": 0.0159, "step": 116560 }, { "epoch": 0.8616687856657107, "grad_norm": 0.06394603103399277, "learning_rate": 6.942218660968661e-06, "loss": 0.0151, "step": 116570 }, { "epoch": 0.8617427042370125, "grad_norm": 0.0576290637254715, "learning_rate": 6.938509021842356e-06, "loss": 0.0172, "step": 116580 }, { "epoch": 0.8618166228083144, "grad_norm": 0.09491167217493057, "learning_rate": 6.934799382716049e-06, "loss": 0.0194, "step": 116590 }, { "epoch": 0.8618905413796162, "grad_norm": 0.07047645002603531, "learning_rate": 6.931089743589744e-06, "loss": 0.0155, "step": 116600 }, { "epoch": 0.8619644599509181, "grad_norm": 0.06572113931179047, "learning_rate": 6.927380104463438e-06, "loss": 0.015, "step": 116610 }, { "epoch": 0.8620383785222199, "grad_norm": 0.08306535333395004, "learning_rate": 6.923670465337133e-06, "loss": 0.0155, "step": 116620 }, { "epoch": 0.8621122970935218, "grad_norm": 0.08482124656438828, "learning_rate": 6.919960826210826e-06, "loss": 0.0176, "step": 116630 }, { "epoch": 0.8621862156648237, "grad_norm": 0.08405287563800812, "learning_rate": 6.91625118708452e-06, "loss": 0.0183, "step": 116640 }, { "epoch": 0.8622601342361255, "grad_norm": 0.07583169639110565, "learning_rate": 6.912541547958215e-06, "loss": 0.0161, "step": 116650 }, { "epoch": 0.8623340528074274, "grad_norm": 0.07283152639865875, "learning_rate": 6.908831908831908e-06, "loss": 0.0196, "step": 116660 }, { "epoch": 0.8624079713787292, "grad_norm": 0.13090991973876953, "learning_rate": 6.905122269705603e-06, "loss": 0.0167, "step": 116670 }, { "epoch": 0.862481889950031, "grad_norm": 0.07329442352056503, "learning_rate": 6.901412630579298e-06, "loss": 0.0171, "step": 116680 }, { "epoch": 0.8625558085213328, "grad_norm": 0.06055987626314163, "learning_rate": 6.897702991452992e-06, "loss": 0.018, "step": 116690 }, { "epoch": 0.8626297270926347, "grad_norm": 0.06084635481238365, "learning_rate": 6.893993352326686e-06, "loss": 0.0177, "step": 116700 }, { "epoch": 0.8627036456639366, "grad_norm": 0.09046801179647446, "learning_rate": 6.8902837132003805e-06, "loss": 0.0158, "step": 116710 }, { "epoch": 0.8627775642352384, "grad_norm": 0.08746856451034546, "learning_rate": 6.886574074074075e-06, "loss": 0.0142, "step": 116720 }, { "epoch": 0.8628514828065403, "grad_norm": 0.09596207737922668, "learning_rate": 6.882864434947768e-06, "loss": 0.0169, "step": 116730 }, { "epoch": 0.8629254013778421, "grad_norm": 0.08952523022890091, "learning_rate": 6.879154795821463e-06, "loss": 0.0144, "step": 116740 }, { "epoch": 0.862999319949144, "grad_norm": 0.08711504936218262, "learning_rate": 6.875445156695157e-06, "loss": 0.0157, "step": 116750 }, { "epoch": 0.8630732385204459, "grad_norm": 0.0730694904923439, "learning_rate": 6.871735517568852e-06, "loss": 0.0175, "step": 116760 }, { "epoch": 0.8631471570917477, "grad_norm": 0.0819726511836052, "learning_rate": 6.868025878442545e-06, "loss": 0.0155, "step": 116770 }, { "epoch": 0.8632210756630496, "grad_norm": 0.07040496915578842, "learning_rate": 6.86431623931624e-06, "loss": 0.0182, "step": 116780 }, { "epoch": 0.8632949942343514, "grad_norm": 0.09829843789339066, "learning_rate": 6.860606600189934e-06, "loss": 0.018, "step": 116790 }, { "epoch": 0.8633689128056533, "grad_norm": 0.08462739735841751, "learning_rate": 6.856896961063627e-06, "loss": 0.0166, "step": 116800 }, { "epoch": 0.8634428313769551, "grad_norm": 0.08167954534292221, "learning_rate": 6.853187321937322e-06, "loss": 0.0183, "step": 116810 }, { "epoch": 0.863516749948257, "grad_norm": 0.0685417652130127, "learning_rate": 6.849477682811016e-06, "loss": 0.0152, "step": 116820 }, { "epoch": 0.8635906685195589, "grad_norm": 0.06929564476013184, "learning_rate": 6.845768043684711e-06, "loss": 0.016, "step": 116830 }, { "epoch": 0.8636645870908607, "grad_norm": 0.08290966600179672, "learning_rate": 6.842058404558404e-06, "loss": 0.0182, "step": 116840 }, { "epoch": 0.8637385056621626, "grad_norm": 0.08525334298610687, "learning_rate": 6.8383487654320995e-06, "loss": 0.0168, "step": 116850 }, { "epoch": 0.8638124242334644, "grad_norm": 0.08763402700424194, "learning_rate": 6.834639126305793e-06, "loss": 0.0152, "step": 116860 }, { "epoch": 0.8638863428047663, "grad_norm": 0.08820448815822601, "learning_rate": 6.830929487179487e-06, "loss": 0.0166, "step": 116870 }, { "epoch": 0.8639602613760681, "grad_norm": 0.08678741008043289, "learning_rate": 6.8272198480531814e-06, "loss": 0.0184, "step": 116880 }, { "epoch": 0.86403417994737, "grad_norm": 0.09588257223367691, "learning_rate": 6.823510208926876e-06, "loss": 0.019, "step": 116890 }, { "epoch": 0.8641080985186719, "grad_norm": 0.0747290551662445, "learning_rate": 6.819800569800571e-06, "loss": 0.0174, "step": 116900 }, { "epoch": 0.8641820170899737, "grad_norm": 0.10844788700342178, "learning_rate": 6.816090930674264e-06, "loss": 0.018, "step": 116910 }, { "epoch": 0.8642559356612756, "grad_norm": 0.07840590178966522, "learning_rate": 6.812381291547959e-06, "loss": 0.0166, "step": 116920 }, { "epoch": 0.8643298542325774, "grad_norm": 0.07529750466346741, "learning_rate": 6.808671652421653e-06, "loss": 0.0189, "step": 116930 }, { "epoch": 0.8644037728038793, "grad_norm": 0.08012118935585022, "learning_rate": 6.804962013295348e-06, "loss": 0.0166, "step": 116940 }, { "epoch": 0.864477691375181, "grad_norm": 0.07295463234186172, "learning_rate": 6.801252374169041e-06, "loss": 0.0199, "step": 116950 }, { "epoch": 0.864551609946483, "grad_norm": 0.0702839270234108, "learning_rate": 6.797542735042735e-06, "loss": 0.0155, "step": 116960 }, { "epoch": 0.8646255285177848, "grad_norm": 0.060946814715862274, "learning_rate": 6.79383309591643e-06, "loss": 0.015, "step": 116970 }, { "epoch": 0.8646994470890866, "grad_norm": 0.07480543851852417, "learning_rate": 6.790123456790123e-06, "loss": 0.019, "step": 116980 }, { "epoch": 0.8647733656603885, "grad_norm": 0.07786723971366882, "learning_rate": 6.7864138176638184e-06, "loss": 0.0149, "step": 116990 }, { "epoch": 0.8648472842316903, "grad_norm": 0.08606283366680145, "learning_rate": 6.782704178537512e-06, "loss": 0.0165, "step": 117000 }, { "epoch": 0.8649212028029922, "grad_norm": 0.08000627905130386, "learning_rate": 6.778994539411207e-06, "loss": 0.0152, "step": 117010 }, { "epoch": 0.8649951213742941, "grad_norm": 0.0888182744383812, "learning_rate": 6.7752849002849e-06, "loss": 0.0146, "step": 117020 }, { "epoch": 0.8650690399455959, "grad_norm": 0.06171491742134094, "learning_rate": 6.771575261158594e-06, "loss": 0.016, "step": 117030 }, { "epoch": 0.8651429585168978, "grad_norm": 0.08448406308889389, "learning_rate": 6.767865622032289e-06, "loss": 0.0152, "step": 117040 }, { "epoch": 0.8652168770881996, "grad_norm": 0.07047073543071747, "learning_rate": 6.764155982905982e-06, "loss": 0.0158, "step": 117050 }, { "epoch": 0.8652907956595015, "grad_norm": 0.06946824491024017, "learning_rate": 6.7604463437796775e-06, "loss": 0.0156, "step": 117060 }, { "epoch": 0.8653647142308033, "grad_norm": 0.08859638124704361, "learning_rate": 6.756736704653372e-06, "loss": 0.0166, "step": 117070 }, { "epoch": 0.8654386328021052, "grad_norm": 0.08528061211109161, "learning_rate": 6.753027065527066e-06, "loss": 0.0161, "step": 117080 }, { "epoch": 0.8655125513734071, "grad_norm": 0.08984003961086273, "learning_rate": 6.74931742640076e-06, "loss": 0.0185, "step": 117090 }, { "epoch": 0.8655864699447089, "grad_norm": 0.08175487071275711, "learning_rate": 6.745607787274454e-06, "loss": 0.0164, "step": 117100 }, { "epoch": 0.8656603885160108, "grad_norm": 0.05711497738957405, "learning_rate": 6.741898148148149e-06, "loss": 0.0184, "step": 117110 }, { "epoch": 0.8657343070873126, "grad_norm": 0.081973135471344, "learning_rate": 6.738188509021842e-06, "loss": 0.0209, "step": 117120 }, { "epoch": 0.8658082256586145, "grad_norm": 0.06424666941165924, "learning_rate": 6.734478869895537e-06, "loss": 0.0157, "step": 117130 }, { "epoch": 0.8658821442299163, "grad_norm": 0.08565365523099899, "learning_rate": 6.730769230769231e-06, "loss": 0.0185, "step": 117140 }, { "epoch": 0.8659560628012182, "grad_norm": 0.06290718913078308, "learning_rate": 6.727059591642926e-06, "loss": 0.0173, "step": 117150 }, { "epoch": 0.8660299813725201, "grad_norm": 0.06647878885269165, "learning_rate": 6.723349952516619e-06, "loss": 0.0148, "step": 117160 }, { "epoch": 0.8661038999438219, "grad_norm": 0.08329624682664871, "learning_rate": 6.7196403133903145e-06, "loss": 0.0152, "step": 117170 }, { "epoch": 0.8661778185151238, "grad_norm": 0.053988419473171234, "learning_rate": 6.715930674264008e-06, "loss": 0.0151, "step": 117180 }, { "epoch": 0.8662517370864256, "grad_norm": 0.06262490898370743, "learning_rate": 6.712221035137701e-06, "loss": 0.0159, "step": 117190 }, { "epoch": 0.8663256556577275, "grad_norm": 0.07730703055858612, "learning_rate": 6.7085113960113965e-06, "loss": 0.0202, "step": 117200 }, { "epoch": 0.8663995742290294, "grad_norm": 0.06828780472278595, "learning_rate": 6.70480175688509e-06, "loss": 0.016, "step": 117210 }, { "epoch": 0.8664734928003311, "grad_norm": 0.0874919444322586, "learning_rate": 6.701092117758785e-06, "loss": 0.016, "step": 117220 }, { "epoch": 0.866547411371633, "grad_norm": 0.07567555457353592, "learning_rate": 6.6973824786324784e-06, "loss": 0.0198, "step": 117230 }, { "epoch": 0.8666213299429348, "grad_norm": 0.08014479279518127, "learning_rate": 6.6936728395061736e-06, "loss": 0.0175, "step": 117240 }, { "epoch": 0.8666952485142367, "grad_norm": 0.08009376376867294, "learning_rate": 6.689963200379867e-06, "loss": 0.0157, "step": 117250 }, { "epoch": 0.8667691670855385, "grad_norm": 0.0808338075876236, "learning_rate": 6.686253561253561e-06, "loss": 0.0149, "step": 117260 }, { "epoch": 0.8668430856568404, "grad_norm": 0.07933666557073593, "learning_rate": 6.6825439221272555e-06, "loss": 0.0153, "step": 117270 }, { "epoch": 0.8669170042281423, "grad_norm": 0.07114316523075104, "learning_rate": 6.67883428300095e-06, "loss": 0.0164, "step": 117280 }, { "epoch": 0.8669909227994441, "grad_norm": 0.072788305580616, "learning_rate": 6.675124643874645e-06, "loss": 0.0176, "step": 117290 }, { "epoch": 0.867064841370746, "grad_norm": 0.06929667294025421, "learning_rate": 6.671415004748338e-06, "loss": 0.0154, "step": 117300 }, { "epoch": 0.8671387599420478, "grad_norm": 0.077360600233078, "learning_rate": 6.6677053656220335e-06, "loss": 0.017, "step": 117310 }, { "epoch": 0.8672126785133497, "grad_norm": 0.06290005892515182, "learning_rate": 6.663995726495727e-06, "loss": 0.0175, "step": 117320 }, { "epoch": 0.8672865970846515, "grad_norm": 0.06111391261219978, "learning_rate": 6.66028608736942e-06, "loss": 0.0163, "step": 117330 }, { "epoch": 0.8673605156559534, "grad_norm": 0.0982351154088974, "learning_rate": 6.6565764482431154e-06, "loss": 0.0175, "step": 117340 }, { "epoch": 0.8674344342272553, "grad_norm": 0.07444393634796143, "learning_rate": 6.652866809116809e-06, "loss": 0.0161, "step": 117350 }, { "epoch": 0.8675083527985571, "grad_norm": 0.062405869364738464, "learning_rate": 6.649157169990504e-06, "loss": 0.0172, "step": 117360 }, { "epoch": 0.867582271369859, "grad_norm": 0.08480317890644073, "learning_rate": 6.645447530864197e-06, "loss": 0.0188, "step": 117370 }, { "epoch": 0.8676561899411608, "grad_norm": 0.0864877700805664, "learning_rate": 6.6417378917378925e-06, "loss": 0.0166, "step": 117380 }, { "epoch": 0.8677301085124627, "grad_norm": 0.07572106271982193, "learning_rate": 6.638028252611586e-06, "loss": 0.018, "step": 117390 }, { "epoch": 0.8678040270837645, "grad_norm": 0.09496478736400604, "learning_rate": 6.634318613485281e-06, "loss": 0.0178, "step": 117400 }, { "epoch": 0.8678779456550664, "grad_norm": 0.057836130261421204, "learning_rate": 6.6306089743589745e-06, "loss": 0.0167, "step": 117410 }, { "epoch": 0.8679518642263683, "grad_norm": 0.06510256230831146, "learning_rate": 6.626899335232668e-06, "loss": 0.0158, "step": 117420 }, { "epoch": 0.8680257827976701, "grad_norm": 0.07079634070396423, "learning_rate": 6.623189696106363e-06, "loss": 0.0178, "step": 117430 }, { "epoch": 0.868099701368972, "grad_norm": 0.09129411727190018, "learning_rate": 6.619480056980057e-06, "loss": 0.019, "step": 117440 }, { "epoch": 0.8681736199402738, "grad_norm": 0.07951433956623077, "learning_rate": 6.615770417853752e-06, "loss": 0.0192, "step": 117450 }, { "epoch": 0.8682475385115757, "grad_norm": 0.060429371893405914, "learning_rate": 6.612060778727446e-06, "loss": 0.0153, "step": 117460 }, { "epoch": 0.8683214570828776, "grad_norm": 0.07009291648864746, "learning_rate": 6.60835113960114e-06, "loss": 0.0167, "step": 117470 }, { "epoch": 0.8683953756541793, "grad_norm": 0.08836237341165543, "learning_rate": 6.604641500474834e-06, "loss": 0.0172, "step": 117480 }, { "epoch": 0.8684692942254812, "grad_norm": 0.09908463060855865, "learning_rate": 6.600931861348528e-06, "loss": 0.0189, "step": 117490 }, { "epoch": 0.868543212796783, "grad_norm": 0.08165255934000015, "learning_rate": 6.597222222222223e-06, "loss": 0.0167, "step": 117500 }, { "epoch": 0.8686171313680849, "grad_norm": 0.0675068348646164, "learning_rate": 6.593512583095916e-06, "loss": 0.0165, "step": 117510 }, { "epoch": 0.8686910499393867, "grad_norm": 0.07735388725996017, "learning_rate": 6.5898029439696115e-06, "loss": 0.0171, "step": 117520 }, { "epoch": 0.8687649685106886, "grad_norm": 0.1081601232290268, "learning_rate": 6.586093304843305e-06, "loss": 0.0158, "step": 117530 }, { "epoch": 0.8688388870819905, "grad_norm": 0.0819975882768631, "learning_rate": 6.582383665717e-06, "loss": 0.0158, "step": 117540 }, { "epoch": 0.8689128056532923, "grad_norm": 0.07920730113983154, "learning_rate": 6.5786740265906935e-06, "loss": 0.0185, "step": 117550 }, { "epoch": 0.8689867242245942, "grad_norm": 0.0685114786028862, "learning_rate": 6.574964387464387e-06, "loss": 0.0172, "step": 117560 }, { "epoch": 0.869060642795896, "grad_norm": 0.07959503680467606, "learning_rate": 6.571254748338082e-06, "loss": 0.015, "step": 117570 }, { "epoch": 0.8691345613671979, "grad_norm": 0.10559289902448654, "learning_rate": 6.5675451092117755e-06, "loss": 0.0196, "step": 117580 }, { "epoch": 0.8692084799384997, "grad_norm": 0.08839577436447144, "learning_rate": 6.5638354700854706e-06, "loss": 0.0187, "step": 117590 }, { "epoch": 0.8692823985098016, "grad_norm": 0.06458381563425064, "learning_rate": 6.560125830959164e-06, "loss": 0.017, "step": 117600 }, { "epoch": 0.8693563170811035, "grad_norm": 0.08625758439302444, "learning_rate": 6.556416191832859e-06, "loss": 0.0179, "step": 117610 }, { "epoch": 0.8694302356524053, "grad_norm": 0.07499458640813828, "learning_rate": 6.5527065527065525e-06, "loss": 0.0174, "step": 117620 }, { "epoch": 0.8695041542237072, "grad_norm": 0.07140425592660904, "learning_rate": 6.548996913580248e-06, "loss": 0.0176, "step": 117630 }, { "epoch": 0.869578072795009, "grad_norm": 0.0880943313241005, "learning_rate": 6.545287274453941e-06, "loss": 0.0141, "step": 117640 }, { "epoch": 0.8696519913663109, "grad_norm": 0.0611373633146286, "learning_rate": 6.541577635327635e-06, "loss": 0.0164, "step": 117650 }, { "epoch": 0.8697259099376127, "grad_norm": 0.06848348677158356, "learning_rate": 6.5378679962013305e-06, "loss": 0.0156, "step": 117660 }, { "epoch": 0.8697998285089146, "grad_norm": 0.07925549894571304, "learning_rate": 6.534158357075024e-06, "loss": 0.0169, "step": 117670 }, { "epoch": 0.8698737470802165, "grad_norm": 0.08508136868476868, "learning_rate": 6.530448717948719e-06, "loss": 0.0163, "step": 117680 }, { "epoch": 0.8699476656515183, "grad_norm": 0.08420059829950333, "learning_rate": 6.5267390788224125e-06, "loss": 0.0152, "step": 117690 }, { "epoch": 0.8700215842228202, "grad_norm": 0.07651443034410477, "learning_rate": 6.5230294396961076e-06, "loss": 0.0164, "step": 117700 }, { "epoch": 0.870095502794122, "grad_norm": 0.0796276330947876, "learning_rate": 6.519319800569801e-06, "loss": 0.0177, "step": 117710 }, { "epoch": 0.8701694213654239, "grad_norm": 0.0848752036690712, "learning_rate": 6.5156101614434944e-06, "loss": 0.0175, "step": 117720 }, { "epoch": 0.8702433399367258, "grad_norm": 0.06675256788730621, "learning_rate": 6.5119005223171895e-06, "loss": 0.0164, "step": 117730 }, { "epoch": 0.8703172585080275, "grad_norm": 0.10622403770685196, "learning_rate": 6.508190883190883e-06, "loss": 0.0182, "step": 117740 }, { "epoch": 0.8703911770793294, "grad_norm": 0.05470990762114525, "learning_rate": 6.504481244064578e-06, "loss": 0.0158, "step": 117750 }, { "epoch": 0.8704650956506312, "grad_norm": 0.07306019216775894, "learning_rate": 6.5007716049382715e-06, "loss": 0.0159, "step": 117760 }, { "epoch": 0.8705390142219331, "grad_norm": 0.07729344069957733, "learning_rate": 6.497061965811967e-06, "loss": 0.0161, "step": 117770 }, { "epoch": 0.8706129327932349, "grad_norm": 0.06492874026298523, "learning_rate": 6.49335232668566e-06, "loss": 0.0183, "step": 117780 }, { "epoch": 0.8706868513645368, "grad_norm": 0.05055154487490654, "learning_rate": 6.489642687559355e-06, "loss": 0.0153, "step": 117790 }, { "epoch": 0.8707607699358387, "grad_norm": 0.0782744288444519, "learning_rate": 6.485933048433049e-06, "loss": 0.0177, "step": 117800 }, { "epoch": 0.8708346885071405, "grad_norm": 0.06355128437280655, "learning_rate": 6.482223409306742e-06, "loss": 0.0167, "step": 117810 }, { "epoch": 0.8709086070784424, "grad_norm": 0.07428276538848877, "learning_rate": 6.478513770180437e-06, "loss": 0.0183, "step": 117820 }, { "epoch": 0.8709825256497442, "grad_norm": 0.07489913702011108, "learning_rate": 6.4748041310541314e-06, "loss": 0.0175, "step": 117830 }, { "epoch": 0.8710564442210461, "grad_norm": 0.08711172640323639, "learning_rate": 6.471094491927826e-06, "loss": 0.0154, "step": 117840 }, { "epoch": 0.8711303627923479, "grad_norm": 0.08286292850971222, "learning_rate": 6.46738485280152e-06, "loss": 0.0173, "step": 117850 }, { "epoch": 0.8712042813636498, "grad_norm": 0.09852740913629532, "learning_rate": 6.463675213675214e-06, "loss": 0.0189, "step": 117860 }, { "epoch": 0.8712781999349517, "grad_norm": 0.06941636651754379, "learning_rate": 6.4599655745489085e-06, "loss": 0.0176, "step": 117870 }, { "epoch": 0.8713521185062535, "grad_norm": 0.08215862512588501, "learning_rate": 6.456255935422602e-06, "loss": 0.0184, "step": 117880 }, { "epoch": 0.8714260370775554, "grad_norm": 0.07836437970399857, "learning_rate": 6.452546296296297e-06, "loss": 0.0168, "step": 117890 }, { "epoch": 0.8714999556488572, "grad_norm": 0.09308689087629318, "learning_rate": 6.4488366571699905e-06, "loss": 0.0188, "step": 117900 }, { "epoch": 0.8715738742201591, "grad_norm": 0.0990125983953476, "learning_rate": 6.445127018043686e-06, "loss": 0.0165, "step": 117910 }, { "epoch": 0.8716477927914609, "grad_norm": 0.0700770914554596, "learning_rate": 6.441417378917379e-06, "loss": 0.0162, "step": 117920 }, { "epoch": 0.8717217113627628, "grad_norm": 0.05903575196862221, "learning_rate": 6.437707739791074e-06, "loss": 0.0177, "step": 117930 }, { "epoch": 0.8717956299340647, "grad_norm": 0.05856722965836525, "learning_rate": 6.433998100664768e-06, "loss": 0.0164, "step": 117940 }, { "epoch": 0.8718695485053665, "grad_norm": 0.06907519698143005, "learning_rate": 6.430288461538461e-06, "loss": 0.0175, "step": 117950 }, { "epoch": 0.8719434670766684, "grad_norm": 0.09158388525247574, "learning_rate": 6.426578822412156e-06, "loss": 0.0193, "step": 117960 }, { "epoch": 0.8720173856479702, "grad_norm": 0.10444381088018417, "learning_rate": 6.4228691832858496e-06, "loss": 0.0176, "step": 117970 }, { "epoch": 0.8720913042192721, "grad_norm": 0.06845099478960037, "learning_rate": 6.419159544159545e-06, "loss": 0.0196, "step": 117980 }, { "epoch": 0.872165222790574, "grad_norm": 0.05813978984951973, "learning_rate": 6.415449905033238e-06, "loss": 0.0161, "step": 117990 }, { "epoch": 0.8722391413618757, "grad_norm": 0.08608116954565048, "learning_rate": 6.411740265906933e-06, "loss": 0.0175, "step": 118000 }, { "epoch": 0.8723130599331776, "grad_norm": 0.06167742609977722, "learning_rate": 6.408030626780627e-06, "loss": 0.0155, "step": 118010 }, { "epoch": 0.8723869785044794, "grad_norm": 0.09053590893745422, "learning_rate": 6.404320987654322e-06, "loss": 0.0164, "step": 118020 }, { "epoch": 0.8724608970757813, "grad_norm": 0.07437341660261154, "learning_rate": 6.400611348528015e-06, "loss": 0.0182, "step": 118030 }, { "epoch": 0.8725348156470831, "grad_norm": 0.052046339958906174, "learning_rate": 6.3969017094017095e-06, "loss": 0.0162, "step": 118040 }, { "epoch": 0.872608734218385, "grad_norm": 0.06491658091545105, "learning_rate": 6.393192070275405e-06, "loss": 0.0155, "step": 118050 }, { "epoch": 0.8726826527896869, "grad_norm": 0.10303626209497452, "learning_rate": 6.389482431149098e-06, "loss": 0.0159, "step": 118060 }, { "epoch": 0.8727565713609887, "grad_norm": 0.11025142669677734, "learning_rate": 6.385772792022793e-06, "loss": 0.0162, "step": 118070 }, { "epoch": 0.8728304899322906, "grad_norm": 0.08896782994270325, "learning_rate": 6.3820631528964866e-06, "loss": 0.0171, "step": 118080 }, { "epoch": 0.8729044085035924, "grad_norm": 0.07671711593866348, "learning_rate": 6.378353513770182e-06, "loss": 0.0146, "step": 118090 }, { "epoch": 0.8729783270748943, "grad_norm": 0.07379849255084991, "learning_rate": 6.374643874643875e-06, "loss": 0.0166, "step": 118100 }, { "epoch": 0.8730522456461961, "grad_norm": 0.07860969752073288, "learning_rate": 6.3709342355175685e-06, "loss": 0.0176, "step": 118110 }, { "epoch": 0.873126164217498, "grad_norm": 0.08312699943780899, "learning_rate": 6.367224596391264e-06, "loss": 0.0151, "step": 118120 }, { "epoch": 0.8732000827887999, "grad_norm": 0.06143682450056076, "learning_rate": 6.363514957264957e-06, "loss": 0.0174, "step": 118130 }, { "epoch": 0.8732740013601017, "grad_norm": 0.06969328969717026, "learning_rate": 6.359805318138652e-06, "loss": 0.0175, "step": 118140 }, { "epoch": 0.8733479199314036, "grad_norm": 0.07606179267168045, "learning_rate": 6.356095679012346e-06, "loss": 0.0166, "step": 118150 }, { "epoch": 0.8734218385027054, "grad_norm": 0.06871391087770462, "learning_rate": 6.352386039886041e-06, "loss": 0.0164, "step": 118160 }, { "epoch": 0.8734957570740073, "grad_norm": 0.06539017707109451, "learning_rate": 6.348676400759734e-06, "loss": 0.015, "step": 118170 }, { "epoch": 0.8735696756453091, "grad_norm": 0.058919504284858704, "learning_rate": 6.344966761633428e-06, "loss": 0.0166, "step": 118180 }, { "epoch": 0.873643594216611, "grad_norm": 0.07681267708539963, "learning_rate": 6.341257122507123e-06, "loss": 0.0165, "step": 118190 }, { "epoch": 0.8737175127879129, "grad_norm": 0.09190231561660767, "learning_rate": 6.337547483380816e-06, "loss": 0.0195, "step": 118200 }, { "epoch": 0.8737914313592147, "grad_norm": 0.056361399590969086, "learning_rate": 6.333837844254511e-06, "loss": 0.0174, "step": 118210 }, { "epoch": 0.8738653499305166, "grad_norm": 0.07933057844638824, "learning_rate": 6.3301282051282055e-06, "loss": 0.0188, "step": 118220 }, { "epoch": 0.8739392685018184, "grad_norm": 0.08587239682674408, "learning_rate": 6.3264185660019e-06, "loss": 0.0173, "step": 118230 }, { "epoch": 0.8740131870731203, "grad_norm": 0.07572540640830994, "learning_rate": 6.322708926875594e-06, "loss": 0.0171, "step": 118240 }, { "epoch": 0.8740871056444222, "grad_norm": 0.09864775091409683, "learning_rate": 6.318999287749288e-06, "loss": 0.0162, "step": 118250 }, { "epoch": 0.874161024215724, "grad_norm": 0.053165651857852936, "learning_rate": 6.315289648622983e-06, "loss": 0.016, "step": 118260 }, { "epoch": 0.8742349427870258, "grad_norm": 0.09423815459012985, "learning_rate": 6.311580009496676e-06, "loss": 0.0166, "step": 118270 }, { "epoch": 0.8743088613583276, "grad_norm": 0.07985405623912811, "learning_rate": 6.307870370370371e-06, "loss": 0.0175, "step": 118280 }, { "epoch": 0.8743827799296295, "grad_norm": 0.07381712645292282, "learning_rate": 6.304160731244065e-06, "loss": 0.0143, "step": 118290 }, { "epoch": 0.8744566985009313, "grad_norm": 0.0779792070388794, "learning_rate": 6.30045109211776e-06, "loss": 0.0167, "step": 118300 }, { "epoch": 0.8745306170722332, "grad_norm": 0.06026535481214523, "learning_rate": 6.296741452991453e-06, "loss": 0.0154, "step": 118310 }, { "epoch": 0.8746045356435351, "grad_norm": 0.06948491185903549, "learning_rate": 6.293031813865148e-06, "loss": 0.0181, "step": 118320 }, { "epoch": 0.8746784542148369, "grad_norm": 0.09600476175546646, "learning_rate": 6.289322174738842e-06, "loss": 0.0181, "step": 118330 }, { "epoch": 0.8747523727861388, "grad_norm": 0.07605545967817307, "learning_rate": 6.285612535612535e-06, "loss": 0.017, "step": 118340 }, { "epoch": 0.8748262913574406, "grad_norm": 0.07898372411727905, "learning_rate": 6.28190289648623e-06, "loss": 0.0182, "step": 118350 }, { "epoch": 0.8749002099287425, "grad_norm": 0.07002683728933334, "learning_rate": 6.278193257359924e-06, "loss": 0.016, "step": 118360 }, { "epoch": 0.8749741285000443, "grad_norm": 0.06312119960784912, "learning_rate": 6.274483618233619e-06, "loss": 0.0173, "step": 118370 }, { "epoch": 0.8750480470713462, "grad_norm": 0.07424383610486984, "learning_rate": 6.270773979107312e-06, "loss": 0.0188, "step": 118380 }, { "epoch": 0.8751219656426481, "grad_norm": 0.09007822722196579, "learning_rate": 6.267064339981007e-06, "loss": 0.0151, "step": 118390 }, { "epoch": 0.8751958842139499, "grad_norm": 0.07628867030143738, "learning_rate": 6.263354700854701e-06, "loss": 0.017, "step": 118400 }, { "epoch": 0.8752698027852518, "grad_norm": 0.09205362200737, "learning_rate": 6.259645061728395e-06, "loss": 0.0191, "step": 118410 }, { "epoch": 0.8753437213565536, "grad_norm": 0.08055655658245087, "learning_rate": 6.255935422602089e-06, "loss": 0.0165, "step": 118420 }, { "epoch": 0.8754176399278555, "grad_norm": 0.06153779849410057, "learning_rate": 6.2522257834757836e-06, "loss": 0.0166, "step": 118430 }, { "epoch": 0.8754915584991573, "grad_norm": 0.07093429565429688, "learning_rate": 6.248516144349479e-06, "loss": 0.0175, "step": 118440 }, { "epoch": 0.8755654770704592, "grad_norm": 0.07778339833021164, "learning_rate": 6.244806505223172e-06, "loss": 0.0178, "step": 118450 }, { "epoch": 0.8756393956417611, "grad_norm": 0.0749053806066513, "learning_rate": 6.241096866096866e-06, "loss": 0.0198, "step": 118460 }, { "epoch": 0.8757133142130629, "grad_norm": 0.07246764004230499, "learning_rate": 6.237387226970561e-06, "loss": 0.0173, "step": 118470 }, { "epoch": 0.8757872327843648, "grad_norm": 0.062185708433389664, "learning_rate": 6.233677587844255e-06, "loss": 0.0173, "step": 118480 }, { "epoch": 0.8758611513556666, "grad_norm": 0.08127869665622711, "learning_rate": 6.229967948717949e-06, "loss": 0.017, "step": 118490 }, { "epoch": 0.8759350699269685, "grad_norm": 0.06540331989526749, "learning_rate": 6.2262583095916435e-06, "loss": 0.0184, "step": 118500 }, { "epoch": 0.8760089884982704, "grad_norm": 0.10836683958768845, "learning_rate": 6.222548670465338e-06, "loss": 0.0191, "step": 118510 }, { "epoch": 0.8760829070695721, "grad_norm": 0.07994542270898819, "learning_rate": 6.218839031339032e-06, "loss": 0.0183, "step": 118520 }, { "epoch": 0.876156825640874, "grad_norm": 0.07900182902812958, "learning_rate": 6.2151293922127254e-06, "loss": 0.0174, "step": 118530 }, { "epoch": 0.8762307442121758, "grad_norm": 0.07550885528326035, "learning_rate": 6.21141975308642e-06, "loss": 0.0159, "step": 118540 }, { "epoch": 0.8763046627834777, "grad_norm": 0.07126692682504654, "learning_rate": 6.207710113960114e-06, "loss": 0.0153, "step": 118550 }, { "epoch": 0.8763785813547795, "grad_norm": 0.06923384964466095, "learning_rate": 6.204000474833808e-06, "loss": 0.0156, "step": 118560 }, { "epoch": 0.8764524999260814, "grad_norm": 0.0925263836979866, "learning_rate": 6.2002908357075025e-06, "loss": 0.0176, "step": 118570 }, { "epoch": 0.8765264184973833, "grad_norm": 0.06728041917085648, "learning_rate": 6.196581196581197e-06, "loss": 0.0153, "step": 118580 }, { "epoch": 0.8766003370686851, "grad_norm": 0.06608957797288895, "learning_rate": 6.192871557454891e-06, "loss": 0.0167, "step": 118590 }, { "epoch": 0.876674255639987, "grad_norm": 0.07481256872415543, "learning_rate": 6.189161918328585e-06, "loss": 0.0153, "step": 118600 }, { "epoch": 0.8767481742112888, "grad_norm": 0.07264452427625656, "learning_rate": 6.18545227920228e-06, "loss": 0.0172, "step": 118610 }, { "epoch": 0.8768220927825907, "grad_norm": 0.08074048906564713, "learning_rate": 6.181742640075974e-06, "loss": 0.0178, "step": 118620 }, { "epoch": 0.8768960113538925, "grad_norm": 0.09411899000406265, "learning_rate": 6.178033000949668e-06, "loss": 0.0175, "step": 118630 }, { "epoch": 0.8769699299251944, "grad_norm": 0.0678592100739479, "learning_rate": 6.1743233618233625e-06, "loss": 0.0169, "step": 118640 }, { "epoch": 0.8770438484964963, "grad_norm": 0.06519462913274765, "learning_rate": 6.170613722697057e-06, "loss": 0.0165, "step": 118650 }, { "epoch": 0.8771177670677981, "grad_norm": 0.07327589392662048, "learning_rate": 6.166904083570751e-06, "loss": 0.0165, "step": 118660 }, { "epoch": 0.8771916856391, "grad_norm": 0.06687984615564346, "learning_rate": 6.163194444444445e-06, "loss": 0.0161, "step": 118670 }, { "epoch": 0.8772656042104018, "grad_norm": 0.08779148012399673, "learning_rate": 6.159484805318139e-06, "loss": 0.0177, "step": 118680 }, { "epoch": 0.8773395227817037, "grad_norm": 0.12947805225849152, "learning_rate": 6.155775166191833e-06, "loss": 0.0156, "step": 118690 }, { "epoch": 0.8774134413530055, "grad_norm": 0.07689861208200455, "learning_rate": 6.152065527065527e-06, "loss": 0.0158, "step": 118700 }, { "epoch": 0.8774873599243074, "grad_norm": 0.06581514328718185, "learning_rate": 6.1483558879392215e-06, "loss": 0.0175, "step": 118710 }, { "epoch": 0.8775612784956093, "grad_norm": 0.07931865751743317, "learning_rate": 6.144646248812916e-06, "loss": 0.0154, "step": 118720 }, { "epoch": 0.8776351970669111, "grad_norm": 0.10354472696781158, "learning_rate": 6.14093660968661e-06, "loss": 0.0183, "step": 118730 }, { "epoch": 0.877709115638213, "grad_norm": 0.06549788266420364, "learning_rate": 6.137226970560304e-06, "loss": 0.0182, "step": 118740 }, { "epoch": 0.8777830342095148, "grad_norm": 0.059177011251449585, "learning_rate": 6.133517331433999e-06, "loss": 0.0162, "step": 118750 }, { "epoch": 0.8778569527808167, "grad_norm": 0.08157137036323547, "learning_rate": 6.129807692307692e-06, "loss": 0.0184, "step": 118760 }, { "epoch": 0.8779308713521186, "grad_norm": 0.08282024413347244, "learning_rate": 6.126098053181386e-06, "loss": 0.0153, "step": 118770 }, { "epoch": 0.8780047899234203, "grad_norm": 0.08915043622255325, "learning_rate": 6.122388414055081e-06, "loss": 0.0161, "step": 118780 }, { "epoch": 0.8780787084947222, "grad_norm": 0.06341081857681274, "learning_rate": 6.118678774928775e-06, "loss": 0.0152, "step": 118790 }, { "epoch": 0.878152627066024, "grad_norm": 0.09454089403152466, "learning_rate": 6.114969135802469e-06, "loss": 0.0163, "step": 118800 }, { "epoch": 0.8782265456373259, "grad_norm": 0.09894683212041855, "learning_rate": 6.111259496676164e-06, "loss": 0.0168, "step": 118810 }, { "epoch": 0.8783004642086277, "grad_norm": 0.07308226823806763, "learning_rate": 6.1075498575498585e-06, "loss": 0.0157, "step": 118820 }, { "epoch": 0.8783743827799296, "grad_norm": 0.0727970078587532, "learning_rate": 6.103840218423552e-06, "loss": 0.0184, "step": 118830 }, { "epoch": 0.8784483013512315, "grad_norm": 0.08420936018228531, "learning_rate": 6.100130579297246e-06, "loss": 0.0174, "step": 118840 }, { "epoch": 0.8785222199225333, "grad_norm": 0.07505591958761215, "learning_rate": 6.0964209401709405e-06, "loss": 0.0159, "step": 118850 }, { "epoch": 0.8785961384938352, "grad_norm": 0.06973277777433395, "learning_rate": 6.092711301044635e-06, "loss": 0.0195, "step": 118860 }, { "epoch": 0.878670057065137, "grad_norm": 0.07956535369157791, "learning_rate": 6.089001661918329e-06, "loss": 0.0173, "step": 118870 }, { "epoch": 0.8787439756364389, "grad_norm": 0.07417246699333191, "learning_rate": 6.085292022792023e-06, "loss": 0.0169, "step": 118880 }, { "epoch": 0.8788178942077407, "grad_norm": 0.054359957575798035, "learning_rate": 6.081582383665718e-06, "loss": 0.016, "step": 118890 }, { "epoch": 0.8788918127790426, "grad_norm": 0.08991627395153046, "learning_rate": 6.077872744539412e-06, "loss": 0.0167, "step": 118900 }, { "epoch": 0.8789657313503445, "grad_norm": 0.06934183090925217, "learning_rate": 6.074163105413105e-06, "loss": 0.0166, "step": 118910 }, { "epoch": 0.8790396499216463, "grad_norm": 0.07893542945384979, "learning_rate": 6.0704534662867996e-06, "loss": 0.0177, "step": 118920 }, { "epoch": 0.8791135684929482, "grad_norm": 0.07071886956691742, "learning_rate": 6.066743827160494e-06, "loss": 0.0188, "step": 118930 }, { "epoch": 0.87918748706425, "grad_norm": 0.10038921236991882, "learning_rate": 6.063034188034188e-06, "loss": 0.0153, "step": 118940 }, { "epoch": 0.8792614056355519, "grad_norm": 0.07400741428136826, "learning_rate": 6.059324548907882e-06, "loss": 0.0182, "step": 118950 }, { "epoch": 0.8793353242068537, "grad_norm": 0.06069588661193848, "learning_rate": 6.055614909781577e-06, "loss": 0.0151, "step": 118960 }, { "epoch": 0.8794092427781556, "grad_norm": 0.08919277042150497, "learning_rate": 6.051905270655271e-06, "loss": 0.017, "step": 118970 }, { "epoch": 0.8794831613494575, "grad_norm": 0.08653470873832703, "learning_rate": 6.048195631528965e-06, "loss": 0.0177, "step": 118980 }, { "epoch": 0.8795570799207593, "grad_norm": 0.07246468216180801, "learning_rate": 6.0444859924026595e-06, "loss": 0.0161, "step": 118990 }, { "epoch": 0.8796309984920612, "grad_norm": 0.08383968472480774, "learning_rate": 6.040776353276354e-06, "loss": 0.0162, "step": 119000 }, { "epoch": 0.879704917063363, "grad_norm": 0.062004826962947845, "learning_rate": 6.037066714150048e-06, "loss": 0.0165, "step": 119010 }, { "epoch": 0.8797788356346649, "grad_norm": 0.056051649153232574, "learning_rate": 6.033357075023742e-06, "loss": 0.0164, "step": 119020 }, { "epoch": 0.8798527542059668, "grad_norm": 0.07480798661708832, "learning_rate": 6.0296474358974366e-06, "loss": 0.0168, "step": 119030 }, { "epoch": 0.8799266727772685, "grad_norm": 0.07464089244604111, "learning_rate": 6.025937796771131e-06, "loss": 0.0169, "step": 119040 }, { "epoch": 0.8800005913485704, "grad_norm": 0.06516869366168976, "learning_rate": 6.022228157644825e-06, "loss": 0.0153, "step": 119050 }, { "epoch": 0.8800745099198722, "grad_norm": 0.06231831759214401, "learning_rate": 6.0185185185185185e-06, "loss": 0.0148, "step": 119060 }, { "epoch": 0.8801484284911741, "grad_norm": 0.09648638218641281, "learning_rate": 6.014808879392213e-06, "loss": 0.017, "step": 119070 }, { "epoch": 0.8802223470624759, "grad_norm": 0.07397064566612244, "learning_rate": 6.011099240265907e-06, "loss": 0.0159, "step": 119080 }, { "epoch": 0.8802962656337778, "grad_norm": 0.07799141854047775, "learning_rate": 6.007389601139601e-06, "loss": 0.0165, "step": 119090 }, { "epoch": 0.8803701842050797, "grad_norm": 0.08395656943321228, "learning_rate": 6.003679962013296e-06, "loss": 0.018, "step": 119100 }, { "epoch": 0.8804441027763815, "grad_norm": 0.07837633043527603, "learning_rate": 5.99997032288699e-06, "loss": 0.0178, "step": 119110 }, { "epoch": 0.8805180213476834, "grad_norm": 0.12159515917301178, "learning_rate": 5.996260683760684e-06, "loss": 0.0191, "step": 119120 }, { "epoch": 0.8805919399189852, "grad_norm": 0.06409422308206558, "learning_rate": 5.9925510446343784e-06, "loss": 0.0169, "step": 119130 }, { "epoch": 0.8806658584902871, "grad_norm": 0.09560755640268326, "learning_rate": 5.988841405508072e-06, "loss": 0.0158, "step": 119140 }, { "epoch": 0.8807397770615889, "grad_norm": 0.07393410056829453, "learning_rate": 5.985131766381766e-06, "loss": 0.0193, "step": 119150 }, { "epoch": 0.8808136956328908, "grad_norm": 0.10730542987585068, "learning_rate": 5.98142212725546e-06, "loss": 0.0177, "step": 119160 }, { "epoch": 0.8808876142041927, "grad_norm": 0.05886324122548103, "learning_rate": 5.977712488129155e-06, "loss": 0.0163, "step": 119170 }, { "epoch": 0.8809615327754945, "grad_norm": 0.09222765266895294, "learning_rate": 5.974002849002849e-06, "loss": 0.0157, "step": 119180 }, { "epoch": 0.8810354513467964, "grad_norm": 0.07070504128932953, "learning_rate": 5.970293209876543e-06, "loss": 0.016, "step": 119190 }, { "epoch": 0.8811093699180982, "grad_norm": 0.055727239698171616, "learning_rate": 5.966583570750238e-06, "loss": 0.0183, "step": 119200 }, { "epoch": 0.8811832884894001, "grad_norm": 0.06888076663017273, "learning_rate": 5.962873931623933e-06, "loss": 0.0166, "step": 119210 }, { "epoch": 0.881257207060702, "grad_norm": 0.10308843851089478, "learning_rate": 5.959164292497626e-06, "loss": 0.0173, "step": 119220 }, { "epoch": 0.8813311256320038, "grad_norm": 0.10609335452318192, "learning_rate": 5.95545465337132e-06, "loss": 0.0177, "step": 119230 }, { "epoch": 0.8814050442033057, "grad_norm": 0.07922697812318802, "learning_rate": 5.951745014245015e-06, "loss": 0.0162, "step": 119240 }, { "epoch": 0.8814789627746075, "grad_norm": 0.050138652324676514, "learning_rate": 5.948035375118709e-06, "loss": 0.0136, "step": 119250 }, { "epoch": 0.8815528813459094, "grad_norm": 0.0792040005326271, "learning_rate": 5.944325735992403e-06, "loss": 0.0164, "step": 119260 }, { "epoch": 0.8816267999172112, "grad_norm": 0.0703444704413414, "learning_rate": 5.940616096866097e-06, "loss": 0.0156, "step": 119270 }, { "epoch": 0.8817007184885131, "grad_norm": 0.07934896647930145, "learning_rate": 5.936906457739792e-06, "loss": 0.0178, "step": 119280 }, { "epoch": 0.881774637059815, "grad_norm": 0.08978631347417831, "learning_rate": 5.933196818613485e-06, "loss": 0.0188, "step": 119290 }, { "epoch": 0.8818485556311167, "grad_norm": 0.07721249014139175, "learning_rate": 5.929487179487179e-06, "loss": 0.0176, "step": 119300 }, { "epoch": 0.8819224742024186, "grad_norm": 0.07975966483354568, "learning_rate": 5.925777540360874e-06, "loss": 0.0142, "step": 119310 }, { "epoch": 0.8819963927737204, "grad_norm": 0.06769223511219025, "learning_rate": 5.922067901234568e-06, "loss": 0.0153, "step": 119320 }, { "epoch": 0.8820703113450223, "grad_norm": 0.06781387329101562, "learning_rate": 5.918358262108262e-06, "loss": 0.017, "step": 119330 }, { "epoch": 0.8821442299163241, "grad_norm": 0.07139957696199417, "learning_rate": 5.9146486229819565e-06, "loss": 0.0195, "step": 119340 }, { "epoch": 0.882218148487626, "grad_norm": 0.09100551903247833, "learning_rate": 5.910938983855651e-06, "loss": 0.0186, "step": 119350 }, { "epoch": 0.8822920670589279, "grad_norm": 0.08640255779027939, "learning_rate": 5.907229344729345e-06, "loss": 0.0189, "step": 119360 }, { "epoch": 0.8823659856302297, "grad_norm": 0.06398281455039978, "learning_rate": 5.903519705603039e-06, "loss": 0.0176, "step": 119370 }, { "epoch": 0.8824399042015316, "grad_norm": 0.08789733052253723, "learning_rate": 5.8998100664767336e-06, "loss": 0.0187, "step": 119380 }, { "epoch": 0.8825138227728334, "grad_norm": 0.06949253380298615, "learning_rate": 5.896100427350428e-06, "loss": 0.0162, "step": 119390 }, { "epoch": 0.8825877413441353, "grad_norm": 0.07115206122398376, "learning_rate": 5.892390788224122e-06, "loss": 0.0154, "step": 119400 }, { "epoch": 0.8826616599154371, "grad_norm": 0.07384328544139862, "learning_rate": 5.888681149097816e-06, "loss": 0.0166, "step": 119410 }, { "epoch": 0.882735578486739, "grad_norm": 0.07285971939563751, "learning_rate": 5.884971509971511e-06, "loss": 0.0168, "step": 119420 }, { "epoch": 0.8828094970580409, "grad_norm": 0.07207538932561874, "learning_rate": 5.881261870845205e-06, "loss": 0.0171, "step": 119430 }, { "epoch": 0.8828834156293427, "grad_norm": 0.08270884305238724, "learning_rate": 5.877552231718899e-06, "loss": 0.0178, "step": 119440 }, { "epoch": 0.8829573342006446, "grad_norm": 0.11721400171518326, "learning_rate": 5.873842592592593e-06, "loss": 0.0168, "step": 119450 }, { "epoch": 0.8830312527719464, "grad_norm": 0.07226939499378204, "learning_rate": 5.870132953466287e-06, "loss": 0.0154, "step": 119460 }, { "epoch": 0.8831051713432483, "grad_norm": 0.05731596797704697, "learning_rate": 5.866423314339981e-06, "loss": 0.0173, "step": 119470 }, { "epoch": 0.8831790899145502, "grad_norm": 0.07784925401210785, "learning_rate": 5.8627136752136754e-06, "loss": 0.0157, "step": 119480 }, { "epoch": 0.883253008485852, "grad_norm": 0.057738661766052246, "learning_rate": 5.85900403608737e-06, "loss": 0.0164, "step": 119490 }, { "epoch": 0.8833269270571539, "grad_norm": 0.06912290304899216, "learning_rate": 5.855294396961064e-06, "loss": 0.0183, "step": 119500 }, { "epoch": 0.8834008456284557, "grad_norm": 0.07481943070888519, "learning_rate": 5.851584757834758e-06, "loss": 0.0155, "step": 119510 }, { "epoch": 0.8834747641997576, "grad_norm": 0.14108766615390778, "learning_rate": 5.847875118708452e-06, "loss": 0.0166, "step": 119520 }, { "epoch": 0.8835486827710594, "grad_norm": 0.06856310367584229, "learning_rate": 5.844165479582146e-06, "loss": 0.0158, "step": 119530 }, { "epoch": 0.8836226013423613, "grad_norm": 0.09352389723062515, "learning_rate": 5.84045584045584e-06, "loss": 0.0154, "step": 119540 }, { "epoch": 0.8836965199136632, "grad_norm": 0.06415341049432755, "learning_rate": 5.8367462013295345e-06, "loss": 0.0175, "step": 119550 }, { "epoch": 0.883770438484965, "grad_norm": 0.07106293737888336, "learning_rate": 5.833036562203229e-06, "loss": 0.0182, "step": 119560 }, { "epoch": 0.8838443570562668, "grad_norm": 0.0817696824669838, "learning_rate": 5.829326923076923e-06, "loss": 0.0154, "step": 119570 }, { "epoch": 0.8839182756275686, "grad_norm": 0.07956176996231079, "learning_rate": 5.825617283950618e-06, "loss": 0.0172, "step": 119580 }, { "epoch": 0.8839921941988705, "grad_norm": 0.07482311129570007, "learning_rate": 5.8219076448243124e-06, "loss": 0.0154, "step": 119590 }, { "epoch": 0.8840661127701723, "grad_norm": 0.09065963327884674, "learning_rate": 5.818198005698006e-06, "loss": 0.0151, "step": 119600 }, { "epoch": 0.8841400313414742, "grad_norm": 0.12129585444927216, "learning_rate": 5.8144883665717e-06, "loss": 0.0164, "step": 119610 }, { "epoch": 0.8842139499127761, "grad_norm": 0.11287616938352585, "learning_rate": 5.810778727445394e-06, "loss": 0.0173, "step": 119620 }, { "epoch": 0.8842878684840779, "grad_norm": 0.05906311795115471, "learning_rate": 5.807069088319089e-06, "loss": 0.0157, "step": 119630 }, { "epoch": 0.8843617870553798, "grad_norm": 0.11285065859556198, "learning_rate": 5.803359449192783e-06, "loss": 0.0193, "step": 119640 }, { "epoch": 0.8844357056266816, "grad_norm": 0.07003043591976166, "learning_rate": 5.799649810066477e-06, "loss": 0.0173, "step": 119650 }, { "epoch": 0.8845096241979835, "grad_norm": 0.06678735464811325, "learning_rate": 5.7959401709401715e-06, "loss": 0.0148, "step": 119660 }, { "epoch": 0.8845835427692853, "grad_norm": 0.05683312565088272, "learning_rate": 5.792230531813866e-06, "loss": 0.0147, "step": 119670 }, { "epoch": 0.8846574613405872, "grad_norm": 0.08991986513137817, "learning_rate": 5.788520892687559e-06, "loss": 0.0144, "step": 119680 }, { "epoch": 0.8847313799118891, "grad_norm": 0.08308696746826172, "learning_rate": 5.7848112535612535e-06, "loss": 0.017, "step": 119690 }, { "epoch": 0.8848052984831909, "grad_norm": 0.07566976547241211, "learning_rate": 5.781101614434948e-06, "loss": 0.0162, "step": 119700 }, { "epoch": 0.8848792170544928, "grad_norm": 0.08281205594539642, "learning_rate": 5.777391975308642e-06, "loss": 0.0182, "step": 119710 }, { "epoch": 0.8849531356257946, "grad_norm": 0.0822274386882782, "learning_rate": 5.773682336182336e-06, "loss": 0.0147, "step": 119720 }, { "epoch": 0.8850270541970965, "grad_norm": 0.07982059568166733, "learning_rate": 5.7699726970560306e-06, "loss": 0.0161, "step": 119730 }, { "epoch": 0.8851009727683984, "grad_norm": 0.10032445192337036, "learning_rate": 5.766263057929725e-06, "loss": 0.0184, "step": 119740 }, { "epoch": 0.8851748913397002, "grad_norm": 0.05839885026216507, "learning_rate": 5.762553418803419e-06, "loss": 0.016, "step": 119750 }, { "epoch": 0.8852488099110021, "grad_norm": 0.08391750603914261, "learning_rate": 5.758843779677113e-06, "loss": 0.0151, "step": 119760 }, { "epoch": 0.8853227284823039, "grad_norm": 0.09729672968387604, "learning_rate": 5.755134140550808e-06, "loss": 0.0169, "step": 119770 }, { "epoch": 0.8853966470536058, "grad_norm": 0.06830310076475143, "learning_rate": 5.751424501424502e-06, "loss": 0.0172, "step": 119780 }, { "epoch": 0.8854705656249076, "grad_norm": 0.09163478761911392, "learning_rate": 5.747714862298196e-06, "loss": 0.0181, "step": 119790 }, { "epoch": 0.8855444841962095, "grad_norm": 0.047696489840745926, "learning_rate": 5.7440052231718905e-06, "loss": 0.0178, "step": 119800 }, { "epoch": 0.8856184027675114, "grad_norm": 0.09021651744842529, "learning_rate": 5.740295584045585e-06, "loss": 0.0164, "step": 119810 }, { "epoch": 0.8856923213388131, "grad_norm": 0.0775601714849472, "learning_rate": 5.736585944919279e-06, "loss": 0.0174, "step": 119820 }, { "epoch": 0.885766239910115, "grad_norm": 0.09051928669214249, "learning_rate": 5.7328763057929725e-06, "loss": 0.0192, "step": 119830 }, { "epoch": 0.8858401584814168, "grad_norm": 0.07663477212190628, "learning_rate": 5.729166666666667e-06, "loss": 0.0196, "step": 119840 }, { "epoch": 0.8859140770527187, "grad_norm": 0.09055771678686142, "learning_rate": 5.725457027540361e-06, "loss": 0.0156, "step": 119850 }, { "epoch": 0.8859879956240205, "grad_norm": 0.08544610440731049, "learning_rate": 5.721747388414055e-06, "loss": 0.0187, "step": 119860 }, { "epoch": 0.8860619141953224, "grad_norm": 0.09478023648262024, "learning_rate": 5.7180377492877495e-06, "loss": 0.0182, "step": 119870 }, { "epoch": 0.8861358327666243, "grad_norm": 0.0887848511338234, "learning_rate": 5.714328110161444e-06, "loss": 0.0174, "step": 119880 }, { "epoch": 0.8862097513379261, "grad_norm": 0.10478053241968155, "learning_rate": 5.710618471035138e-06, "loss": 0.0172, "step": 119890 }, { "epoch": 0.886283669909228, "grad_norm": 0.06299924105405807, "learning_rate": 5.706908831908832e-06, "loss": 0.0168, "step": 119900 }, { "epoch": 0.8863575884805298, "grad_norm": 0.07549279928207397, "learning_rate": 5.703199192782526e-06, "loss": 0.0168, "step": 119910 }, { "epoch": 0.8864315070518317, "grad_norm": 0.0627603754401207, "learning_rate": 5.69948955365622e-06, "loss": 0.0166, "step": 119920 }, { "epoch": 0.8865054256231335, "grad_norm": 0.08275282382965088, "learning_rate": 5.695779914529914e-06, "loss": 0.0159, "step": 119930 }, { "epoch": 0.8865793441944354, "grad_norm": 0.06280075758695602, "learning_rate": 5.692070275403609e-06, "loss": 0.0174, "step": 119940 }, { "epoch": 0.8866532627657373, "grad_norm": 0.07194601744413376, "learning_rate": 5.688360636277303e-06, "loss": 0.0166, "step": 119950 }, { "epoch": 0.8867271813370391, "grad_norm": 0.1123146116733551, "learning_rate": 5.684650997150998e-06, "loss": 0.02, "step": 119960 }, { "epoch": 0.886801099908341, "grad_norm": 0.07987431436777115, "learning_rate": 5.680941358024692e-06, "loss": 0.0159, "step": 119970 }, { "epoch": 0.8868750184796428, "grad_norm": 0.07852199673652649, "learning_rate": 5.6772317188983866e-06, "loss": 0.0182, "step": 119980 }, { "epoch": 0.8869489370509447, "grad_norm": 0.08840122073888779, "learning_rate": 5.67352207977208e-06, "loss": 0.0163, "step": 119990 }, { "epoch": 0.8870228556222466, "grad_norm": 0.05927702784538269, "learning_rate": 5.669812440645774e-06, "loss": 0.0161, "step": 120000 }, { "epoch": 0.8870228556222466, "eval_f1": 0.6344552032256219, "eval_loss": 0.016682270914316177, "eval_precision": 0.5062286369185682, "eval_recall": 0.8496762787442149, "eval_runtime": 2912.7696, "eval_samples_per_second": 185.78, "eval_steps_per_second": 2.903, "step": 120000 }, { "epoch": 0.8870967741935484, "grad_norm": 0.09450909495353699, "learning_rate": 5.6661028015194685e-06, "loss": 0.0155, "step": 120010 }, { "epoch": 0.8871706927648503, "grad_norm": 0.06688588857650757, "learning_rate": 5.662393162393163e-06, "loss": 0.0183, "step": 120020 }, { "epoch": 0.8872446113361521, "grad_norm": 0.09937802702188492, "learning_rate": 5.658683523266857e-06, "loss": 0.0192, "step": 120030 }, { "epoch": 0.887318529907454, "grad_norm": 0.06747591495513916, "learning_rate": 5.654973884140551e-06, "loss": 0.0152, "step": 120040 }, { "epoch": 0.8873924484787558, "grad_norm": 0.07164330780506134, "learning_rate": 5.651264245014246e-06, "loss": 0.0153, "step": 120050 }, { "epoch": 0.8874663670500577, "grad_norm": 0.07619501650333405, "learning_rate": 5.647554605887939e-06, "loss": 0.0156, "step": 120060 }, { "epoch": 0.8875402856213596, "grad_norm": 0.07995057851076126, "learning_rate": 5.643844966761633e-06, "loss": 0.0181, "step": 120070 }, { "epoch": 0.8876142041926613, "grad_norm": 0.06575370579957962, "learning_rate": 5.640135327635328e-06, "loss": 0.016, "step": 120080 }, { "epoch": 0.8876881227639632, "grad_norm": 0.09357030689716339, "learning_rate": 5.636425688509022e-06, "loss": 0.0178, "step": 120090 }, { "epoch": 0.887762041335265, "grad_norm": 0.1380130648612976, "learning_rate": 5.632716049382716e-06, "loss": 0.0189, "step": 120100 }, { "epoch": 0.8878359599065669, "grad_norm": 0.08238451927900314, "learning_rate": 5.62900641025641e-06, "loss": 0.016, "step": 120110 }, { "epoch": 0.8879098784778687, "grad_norm": 0.08737955242395401, "learning_rate": 5.625296771130105e-06, "loss": 0.0175, "step": 120120 }, { "epoch": 0.8879837970491706, "grad_norm": 0.08896934986114502, "learning_rate": 5.621587132003799e-06, "loss": 0.0162, "step": 120130 }, { "epoch": 0.8880577156204725, "grad_norm": 0.07118116319179535, "learning_rate": 5.617877492877493e-06, "loss": 0.0168, "step": 120140 }, { "epoch": 0.8881316341917743, "grad_norm": 0.08958224952220917, "learning_rate": 5.6141678537511875e-06, "loss": 0.0165, "step": 120150 }, { "epoch": 0.8882055527630762, "grad_norm": 0.08103135973215103, "learning_rate": 5.610458214624882e-06, "loss": 0.0171, "step": 120160 }, { "epoch": 0.888279471334378, "grad_norm": 0.04269809275865555, "learning_rate": 5.606748575498576e-06, "loss": 0.0155, "step": 120170 }, { "epoch": 0.8883533899056799, "grad_norm": 0.08776000887155533, "learning_rate": 5.60303893637227e-06, "loss": 0.0155, "step": 120180 }, { "epoch": 0.8884273084769817, "grad_norm": 0.09480982273817062, "learning_rate": 5.599329297245965e-06, "loss": 0.0152, "step": 120190 }, { "epoch": 0.8885012270482836, "grad_norm": 0.09173163771629333, "learning_rate": 5.595619658119659e-06, "loss": 0.0182, "step": 120200 }, { "epoch": 0.8885751456195855, "grad_norm": 0.08384384214878082, "learning_rate": 5.591910018993353e-06, "loss": 0.0178, "step": 120210 }, { "epoch": 0.8886490641908873, "grad_norm": 0.08577314764261246, "learning_rate": 5.5882003798670466e-06, "loss": 0.0193, "step": 120220 }, { "epoch": 0.8887229827621892, "grad_norm": 0.05332199111580849, "learning_rate": 5.584490740740741e-06, "loss": 0.0142, "step": 120230 }, { "epoch": 0.888796901333491, "grad_norm": 0.06766955554485321, "learning_rate": 5.580781101614435e-06, "loss": 0.0174, "step": 120240 }, { "epoch": 0.8888708199047929, "grad_norm": 0.0663696676492691, "learning_rate": 5.577071462488129e-06, "loss": 0.0172, "step": 120250 }, { "epoch": 0.8889447384760948, "grad_norm": 0.0493619330227375, "learning_rate": 5.573361823361824e-06, "loss": 0.0148, "step": 120260 }, { "epoch": 0.8890186570473966, "grad_norm": 0.08228102326393127, "learning_rate": 5.569652184235518e-06, "loss": 0.0193, "step": 120270 }, { "epoch": 0.8890925756186985, "grad_norm": 0.07778341323137283, "learning_rate": 5.565942545109212e-06, "loss": 0.0175, "step": 120280 }, { "epoch": 0.8891664941900003, "grad_norm": 0.09558829665184021, "learning_rate": 5.562232905982906e-06, "loss": 0.0186, "step": 120290 }, { "epoch": 0.8892404127613022, "grad_norm": 0.09294694662094116, "learning_rate": 5.5585232668566e-06, "loss": 0.0149, "step": 120300 }, { "epoch": 0.889314331332604, "grad_norm": 0.08595292270183563, "learning_rate": 5.554813627730294e-06, "loss": 0.0163, "step": 120310 }, { "epoch": 0.8893882499039059, "grad_norm": 0.052427005022764206, "learning_rate": 5.5511039886039884e-06, "loss": 0.0158, "step": 120320 }, { "epoch": 0.8894621684752078, "grad_norm": 0.09456674754619598, "learning_rate": 5.547394349477683e-06, "loss": 0.0172, "step": 120330 }, { "epoch": 0.8895360870465095, "grad_norm": 0.07085176557302475, "learning_rate": 5.543684710351377e-06, "loss": 0.0176, "step": 120340 }, { "epoch": 0.8896100056178115, "grad_norm": 0.10055804252624512, "learning_rate": 5.539975071225072e-06, "loss": 0.0163, "step": 120350 }, { "epoch": 0.8896839241891132, "grad_norm": 0.08094070106744766, "learning_rate": 5.536265432098766e-06, "loss": 0.0184, "step": 120360 }, { "epoch": 0.8897578427604151, "grad_norm": 0.07718300074338913, "learning_rate": 5.53255579297246e-06, "loss": 0.0169, "step": 120370 }, { "epoch": 0.8898317613317169, "grad_norm": 0.07198705524206161, "learning_rate": 5.528846153846154e-06, "loss": 0.0161, "step": 120380 }, { "epoch": 0.8899056799030188, "grad_norm": 0.09524102509021759, "learning_rate": 5.525136514719848e-06, "loss": 0.0176, "step": 120390 }, { "epoch": 0.8899795984743207, "grad_norm": 0.06162939593195915, "learning_rate": 5.521426875593543e-06, "loss": 0.0158, "step": 120400 }, { "epoch": 0.8900535170456225, "grad_norm": 0.0801302120089531, "learning_rate": 5.517717236467237e-06, "loss": 0.0147, "step": 120410 }, { "epoch": 0.8901274356169244, "grad_norm": 0.07829449325799942, "learning_rate": 5.514007597340931e-06, "loss": 0.0172, "step": 120420 }, { "epoch": 0.8902013541882262, "grad_norm": 0.11463841050863266, "learning_rate": 5.5102979582146254e-06, "loss": 0.0199, "step": 120430 }, { "epoch": 0.8902752727595281, "grad_norm": 0.09118429571390152, "learning_rate": 5.50658831908832e-06, "loss": 0.0162, "step": 120440 }, { "epoch": 0.8903491913308299, "grad_norm": 0.07519903779029846, "learning_rate": 5.502878679962013e-06, "loss": 0.0173, "step": 120450 }, { "epoch": 0.8904231099021318, "grad_norm": 0.08263619244098663, "learning_rate": 5.499169040835707e-06, "loss": 0.0167, "step": 120460 }, { "epoch": 0.8904970284734337, "grad_norm": 0.0880921334028244, "learning_rate": 5.495459401709402e-06, "loss": 0.0188, "step": 120470 }, { "epoch": 0.8905709470447355, "grad_norm": 0.07637332379817963, "learning_rate": 5.491749762583096e-06, "loss": 0.0168, "step": 120480 }, { "epoch": 0.8906448656160374, "grad_norm": 0.08179379999637604, "learning_rate": 5.48804012345679e-06, "loss": 0.0144, "step": 120490 }, { "epoch": 0.8907187841873392, "grad_norm": 0.0767722874879837, "learning_rate": 5.4843304843304845e-06, "loss": 0.0145, "step": 120500 }, { "epoch": 0.8907927027586411, "grad_norm": 0.05685288831591606, "learning_rate": 5.480620845204179e-06, "loss": 0.0156, "step": 120510 }, { "epoch": 0.890866621329943, "grad_norm": 0.0973455086350441, "learning_rate": 5.476911206077873e-06, "loss": 0.0157, "step": 120520 }, { "epoch": 0.8909405399012448, "grad_norm": 0.06820717453956604, "learning_rate": 5.473201566951567e-06, "loss": 0.0164, "step": 120530 }, { "epoch": 0.8910144584725467, "grad_norm": 0.08437089622020721, "learning_rate": 5.469491927825262e-06, "loss": 0.0166, "step": 120540 }, { "epoch": 0.8910883770438485, "grad_norm": 0.08168923109769821, "learning_rate": 5.465782288698956e-06, "loss": 0.017, "step": 120550 }, { "epoch": 0.8911622956151504, "grad_norm": 0.05618911609053612, "learning_rate": 5.46207264957265e-06, "loss": 0.0176, "step": 120560 }, { "epoch": 0.8912362141864522, "grad_norm": 0.06802624464035034, "learning_rate": 5.458363010446344e-06, "loss": 0.0191, "step": 120570 }, { "epoch": 0.8913101327577541, "grad_norm": 0.07354087382555008, "learning_rate": 5.454653371320039e-06, "loss": 0.0166, "step": 120580 }, { "epoch": 0.891384051329056, "grad_norm": 0.08429256081581116, "learning_rate": 5.450943732193733e-06, "loss": 0.0163, "step": 120590 }, { "epoch": 0.8914579699003577, "grad_norm": 0.06684058159589767, "learning_rate": 5.447234093067426e-06, "loss": 0.0152, "step": 120600 }, { "epoch": 0.8915318884716597, "grad_norm": 0.06897986680269241, "learning_rate": 5.443524453941121e-06, "loss": 0.0159, "step": 120610 }, { "epoch": 0.8916058070429614, "grad_norm": 0.07506728172302246, "learning_rate": 5.439814814814815e-06, "loss": 0.0179, "step": 120620 }, { "epoch": 0.8916797256142633, "grad_norm": 0.08993647992610931, "learning_rate": 5.436105175688509e-06, "loss": 0.0175, "step": 120630 }, { "epoch": 0.8917536441855651, "grad_norm": 0.0962747111916542, "learning_rate": 5.4323955365622035e-06, "loss": 0.0189, "step": 120640 }, { "epoch": 0.891827562756867, "grad_norm": 0.07659200578927994, "learning_rate": 5.428685897435898e-06, "loss": 0.0184, "step": 120650 }, { "epoch": 0.8919014813281689, "grad_norm": 0.10518428683280945, "learning_rate": 5.424976258309592e-06, "loss": 0.017, "step": 120660 }, { "epoch": 0.8919753998994707, "grad_norm": 0.07588525116443634, "learning_rate": 5.421266619183286e-06, "loss": 0.0189, "step": 120670 }, { "epoch": 0.8920493184707726, "grad_norm": 0.06094416230916977, "learning_rate": 5.41755698005698e-06, "loss": 0.0155, "step": 120680 }, { "epoch": 0.8921232370420744, "grad_norm": 0.07991020381450653, "learning_rate": 5.413847340930674e-06, "loss": 0.0156, "step": 120690 }, { "epoch": 0.8921971556133763, "grad_norm": 0.09702847898006439, "learning_rate": 5.410137701804368e-06, "loss": 0.018, "step": 120700 }, { "epoch": 0.8922710741846781, "grad_norm": 0.07065367698669434, "learning_rate": 5.4064280626780625e-06, "loss": 0.0174, "step": 120710 }, { "epoch": 0.89234499275598, "grad_norm": 0.0530664436519146, "learning_rate": 5.402718423551757e-06, "loss": 0.0159, "step": 120720 }, { "epoch": 0.8924189113272819, "grad_norm": 0.08903425186872482, "learning_rate": 5.399008784425452e-06, "loss": 0.018, "step": 120730 }, { "epoch": 0.8924928298985837, "grad_norm": 0.07997802644968033, "learning_rate": 5.395299145299146e-06, "loss": 0.0177, "step": 120740 }, { "epoch": 0.8925667484698856, "grad_norm": 0.06450887769460678, "learning_rate": 5.39158950617284e-06, "loss": 0.0171, "step": 120750 }, { "epoch": 0.8926406670411874, "grad_norm": 0.06914730370044708, "learning_rate": 5.387879867046534e-06, "loss": 0.0138, "step": 120760 }, { "epoch": 0.8927145856124893, "grad_norm": 0.05761079117655754, "learning_rate": 5.384170227920228e-06, "loss": 0.0166, "step": 120770 }, { "epoch": 0.8927885041837912, "grad_norm": 0.07949425280094147, "learning_rate": 5.3804605887939225e-06, "loss": 0.0163, "step": 120780 }, { "epoch": 0.892862422755093, "grad_norm": 0.06849084794521332, "learning_rate": 5.376750949667617e-06, "loss": 0.0158, "step": 120790 }, { "epoch": 0.8929363413263949, "grad_norm": 0.07909443974494934, "learning_rate": 5.373041310541311e-06, "loss": 0.0178, "step": 120800 }, { "epoch": 0.8930102598976967, "grad_norm": 0.08381883054971695, "learning_rate": 5.369331671415005e-06, "loss": 0.017, "step": 120810 }, { "epoch": 0.8930841784689986, "grad_norm": 0.0874544307589531, "learning_rate": 5.3656220322886995e-06, "loss": 0.0179, "step": 120820 }, { "epoch": 0.8931580970403004, "grad_norm": 0.08168869465589523, "learning_rate": 5.361912393162393e-06, "loss": 0.0206, "step": 120830 }, { "epoch": 0.8932320156116023, "grad_norm": 0.10479090362787247, "learning_rate": 5.358202754036087e-06, "loss": 0.0195, "step": 120840 }, { "epoch": 0.8933059341829042, "grad_norm": 0.10459177941083908, "learning_rate": 5.3544931149097815e-06, "loss": 0.019, "step": 120850 }, { "epoch": 0.893379852754206, "grad_norm": 0.10548960417509079, "learning_rate": 5.350783475783476e-06, "loss": 0.0166, "step": 120860 }, { "epoch": 0.8934537713255079, "grad_norm": 0.06455662101507187, "learning_rate": 5.34707383665717e-06, "loss": 0.015, "step": 120870 }, { "epoch": 0.8935276898968096, "grad_norm": 0.10260767489671707, "learning_rate": 5.343364197530864e-06, "loss": 0.016, "step": 120880 }, { "epoch": 0.8936016084681115, "grad_norm": 0.09701945632696152, "learning_rate": 5.339654558404559e-06, "loss": 0.0188, "step": 120890 }, { "epoch": 0.8936755270394133, "grad_norm": 0.09393393993377686, "learning_rate": 5.335944919278253e-06, "loss": 0.0213, "step": 120900 }, { "epoch": 0.8937494456107152, "grad_norm": 0.06040528416633606, "learning_rate": 5.332235280151947e-06, "loss": 0.0145, "step": 120910 }, { "epoch": 0.8938233641820171, "grad_norm": 0.0938306599855423, "learning_rate": 5.3285256410256414e-06, "loss": 0.0176, "step": 120920 }, { "epoch": 0.8938972827533189, "grad_norm": 0.10529477894306183, "learning_rate": 5.324816001899336e-06, "loss": 0.0161, "step": 120930 }, { "epoch": 0.8939712013246208, "grad_norm": 0.08327803015708923, "learning_rate": 5.32110636277303e-06, "loss": 0.0193, "step": 120940 }, { "epoch": 0.8940451198959226, "grad_norm": 0.09573381394147873, "learning_rate": 5.317396723646724e-06, "loss": 0.0188, "step": 120950 }, { "epoch": 0.8941190384672245, "grad_norm": 0.06852541863918304, "learning_rate": 5.3136870845204185e-06, "loss": 0.0154, "step": 120960 }, { "epoch": 0.8941929570385264, "grad_norm": 0.08873478323221207, "learning_rate": 5.309977445394113e-06, "loss": 0.0166, "step": 120970 }, { "epoch": 0.8942668756098282, "grad_norm": 0.09256771206855774, "learning_rate": 5.306267806267807e-06, "loss": 0.0173, "step": 120980 }, { "epoch": 0.8943407941811301, "grad_norm": 0.08251002430915833, "learning_rate": 5.3025581671415005e-06, "loss": 0.0157, "step": 120990 }, { "epoch": 0.8944147127524319, "grad_norm": 0.0775420218706131, "learning_rate": 5.298848528015195e-06, "loss": 0.0176, "step": 121000 }, { "epoch": 0.8944886313237338, "grad_norm": 0.05607917532324791, "learning_rate": 5.295138888888889e-06, "loss": 0.0181, "step": 121010 }, { "epoch": 0.8945625498950356, "grad_norm": 0.10188756138086319, "learning_rate": 5.291429249762583e-06, "loss": 0.0164, "step": 121020 }, { "epoch": 0.8946364684663375, "grad_norm": 0.0746680498123169, "learning_rate": 5.287719610636278e-06, "loss": 0.0157, "step": 121030 }, { "epoch": 0.8947103870376394, "grad_norm": 0.07992107421159744, "learning_rate": 5.284009971509972e-06, "loss": 0.0147, "step": 121040 }, { "epoch": 0.8947843056089412, "grad_norm": 0.08044373244047165, "learning_rate": 5.280300332383666e-06, "loss": 0.0157, "step": 121050 }, { "epoch": 0.8948582241802431, "grad_norm": 0.07756134122610092, "learning_rate": 5.2765906932573596e-06, "loss": 0.0178, "step": 121060 }, { "epoch": 0.8949321427515449, "grad_norm": 0.08153444528579712, "learning_rate": 5.272881054131054e-06, "loss": 0.0156, "step": 121070 }, { "epoch": 0.8950060613228468, "grad_norm": 0.06693438440561295, "learning_rate": 5.269171415004748e-06, "loss": 0.0167, "step": 121080 }, { "epoch": 0.8950799798941486, "grad_norm": 0.07793907821178436, "learning_rate": 5.265461775878442e-06, "loss": 0.0166, "step": 121090 }, { "epoch": 0.8951538984654505, "grad_norm": 0.06497281044721603, "learning_rate": 5.261752136752137e-06, "loss": 0.0153, "step": 121100 }, { "epoch": 0.8952278170367524, "grad_norm": 0.0652812048792839, "learning_rate": 5.258042497625832e-06, "loss": 0.016, "step": 121110 }, { "epoch": 0.8953017356080542, "grad_norm": 0.08423350006341934, "learning_rate": 5.254332858499526e-06, "loss": 0.0183, "step": 121120 }, { "epoch": 0.895375654179356, "grad_norm": 0.09293195605278015, "learning_rate": 5.25062321937322e-06, "loss": 0.0187, "step": 121130 }, { "epoch": 0.8954495727506578, "grad_norm": 0.07499924302101135, "learning_rate": 5.246913580246914e-06, "loss": 0.0153, "step": 121140 }, { "epoch": 0.8955234913219597, "grad_norm": 0.07277142256498337, "learning_rate": 5.243203941120608e-06, "loss": 0.0164, "step": 121150 }, { "epoch": 0.8955974098932615, "grad_norm": 0.07718726992607117, "learning_rate": 5.239494301994302e-06, "loss": 0.019, "step": 121160 }, { "epoch": 0.8956713284645634, "grad_norm": 0.07208777219057083, "learning_rate": 5.2357846628679966e-06, "loss": 0.0162, "step": 121170 }, { "epoch": 0.8957452470358653, "grad_norm": 0.06988029927015305, "learning_rate": 5.232075023741691e-06, "loss": 0.0161, "step": 121180 }, { "epoch": 0.8958191656071671, "grad_norm": 0.07956439256668091, "learning_rate": 5.228365384615385e-06, "loss": 0.0144, "step": 121190 }, { "epoch": 0.895893084178469, "grad_norm": 0.062169358134269714, "learning_rate": 5.224655745489079e-06, "loss": 0.0178, "step": 121200 }, { "epoch": 0.8959670027497708, "grad_norm": 0.07907166332006454, "learning_rate": 5.220946106362774e-06, "loss": 0.0166, "step": 121210 }, { "epoch": 0.8960409213210727, "grad_norm": 0.07376344501972198, "learning_rate": 5.217236467236467e-06, "loss": 0.0151, "step": 121220 }, { "epoch": 0.8961148398923746, "grad_norm": 0.08888218551874161, "learning_rate": 5.213526828110161e-06, "loss": 0.0186, "step": 121230 }, { "epoch": 0.8961887584636764, "grad_norm": 0.062063366174697876, "learning_rate": 5.209817188983856e-06, "loss": 0.0143, "step": 121240 }, { "epoch": 0.8962626770349783, "grad_norm": 0.09087532758712769, "learning_rate": 5.20610754985755e-06, "loss": 0.0159, "step": 121250 }, { "epoch": 0.8963365956062801, "grad_norm": 0.08101866394281387, "learning_rate": 5.202397910731244e-06, "loss": 0.0178, "step": 121260 }, { "epoch": 0.896410514177582, "grad_norm": 0.07356608659029007, "learning_rate": 5.1986882716049384e-06, "loss": 0.0159, "step": 121270 }, { "epoch": 0.8964844327488838, "grad_norm": 0.07567834854125977, "learning_rate": 5.194978632478633e-06, "loss": 0.018, "step": 121280 }, { "epoch": 0.8965583513201857, "grad_norm": 0.08786658197641373, "learning_rate": 5.191268993352327e-06, "loss": 0.0186, "step": 121290 }, { "epoch": 0.8966322698914876, "grad_norm": 0.0797184482216835, "learning_rate": 5.187559354226021e-06, "loss": 0.0166, "step": 121300 }, { "epoch": 0.8967061884627894, "grad_norm": 0.08069688081741333, "learning_rate": 5.1838497150997155e-06, "loss": 0.0193, "step": 121310 }, { "epoch": 0.8967801070340913, "grad_norm": 0.07624389231204987, "learning_rate": 5.18014007597341e-06, "loss": 0.0163, "step": 121320 }, { "epoch": 0.8968540256053931, "grad_norm": 0.06398090720176697, "learning_rate": 5.176430436847104e-06, "loss": 0.0175, "step": 121330 }, { "epoch": 0.896927944176695, "grad_norm": 0.09289465844631195, "learning_rate": 5.172720797720798e-06, "loss": 0.0161, "step": 121340 }, { "epoch": 0.8970018627479968, "grad_norm": 0.07656008005142212, "learning_rate": 5.169011158594493e-06, "loss": 0.017, "step": 121350 }, { "epoch": 0.8970757813192987, "grad_norm": 0.08518286049365997, "learning_rate": 5.165301519468187e-06, "loss": 0.0161, "step": 121360 }, { "epoch": 0.8971496998906006, "grad_norm": 0.06318601220846176, "learning_rate": 5.16159188034188e-06, "loss": 0.0172, "step": 121370 }, { "epoch": 0.8972236184619024, "grad_norm": 0.07610304653644562, "learning_rate": 5.157882241215575e-06, "loss": 0.0164, "step": 121380 }, { "epoch": 0.8972975370332043, "grad_norm": 0.08971790969371796, "learning_rate": 5.154172602089269e-06, "loss": 0.0206, "step": 121390 }, { "epoch": 0.897371455604506, "grad_norm": 0.07047516852617264, "learning_rate": 5.150462962962963e-06, "loss": 0.0166, "step": 121400 }, { "epoch": 0.8974453741758079, "grad_norm": 0.08966632932424545, "learning_rate": 5.146753323836657e-06, "loss": 0.016, "step": 121410 }, { "epoch": 0.8975192927471097, "grad_norm": 0.07193748652935028, "learning_rate": 5.143043684710352e-06, "loss": 0.014, "step": 121420 }, { "epoch": 0.8975932113184116, "grad_norm": 0.07095572352409363, "learning_rate": 5.139334045584046e-06, "loss": 0.0152, "step": 121430 }, { "epoch": 0.8976671298897135, "grad_norm": 0.08083576709032059, "learning_rate": 5.13562440645774e-06, "loss": 0.016, "step": 121440 }, { "epoch": 0.8977410484610153, "grad_norm": 0.0850079134106636, "learning_rate": 5.131914767331434e-06, "loss": 0.0168, "step": 121450 }, { "epoch": 0.8978149670323172, "grad_norm": 0.05830361321568489, "learning_rate": 5.128205128205128e-06, "loss": 0.0155, "step": 121460 }, { "epoch": 0.897888885603619, "grad_norm": 0.06578114628791809, "learning_rate": 5.124495489078822e-06, "loss": 0.0185, "step": 121470 }, { "epoch": 0.8979628041749209, "grad_norm": 0.087006576359272, "learning_rate": 5.1207858499525165e-06, "loss": 0.0183, "step": 121480 }, { "epoch": 0.8980367227462228, "grad_norm": 0.06513582915067673, "learning_rate": 5.117076210826211e-06, "loss": 0.0161, "step": 121490 }, { "epoch": 0.8981106413175246, "grad_norm": 0.08822928369045258, "learning_rate": 5.113366571699906e-06, "loss": 0.0195, "step": 121500 }, { "epoch": 0.8981845598888265, "grad_norm": 0.06557939201593399, "learning_rate": 5.1096569325736e-06, "loss": 0.0178, "step": 121510 }, { "epoch": 0.8982584784601283, "grad_norm": 0.09345387667417526, "learning_rate": 5.1059472934472936e-06, "loss": 0.0184, "step": 121520 }, { "epoch": 0.8983323970314302, "grad_norm": 0.10218565911054611, "learning_rate": 5.102237654320988e-06, "loss": 0.0198, "step": 121530 }, { "epoch": 0.898406315602732, "grad_norm": 0.07959327101707458, "learning_rate": 5.098528015194682e-06, "loss": 0.0178, "step": 121540 }, { "epoch": 0.8984802341740339, "grad_norm": 0.06820554286241531, "learning_rate": 5.094818376068376e-06, "loss": 0.0144, "step": 121550 }, { "epoch": 0.8985541527453358, "grad_norm": 0.09927608072757721, "learning_rate": 5.091108736942071e-06, "loss": 0.0159, "step": 121560 }, { "epoch": 0.8986280713166376, "grad_norm": 0.07535196840763092, "learning_rate": 5.087399097815765e-06, "loss": 0.0147, "step": 121570 }, { "epoch": 0.8987019898879395, "grad_norm": 0.06772439926862717, "learning_rate": 5.083689458689459e-06, "loss": 0.0161, "step": 121580 }, { "epoch": 0.8987759084592413, "grad_norm": 0.06075502559542656, "learning_rate": 5.0799798195631535e-06, "loss": 0.0164, "step": 121590 }, { "epoch": 0.8988498270305432, "grad_norm": 0.09440341591835022, "learning_rate": 5.076270180436847e-06, "loss": 0.0165, "step": 121600 }, { "epoch": 0.898923745601845, "grad_norm": 0.0991566926240921, "learning_rate": 5.072560541310541e-06, "loss": 0.0167, "step": 121610 }, { "epoch": 0.8989976641731469, "grad_norm": 0.05196934938430786, "learning_rate": 5.0688509021842354e-06, "loss": 0.0187, "step": 121620 }, { "epoch": 0.8990715827444488, "grad_norm": 0.07425817847251892, "learning_rate": 5.06514126305793e-06, "loss": 0.0174, "step": 121630 }, { "epoch": 0.8991455013157506, "grad_norm": 0.07375023514032364, "learning_rate": 5.061431623931624e-06, "loss": 0.0155, "step": 121640 }, { "epoch": 0.8992194198870525, "grad_norm": 0.07478194683790207, "learning_rate": 5.057721984805318e-06, "loss": 0.0172, "step": 121650 }, { "epoch": 0.8992933384583542, "grad_norm": 0.07814273983240128, "learning_rate": 5.0540123456790125e-06, "loss": 0.0163, "step": 121660 }, { "epoch": 0.8993672570296561, "grad_norm": 0.09756813198328018, "learning_rate": 5.050302706552707e-06, "loss": 0.0163, "step": 121670 }, { "epoch": 0.8994411756009579, "grad_norm": 0.09019820392131805, "learning_rate": 5.046593067426401e-06, "loss": 0.0189, "step": 121680 }, { "epoch": 0.8995150941722598, "grad_norm": 0.07590436935424805, "learning_rate": 5.042883428300095e-06, "loss": 0.0189, "step": 121690 }, { "epoch": 0.8995890127435617, "grad_norm": 0.05467082932591438, "learning_rate": 5.03917378917379e-06, "loss": 0.0158, "step": 121700 }, { "epoch": 0.8996629313148635, "grad_norm": 0.07867514342069626, "learning_rate": 5.035464150047484e-06, "loss": 0.0178, "step": 121710 }, { "epoch": 0.8997368498861654, "grad_norm": 0.07019774615764618, "learning_rate": 5.031754510921178e-06, "loss": 0.0143, "step": 121720 }, { "epoch": 0.8998107684574672, "grad_norm": 0.06641940027475357, "learning_rate": 5.0280448717948725e-06, "loss": 0.0175, "step": 121730 }, { "epoch": 0.8998846870287691, "grad_norm": 0.07111826539039612, "learning_rate": 5.024335232668567e-06, "loss": 0.0172, "step": 121740 }, { "epoch": 0.899958605600071, "grad_norm": 0.08311285078525543, "learning_rate": 5.02062559354226e-06, "loss": 0.0177, "step": 121750 }, { "epoch": 0.9000325241713728, "grad_norm": 0.06598973274230957, "learning_rate": 5.016915954415954e-06, "loss": 0.0163, "step": 121760 }, { "epoch": 0.9001064427426747, "grad_norm": 0.06733829528093338, "learning_rate": 5.013206315289649e-06, "loss": 0.0153, "step": 121770 }, { "epoch": 0.9001803613139765, "grad_norm": 0.06981953978538513, "learning_rate": 5.009496676163343e-06, "loss": 0.0171, "step": 121780 }, { "epoch": 0.9002542798852784, "grad_norm": 0.10032287985086441, "learning_rate": 5.005787037037037e-06, "loss": 0.0158, "step": 121790 }, { "epoch": 0.9003281984565802, "grad_norm": 0.07456158101558685, "learning_rate": 5.0020773979107315e-06, "loss": 0.0177, "step": 121800 }, { "epoch": 0.9004021170278821, "grad_norm": 0.0792987272143364, "learning_rate": 4.998367758784426e-06, "loss": 0.0165, "step": 121810 }, { "epoch": 0.900476035599184, "grad_norm": 0.07135865837335587, "learning_rate": 4.99465811965812e-06, "loss": 0.0171, "step": 121820 }, { "epoch": 0.9005499541704858, "grad_norm": 0.08674946427345276, "learning_rate": 4.9909484805318135e-06, "loss": 0.0175, "step": 121830 }, { "epoch": 0.9006238727417877, "grad_norm": 0.06915190815925598, "learning_rate": 4.987238841405508e-06, "loss": 0.016, "step": 121840 }, { "epoch": 0.9006977913130895, "grad_norm": 0.09359006583690643, "learning_rate": 4.983529202279202e-06, "loss": 0.0159, "step": 121850 }, { "epoch": 0.9007717098843914, "grad_norm": 0.08661477267742157, "learning_rate": 4.979819563152896e-06, "loss": 0.0175, "step": 121860 }, { "epoch": 0.9008456284556932, "grad_norm": 0.06485380232334137, "learning_rate": 4.976109924026591e-06, "loss": 0.0171, "step": 121870 }, { "epoch": 0.9009195470269951, "grad_norm": 0.0564052015542984, "learning_rate": 4.972400284900286e-06, "loss": 0.0157, "step": 121880 }, { "epoch": 0.900993465598297, "grad_norm": 0.06444479525089264, "learning_rate": 4.96869064577398e-06, "loss": 0.0175, "step": 121890 }, { "epoch": 0.9010673841695988, "grad_norm": 0.0819358304142952, "learning_rate": 4.964981006647674e-06, "loss": 0.0177, "step": 121900 }, { "epoch": 0.9011413027409007, "grad_norm": 0.10272148251533508, "learning_rate": 4.961271367521368e-06, "loss": 0.0178, "step": 121910 }, { "epoch": 0.9012152213122024, "grad_norm": 0.07735146582126617, "learning_rate": 4.957561728395062e-06, "loss": 0.0184, "step": 121920 }, { "epoch": 0.9012891398835043, "grad_norm": 0.06250828504562378, "learning_rate": 4.953852089268756e-06, "loss": 0.0157, "step": 121930 }, { "epoch": 0.9013630584548061, "grad_norm": 0.05823352932929993, "learning_rate": 4.9501424501424505e-06, "loss": 0.0142, "step": 121940 }, { "epoch": 0.901436977026108, "grad_norm": 0.09979364275932312, "learning_rate": 4.946432811016145e-06, "loss": 0.0142, "step": 121950 }, { "epoch": 0.9015108955974099, "grad_norm": 0.07167592644691467, "learning_rate": 4.942723171889839e-06, "loss": 0.0169, "step": 121960 }, { "epoch": 0.9015848141687117, "grad_norm": 0.09357810020446777, "learning_rate": 4.939013532763533e-06, "loss": 0.016, "step": 121970 }, { "epoch": 0.9016587327400136, "grad_norm": 0.06767541170120239, "learning_rate": 4.935303893637227e-06, "loss": 0.0159, "step": 121980 }, { "epoch": 0.9017326513113154, "grad_norm": 0.06723565608263016, "learning_rate": 4.931594254510921e-06, "loss": 0.0161, "step": 121990 }, { "epoch": 0.9018065698826173, "grad_norm": 0.08712873607873917, "learning_rate": 4.927884615384615e-06, "loss": 0.0157, "step": 122000 }, { "epoch": 0.9018804884539192, "grad_norm": 0.08881501108407974, "learning_rate": 4.9241749762583096e-06, "loss": 0.0182, "step": 122010 }, { "epoch": 0.901954407025221, "grad_norm": 0.08723103255033493, "learning_rate": 4.920465337132004e-06, "loss": 0.0193, "step": 122020 }, { "epoch": 0.9020283255965229, "grad_norm": 0.09474994242191315, "learning_rate": 4.916755698005698e-06, "loss": 0.0147, "step": 122030 }, { "epoch": 0.9021022441678247, "grad_norm": 0.0643463209271431, "learning_rate": 4.913046058879392e-06, "loss": 0.0192, "step": 122040 }, { "epoch": 0.9021761627391266, "grad_norm": 0.06547664850950241, "learning_rate": 4.909336419753087e-06, "loss": 0.0173, "step": 122050 }, { "epoch": 0.9022500813104284, "grad_norm": 0.08374817669391632, "learning_rate": 4.905626780626781e-06, "loss": 0.0194, "step": 122060 }, { "epoch": 0.9023239998817303, "grad_norm": 0.07471413910388947, "learning_rate": 4.901917141500475e-06, "loss": 0.0169, "step": 122070 }, { "epoch": 0.9023979184530322, "grad_norm": 0.07656655460596085, "learning_rate": 4.8982075023741695e-06, "loss": 0.0154, "step": 122080 }, { "epoch": 0.902471837024334, "grad_norm": 0.07885884493589401, "learning_rate": 4.894497863247864e-06, "loss": 0.0161, "step": 122090 }, { "epoch": 0.9025457555956359, "grad_norm": 0.07954913377761841, "learning_rate": 4.890788224121558e-06, "loss": 0.0171, "step": 122100 }, { "epoch": 0.9026196741669377, "grad_norm": 0.10426130145788193, "learning_rate": 4.887078584995252e-06, "loss": 0.0195, "step": 122110 }, { "epoch": 0.9026935927382396, "grad_norm": 0.08550582826137543, "learning_rate": 4.8833689458689466e-06, "loss": 0.0189, "step": 122120 }, { "epoch": 0.9027675113095414, "grad_norm": 0.08839669823646545, "learning_rate": 4.879659306742641e-06, "loss": 0.0167, "step": 122130 }, { "epoch": 0.9028414298808433, "grad_norm": 0.0725245326757431, "learning_rate": 4.875949667616334e-06, "loss": 0.0164, "step": 122140 }, { "epoch": 0.9029153484521452, "grad_norm": 0.061979878693819046, "learning_rate": 4.8722400284900285e-06, "loss": 0.0162, "step": 122150 }, { "epoch": 0.902989267023447, "grad_norm": 0.059237040579319, "learning_rate": 4.868530389363723e-06, "loss": 0.0177, "step": 122160 }, { "epoch": 0.9030631855947489, "grad_norm": 0.07624328136444092, "learning_rate": 4.864820750237417e-06, "loss": 0.0179, "step": 122170 }, { "epoch": 0.9031371041660506, "grad_norm": 0.09410177916288376, "learning_rate": 4.861111111111111e-06, "loss": 0.0184, "step": 122180 }, { "epoch": 0.9032110227373525, "grad_norm": 0.09074148535728455, "learning_rate": 4.857401471984806e-06, "loss": 0.0158, "step": 122190 }, { "epoch": 0.9032849413086543, "grad_norm": 0.07468174397945404, "learning_rate": 4.8536918328585e-06, "loss": 0.0165, "step": 122200 }, { "epoch": 0.9033588598799562, "grad_norm": 0.08903239667415619, "learning_rate": 4.849982193732193e-06, "loss": 0.0175, "step": 122210 }, { "epoch": 0.9034327784512581, "grad_norm": 0.065603107213974, "learning_rate": 4.846272554605888e-06, "loss": 0.0148, "step": 122220 }, { "epoch": 0.9035066970225599, "grad_norm": 0.08339151740074158, "learning_rate": 4.842562915479582e-06, "loss": 0.015, "step": 122230 }, { "epoch": 0.9035806155938618, "grad_norm": 0.0531030036509037, "learning_rate": 4.838853276353276e-06, "loss": 0.0184, "step": 122240 }, { "epoch": 0.9036545341651636, "grad_norm": 0.07239489257335663, "learning_rate": 4.83514363722697e-06, "loss": 0.0178, "step": 122250 }, { "epoch": 0.9037284527364655, "grad_norm": 0.09008802473545074, "learning_rate": 4.8314339981006655e-06, "loss": 0.0168, "step": 122260 }, { "epoch": 0.9038023713077674, "grad_norm": 0.06578698009252548, "learning_rate": 4.82772435897436e-06, "loss": 0.0172, "step": 122270 }, { "epoch": 0.9038762898790692, "grad_norm": 0.0775492787361145, "learning_rate": 4.824014719848054e-06, "loss": 0.0168, "step": 122280 }, { "epoch": 0.9039502084503711, "grad_norm": 0.09722190350294113, "learning_rate": 4.8203050807217475e-06, "loss": 0.0181, "step": 122290 }, { "epoch": 0.9040241270216729, "grad_norm": 0.09262166172266006, "learning_rate": 4.816595441595442e-06, "loss": 0.0154, "step": 122300 }, { "epoch": 0.9040980455929748, "grad_norm": 0.05689336732029915, "learning_rate": 4.812885802469136e-06, "loss": 0.0155, "step": 122310 }, { "epoch": 0.9041719641642766, "grad_norm": 0.07098221778869629, "learning_rate": 4.80917616334283e-06, "loss": 0.0177, "step": 122320 }, { "epoch": 0.9042458827355785, "grad_norm": 0.08449529111385345, "learning_rate": 4.805466524216525e-06, "loss": 0.016, "step": 122330 }, { "epoch": 0.9043198013068804, "grad_norm": 0.061818934977054596, "learning_rate": 4.801756885090219e-06, "loss": 0.0158, "step": 122340 }, { "epoch": 0.9043937198781822, "grad_norm": 0.052378345280885696, "learning_rate": 4.798047245963913e-06, "loss": 0.0171, "step": 122350 }, { "epoch": 0.9044676384494841, "grad_norm": 0.08641374856233597, "learning_rate": 4.794337606837607e-06, "loss": 0.016, "step": 122360 }, { "epoch": 0.9045415570207859, "grad_norm": 0.07966215908527374, "learning_rate": 4.790627967711301e-06, "loss": 0.0163, "step": 122370 }, { "epoch": 0.9046154755920878, "grad_norm": 0.06808850914239883, "learning_rate": 4.786918328584995e-06, "loss": 0.0174, "step": 122380 }, { "epoch": 0.9046893941633896, "grad_norm": 0.0724160447716713, "learning_rate": 4.783208689458689e-06, "loss": 0.0165, "step": 122390 }, { "epoch": 0.9047633127346915, "grad_norm": 0.08387361466884613, "learning_rate": 4.779499050332384e-06, "loss": 0.0164, "step": 122400 }, { "epoch": 0.9048372313059934, "grad_norm": 0.06441134214401245, "learning_rate": 4.775789411206078e-06, "loss": 0.0144, "step": 122410 }, { "epoch": 0.9049111498772952, "grad_norm": 0.09223717451095581, "learning_rate": 4.772079772079772e-06, "loss": 0.0182, "step": 122420 }, { "epoch": 0.904985068448597, "grad_norm": 0.08156295120716095, "learning_rate": 4.7683701329534665e-06, "loss": 0.0166, "step": 122430 }, { "epoch": 0.9050589870198988, "grad_norm": 0.06494969874620438, "learning_rate": 4.764660493827161e-06, "loss": 0.0134, "step": 122440 }, { "epoch": 0.9051329055912007, "grad_norm": 0.07923052459955215, "learning_rate": 4.760950854700855e-06, "loss": 0.0163, "step": 122450 }, { "epoch": 0.9052068241625025, "grad_norm": 0.08734191954135895, "learning_rate": 4.757241215574549e-06, "loss": 0.0196, "step": 122460 }, { "epoch": 0.9052807427338044, "grad_norm": 0.05656689032912254, "learning_rate": 4.7535315764482436e-06, "loss": 0.0157, "step": 122470 }, { "epoch": 0.9053546613051063, "grad_norm": 0.08763660490512848, "learning_rate": 4.749821937321938e-06, "loss": 0.0186, "step": 122480 }, { "epoch": 0.9054285798764081, "grad_norm": 0.08455368131399155, "learning_rate": 4.746112298195632e-06, "loss": 0.0188, "step": 122490 }, { "epoch": 0.90550249844771, "grad_norm": 0.1215326115489006, "learning_rate": 4.742402659069326e-06, "loss": 0.0161, "step": 122500 }, { "epoch": 0.9055764170190118, "grad_norm": 0.07503091543912888, "learning_rate": 4.738693019943021e-06, "loss": 0.0163, "step": 122510 }, { "epoch": 0.9056503355903137, "grad_norm": 0.061274394392967224, "learning_rate": 4.734983380816714e-06, "loss": 0.0191, "step": 122520 }, { "epoch": 0.9057242541616156, "grad_norm": 0.05623488873243332, "learning_rate": 4.731273741690408e-06, "loss": 0.0152, "step": 122530 }, { "epoch": 0.9057981727329174, "grad_norm": 0.09959295392036438, "learning_rate": 4.727564102564103e-06, "loss": 0.016, "step": 122540 }, { "epoch": 0.9058720913042193, "grad_norm": 0.06970833986997604, "learning_rate": 4.723854463437797e-06, "loss": 0.0168, "step": 122550 }, { "epoch": 0.9059460098755211, "grad_norm": 0.0789426937699318, "learning_rate": 4.720144824311491e-06, "loss": 0.0146, "step": 122560 }, { "epoch": 0.906019928446823, "grad_norm": 0.058944690972566605, "learning_rate": 4.7164351851851854e-06, "loss": 0.0154, "step": 122570 }, { "epoch": 0.9060938470181248, "grad_norm": 0.11136980354785919, "learning_rate": 4.71272554605888e-06, "loss": 0.0192, "step": 122580 }, { "epoch": 0.9061677655894267, "grad_norm": 0.07429955899715424, "learning_rate": 4.709015906932574e-06, "loss": 0.0179, "step": 122590 }, { "epoch": 0.9062416841607286, "grad_norm": 0.06887314468622208, "learning_rate": 4.705306267806267e-06, "loss": 0.0161, "step": 122600 }, { "epoch": 0.9063156027320304, "grad_norm": 0.06658325344324112, "learning_rate": 4.701596628679962e-06, "loss": 0.0186, "step": 122610 }, { "epoch": 0.9063895213033323, "grad_norm": 0.09197299927473068, "learning_rate": 4.697886989553656e-06, "loss": 0.0161, "step": 122620 }, { "epoch": 0.9064634398746341, "grad_norm": 0.08984216302633286, "learning_rate": 4.69417735042735e-06, "loss": 0.0177, "step": 122630 }, { "epoch": 0.906537358445936, "grad_norm": 0.07162947952747345, "learning_rate": 4.6904677113010445e-06, "loss": 0.0157, "step": 122640 }, { "epoch": 0.9066112770172378, "grad_norm": 0.06426996737718582, "learning_rate": 4.68675807217474e-06, "loss": 0.0163, "step": 122650 }, { "epoch": 0.9066851955885397, "grad_norm": 0.09726285934448242, "learning_rate": 4.683048433048434e-06, "loss": 0.0179, "step": 122660 }, { "epoch": 0.9067591141598416, "grad_norm": 0.09400387853384018, "learning_rate": 4.679338793922128e-06, "loss": 0.0158, "step": 122670 }, { "epoch": 0.9068330327311434, "grad_norm": 0.08956044912338257, "learning_rate": 4.675629154795822e-06, "loss": 0.0155, "step": 122680 }, { "epoch": 0.9069069513024453, "grad_norm": 0.07334977388381958, "learning_rate": 4.671919515669516e-06, "loss": 0.0173, "step": 122690 }, { "epoch": 0.906980869873747, "grad_norm": 0.05719450116157532, "learning_rate": 4.66820987654321e-06, "loss": 0.0183, "step": 122700 }, { "epoch": 0.9070547884450489, "grad_norm": 0.059781696647405624, "learning_rate": 4.664500237416904e-06, "loss": 0.0164, "step": 122710 }, { "epoch": 0.9071287070163507, "grad_norm": 0.0924462080001831, "learning_rate": 4.660790598290599e-06, "loss": 0.0166, "step": 122720 }, { "epoch": 0.9072026255876526, "grad_norm": 0.06804405897855759, "learning_rate": 4.657080959164293e-06, "loss": 0.0173, "step": 122730 }, { "epoch": 0.9072765441589545, "grad_norm": 0.08799871802330017, "learning_rate": 4.653371320037987e-06, "loss": 0.0186, "step": 122740 }, { "epoch": 0.9073504627302563, "grad_norm": 0.08435172587633133, "learning_rate": 4.649661680911681e-06, "loss": 0.0163, "step": 122750 }, { "epoch": 0.9074243813015582, "grad_norm": 0.09052610397338867, "learning_rate": 4.645952041785375e-06, "loss": 0.019, "step": 122760 }, { "epoch": 0.90749829987286, "grad_norm": 0.055818017572164536, "learning_rate": 4.642242402659069e-06, "loss": 0.0177, "step": 122770 }, { "epoch": 0.9075722184441619, "grad_norm": 0.07775738090276718, "learning_rate": 4.6385327635327635e-06, "loss": 0.0165, "step": 122780 }, { "epoch": 0.9076461370154638, "grad_norm": 0.10018419474363327, "learning_rate": 4.634823124406458e-06, "loss": 0.0179, "step": 122790 }, { "epoch": 0.9077200555867656, "grad_norm": 0.068797767162323, "learning_rate": 4.631113485280152e-06, "loss": 0.0183, "step": 122800 }, { "epoch": 0.9077939741580675, "grad_norm": 0.07335837185382843, "learning_rate": 4.627403846153846e-06, "loss": 0.0173, "step": 122810 }, { "epoch": 0.9078678927293693, "grad_norm": 0.05817072093486786, "learning_rate": 4.6236942070275406e-06, "loss": 0.0157, "step": 122820 }, { "epoch": 0.9079418113006712, "grad_norm": 0.0800141766667366, "learning_rate": 4.619984567901235e-06, "loss": 0.0176, "step": 122830 }, { "epoch": 0.908015729871973, "grad_norm": 0.12860678136348724, "learning_rate": 4.616274928774929e-06, "loss": 0.0183, "step": 122840 }, { "epoch": 0.9080896484432749, "grad_norm": 0.09741950035095215, "learning_rate": 4.612565289648623e-06, "loss": 0.0187, "step": 122850 }, { "epoch": 0.9081635670145768, "grad_norm": 0.06886820495128632, "learning_rate": 4.608855650522318e-06, "loss": 0.0174, "step": 122860 }, { "epoch": 0.9082374855858786, "grad_norm": 0.0636502280831337, "learning_rate": 4.605146011396012e-06, "loss": 0.0184, "step": 122870 }, { "epoch": 0.9083114041571805, "grad_norm": 0.06838538497686386, "learning_rate": 4.601436372269706e-06, "loss": 0.017, "step": 122880 }, { "epoch": 0.9083853227284823, "grad_norm": 0.0679185763001442, "learning_rate": 4.5977267331434005e-06, "loss": 0.0171, "step": 122890 }, { "epoch": 0.9084592412997842, "grad_norm": 0.08525998890399933, "learning_rate": 4.594017094017095e-06, "loss": 0.0181, "step": 122900 }, { "epoch": 0.908533159871086, "grad_norm": 0.10945747792720795, "learning_rate": 4.590307454890788e-06, "loss": 0.0172, "step": 122910 }, { "epoch": 0.9086070784423879, "grad_norm": 0.06596176326274872, "learning_rate": 4.5865978157644825e-06, "loss": 0.0168, "step": 122920 }, { "epoch": 0.9086809970136898, "grad_norm": 0.06511720269918442, "learning_rate": 4.582888176638177e-06, "loss": 0.0165, "step": 122930 }, { "epoch": 0.9087549155849916, "grad_norm": 0.10036970674991608, "learning_rate": 4.579178537511871e-06, "loss": 0.02, "step": 122940 }, { "epoch": 0.9088288341562935, "grad_norm": 0.07263343781232834, "learning_rate": 4.575468898385565e-06, "loss": 0.0181, "step": 122950 }, { "epoch": 0.9089027527275952, "grad_norm": 0.06853675842285156, "learning_rate": 4.5717592592592595e-06, "loss": 0.0164, "step": 122960 }, { "epoch": 0.9089766712988971, "grad_norm": 0.08699023723602295, "learning_rate": 4.568049620132954e-06, "loss": 0.0161, "step": 122970 }, { "epoch": 0.909050589870199, "grad_norm": 0.08658810704946518, "learning_rate": 4.564339981006647e-06, "loss": 0.0181, "step": 122980 }, { "epoch": 0.9091245084415008, "grad_norm": 0.10963263362646103, "learning_rate": 4.5606303418803415e-06, "loss": 0.0182, "step": 122990 }, { "epoch": 0.9091984270128027, "grad_norm": 0.09154821932315826, "learning_rate": 4.556920702754036e-06, "loss": 0.0169, "step": 123000 }, { "epoch": 0.9092723455841045, "grad_norm": 0.07183562219142914, "learning_rate": 4.55321106362773e-06, "loss": 0.0141, "step": 123010 }, { "epoch": 0.9093462641554064, "grad_norm": 0.07396021485328674, "learning_rate": 4.549501424501424e-06, "loss": 0.0173, "step": 123020 }, { "epoch": 0.9094201827267082, "grad_norm": 0.07018588483333588, "learning_rate": 4.5457917853751195e-06, "loss": 0.0172, "step": 123030 }, { "epoch": 0.9094941012980101, "grad_norm": 0.08253616839647293, "learning_rate": 4.542082146248814e-06, "loss": 0.016, "step": 123040 }, { "epoch": 0.909568019869312, "grad_norm": 0.11334005743265152, "learning_rate": 4.538372507122508e-06, "loss": 0.0162, "step": 123050 }, { "epoch": 0.9096419384406138, "grad_norm": 0.09180520474910736, "learning_rate": 4.5346628679962014e-06, "loss": 0.0167, "step": 123060 }, { "epoch": 0.9097158570119157, "grad_norm": 0.08304790407419205, "learning_rate": 4.530953228869896e-06, "loss": 0.0169, "step": 123070 }, { "epoch": 0.9097897755832175, "grad_norm": 0.08140013366937637, "learning_rate": 4.52724358974359e-06, "loss": 0.0149, "step": 123080 }, { "epoch": 0.9098636941545194, "grad_norm": 0.07130993902683258, "learning_rate": 4.523533950617284e-06, "loss": 0.0187, "step": 123090 }, { "epoch": 0.9099376127258212, "grad_norm": 0.07848509401082993, "learning_rate": 4.5198243114909785e-06, "loss": 0.0163, "step": 123100 }, { "epoch": 0.9100115312971231, "grad_norm": 0.07500548660755157, "learning_rate": 4.516114672364673e-06, "loss": 0.0167, "step": 123110 }, { "epoch": 0.910085449868425, "grad_norm": 0.06861239671707153, "learning_rate": 4.512405033238367e-06, "loss": 0.0144, "step": 123120 }, { "epoch": 0.9101593684397268, "grad_norm": 0.07011248916387558, "learning_rate": 4.508695394112061e-06, "loss": 0.0182, "step": 123130 }, { "epoch": 0.9102332870110287, "grad_norm": 0.07584835588932037, "learning_rate": 4.504985754985755e-06, "loss": 0.0162, "step": 123140 }, { "epoch": 0.9103072055823305, "grad_norm": 0.07313469797372818, "learning_rate": 4.501276115859449e-06, "loss": 0.0191, "step": 123150 }, { "epoch": 0.9103811241536324, "grad_norm": 0.07153768092393875, "learning_rate": 4.497566476733143e-06, "loss": 0.0157, "step": 123160 }, { "epoch": 0.9104550427249342, "grad_norm": 0.059805650264024734, "learning_rate": 4.493856837606838e-06, "loss": 0.0146, "step": 123170 }, { "epoch": 0.9105289612962361, "grad_norm": 0.07940264791250229, "learning_rate": 4.490147198480532e-06, "loss": 0.0141, "step": 123180 }, { "epoch": 0.910602879867538, "grad_norm": 0.07751365751028061, "learning_rate": 4.486437559354226e-06, "loss": 0.0144, "step": 123190 }, { "epoch": 0.9106767984388398, "grad_norm": 0.07345723360776901, "learning_rate": 4.48272792022792e-06, "loss": 0.0177, "step": 123200 }, { "epoch": 0.9107507170101417, "grad_norm": 0.14844071865081787, "learning_rate": 4.479018281101615e-06, "loss": 0.0163, "step": 123210 }, { "epoch": 0.9108246355814434, "grad_norm": 0.0766979455947876, "learning_rate": 4.475308641975309e-06, "loss": 0.0147, "step": 123220 }, { "epoch": 0.9108985541527453, "grad_norm": 0.054067812860012054, "learning_rate": 4.471599002849003e-06, "loss": 0.0146, "step": 123230 }, { "epoch": 0.9109724727240472, "grad_norm": 0.16767457127571106, "learning_rate": 4.4678893637226975e-06, "loss": 0.0187, "step": 123240 }, { "epoch": 0.911046391295349, "grad_norm": 0.07993284612894058, "learning_rate": 4.464179724596392e-06, "loss": 0.0174, "step": 123250 }, { "epoch": 0.9111203098666509, "grad_norm": 0.06665073335170746, "learning_rate": 4.460470085470086e-06, "loss": 0.0168, "step": 123260 }, { "epoch": 0.9111942284379527, "grad_norm": 0.10466733574867249, "learning_rate": 4.45676044634378e-06, "loss": 0.0186, "step": 123270 }, { "epoch": 0.9112681470092546, "grad_norm": 0.08831361681222916, "learning_rate": 4.453050807217475e-06, "loss": 0.0165, "step": 123280 }, { "epoch": 0.9113420655805564, "grad_norm": 0.08201731741428375, "learning_rate": 4.449341168091168e-06, "loss": 0.0175, "step": 123290 }, { "epoch": 0.9114159841518583, "grad_norm": 0.066454216837883, "learning_rate": 4.445631528964862e-06, "loss": 0.0168, "step": 123300 }, { "epoch": 0.9114899027231602, "grad_norm": 0.07183511555194855, "learning_rate": 4.4419218898385566e-06, "loss": 0.018, "step": 123310 }, { "epoch": 0.911563821294462, "grad_norm": 0.08452675491571426, "learning_rate": 4.438212250712251e-06, "loss": 0.0174, "step": 123320 }, { "epoch": 0.9116377398657639, "grad_norm": 0.07579536736011505, "learning_rate": 4.434502611585945e-06, "loss": 0.0171, "step": 123330 }, { "epoch": 0.9117116584370657, "grad_norm": 0.0779586210846901, "learning_rate": 4.430792972459639e-06, "loss": 0.0166, "step": 123340 }, { "epoch": 0.9117855770083676, "grad_norm": 0.08135387301445007, "learning_rate": 4.427083333333334e-06, "loss": 0.0152, "step": 123350 }, { "epoch": 0.9118594955796694, "grad_norm": 0.08736226707696915, "learning_rate": 4.423373694207028e-06, "loss": 0.017, "step": 123360 }, { "epoch": 0.9119334141509713, "grad_norm": 0.07891429215669632, "learning_rate": 4.419664055080721e-06, "loss": 0.0164, "step": 123370 }, { "epoch": 0.9120073327222732, "grad_norm": 0.10436808317899704, "learning_rate": 4.415954415954416e-06, "loss": 0.0182, "step": 123380 }, { "epoch": 0.912081251293575, "grad_norm": 0.07729527354240417, "learning_rate": 4.41224477682811e-06, "loss": 0.0167, "step": 123390 }, { "epoch": 0.9121551698648769, "grad_norm": 0.06779775768518448, "learning_rate": 4.408535137701804e-06, "loss": 0.0139, "step": 123400 }, { "epoch": 0.9122290884361787, "grad_norm": 0.09348805993795395, "learning_rate": 4.404825498575499e-06, "loss": 0.0151, "step": 123410 }, { "epoch": 0.9123030070074806, "grad_norm": 0.07028793543577194, "learning_rate": 4.4011158594491936e-06, "loss": 0.0168, "step": 123420 }, { "epoch": 0.9123769255787824, "grad_norm": 0.10762038826942444, "learning_rate": 4.397406220322888e-06, "loss": 0.0179, "step": 123430 }, { "epoch": 0.9124508441500843, "grad_norm": 0.08728470653295517, "learning_rate": 4.393696581196581e-06, "loss": 0.0177, "step": 123440 }, { "epoch": 0.9125247627213862, "grad_norm": 0.07320371270179749, "learning_rate": 4.3899869420702755e-06, "loss": 0.0151, "step": 123450 }, { "epoch": 0.912598681292688, "grad_norm": 0.10701316595077515, "learning_rate": 4.38627730294397e-06, "loss": 0.0181, "step": 123460 }, { "epoch": 0.9126725998639899, "grad_norm": 0.07376556843519211, "learning_rate": 4.382567663817664e-06, "loss": 0.0164, "step": 123470 }, { "epoch": 0.9127465184352916, "grad_norm": 0.07854204624891281, "learning_rate": 4.378858024691358e-06, "loss": 0.019, "step": 123480 }, { "epoch": 0.9128204370065935, "grad_norm": 0.0887388065457344, "learning_rate": 4.375148385565053e-06, "loss": 0.0192, "step": 123490 }, { "epoch": 0.9128943555778954, "grad_norm": 0.0878068134188652, "learning_rate": 4.371438746438747e-06, "loss": 0.0171, "step": 123500 }, { "epoch": 0.9129682741491972, "grad_norm": 0.0852675661444664, "learning_rate": 4.367729107312441e-06, "loss": 0.0136, "step": 123510 }, { "epoch": 0.9130421927204991, "grad_norm": 0.08798334002494812, "learning_rate": 4.364019468186135e-06, "loss": 0.0188, "step": 123520 }, { "epoch": 0.9131161112918009, "grad_norm": 0.06569161266088486, "learning_rate": 4.360309829059829e-06, "loss": 0.0176, "step": 123530 }, { "epoch": 0.9131900298631028, "grad_norm": 0.05657649040222168, "learning_rate": 4.356600189933523e-06, "loss": 0.0163, "step": 123540 }, { "epoch": 0.9132639484344046, "grad_norm": 0.07725798338651657, "learning_rate": 4.352890550807217e-06, "loss": 0.0179, "step": 123550 }, { "epoch": 0.9133378670057065, "grad_norm": 0.07964330911636353, "learning_rate": 4.349180911680912e-06, "loss": 0.018, "step": 123560 }, { "epoch": 0.9134117855770084, "grad_norm": 0.0666557103395462, "learning_rate": 4.345471272554606e-06, "loss": 0.0189, "step": 123570 }, { "epoch": 0.9134857041483102, "grad_norm": 0.11181606352329254, "learning_rate": 4.3417616334283e-06, "loss": 0.0175, "step": 123580 }, { "epoch": 0.9135596227196121, "grad_norm": 0.07617869973182678, "learning_rate": 4.3380519943019945e-06, "loss": 0.0156, "step": 123590 }, { "epoch": 0.9136335412909139, "grad_norm": 0.0608520433306694, "learning_rate": 4.334342355175689e-06, "loss": 0.0158, "step": 123600 }, { "epoch": 0.9137074598622158, "grad_norm": 0.07510219514369965, "learning_rate": 4.330632716049383e-06, "loss": 0.0167, "step": 123610 }, { "epoch": 0.9137813784335176, "grad_norm": 0.07799938321113586, "learning_rate": 4.326923076923077e-06, "loss": 0.0145, "step": 123620 }, { "epoch": 0.9138552970048195, "grad_norm": 0.09128500521183014, "learning_rate": 4.323213437796772e-06, "loss": 0.0195, "step": 123630 }, { "epoch": 0.9139292155761214, "grad_norm": 0.07458452880382538, "learning_rate": 4.319503798670466e-06, "loss": 0.0159, "step": 123640 }, { "epoch": 0.9140031341474232, "grad_norm": 0.06051633879542351, "learning_rate": 4.31579415954416e-06, "loss": 0.0158, "step": 123650 }, { "epoch": 0.9140770527187251, "grad_norm": 0.068628691136837, "learning_rate": 4.312084520417854e-06, "loss": 0.0176, "step": 123660 }, { "epoch": 0.9141509712900269, "grad_norm": 0.05428208038210869, "learning_rate": 4.308374881291549e-06, "loss": 0.0146, "step": 123670 }, { "epoch": 0.9142248898613288, "grad_norm": 0.08900542557239532, "learning_rate": 4.304665242165242e-06, "loss": 0.0169, "step": 123680 }, { "epoch": 0.9142988084326306, "grad_norm": 0.05679711326956749, "learning_rate": 4.300955603038936e-06, "loss": 0.0184, "step": 123690 }, { "epoch": 0.9143727270039325, "grad_norm": 0.07910363376140594, "learning_rate": 4.297245963912631e-06, "loss": 0.0168, "step": 123700 }, { "epoch": 0.9144466455752344, "grad_norm": 0.06929022818803787, "learning_rate": 4.293536324786325e-06, "loss": 0.0179, "step": 123710 }, { "epoch": 0.9145205641465362, "grad_norm": 0.08467814326286316, "learning_rate": 4.289826685660019e-06, "loss": 0.0148, "step": 123720 }, { "epoch": 0.914594482717838, "grad_norm": 0.08828160911798477, "learning_rate": 4.2861170465337135e-06, "loss": 0.0164, "step": 123730 }, { "epoch": 0.9146684012891398, "grad_norm": 0.08820133656263351, "learning_rate": 4.282407407407408e-06, "loss": 0.0158, "step": 123740 }, { "epoch": 0.9147423198604417, "grad_norm": 0.06262107938528061, "learning_rate": 4.278697768281101e-06, "loss": 0.0197, "step": 123750 }, { "epoch": 0.9148162384317436, "grad_norm": 0.07561814039945602, "learning_rate": 4.2749881291547955e-06, "loss": 0.0179, "step": 123760 }, { "epoch": 0.9148901570030454, "grad_norm": 0.08782166242599487, "learning_rate": 4.27127849002849e-06, "loss": 0.0178, "step": 123770 }, { "epoch": 0.9149640755743473, "grad_norm": 0.07191344350576401, "learning_rate": 4.267568850902184e-06, "loss": 0.0177, "step": 123780 }, { "epoch": 0.9150379941456491, "grad_norm": 0.10342707484960556, "learning_rate": 4.263859211775878e-06, "loss": 0.0186, "step": 123790 }, { "epoch": 0.915111912716951, "grad_norm": 0.0876234620809555, "learning_rate": 4.260149572649573e-06, "loss": 0.0171, "step": 123800 }, { "epoch": 0.9151858312882528, "grad_norm": 0.07162667065858841, "learning_rate": 4.256439933523268e-06, "loss": 0.0185, "step": 123810 }, { "epoch": 0.9152597498595547, "grad_norm": 0.07592875510454178, "learning_rate": 4.252730294396962e-06, "loss": 0.0168, "step": 123820 }, { "epoch": 0.9153336684308566, "grad_norm": 0.08524484187364578, "learning_rate": 4.249020655270655e-06, "loss": 0.0176, "step": 123830 }, { "epoch": 0.9154075870021584, "grad_norm": 0.09293634444475174, "learning_rate": 4.24531101614435e-06, "loss": 0.0178, "step": 123840 }, { "epoch": 0.9154815055734603, "grad_norm": 0.10401646792888641, "learning_rate": 4.241601377018044e-06, "loss": 0.0167, "step": 123850 }, { "epoch": 0.9155554241447621, "grad_norm": 0.0697089210152626, "learning_rate": 4.237891737891738e-06, "loss": 0.0171, "step": 123860 }, { "epoch": 0.915629342716064, "grad_norm": 0.06575757265090942, "learning_rate": 4.2341820987654325e-06, "loss": 0.0177, "step": 123870 }, { "epoch": 0.9157032612873658, "grad_norm": 0.05003274977207184, "learning_rate": 4.230472459639127e-06, "loss": 0.0172, "step": 123880 }, { "epoch": 0.9157771798586677, "grad_norm": 0.08424724638462067, "learning_rate": 4.226762820512821e-06, "loss": 0.0207, "step": 123890 }, { "epoch": 0.9158510984299696, "grad_norm": 0.06601136922836304, "learning_rate": 4.223053181386515e-06, "loss": 0.0188, "step": 123900 }, { "epoch": 0.9159250170012714, "grad_norm": 0.082279734313488, "learning_rate": 4.219343542260209e-06, "loss": 0.0167, "step": 123910 }, { "epoch": 0.9159989355725733, "grad_norm": 0.06980005651712418, "learning_rate": 4.215633903133903e-06, "loss": 0.0175, "step": 123920 }, { "epoch": 0.9160728541438751, "grad_norm": 0.08342483639717102, "learning_rate": 4.211924264007597e-06, "loss": 0.0176, "step": 123930 }, { "epoch": 0.916146772715177, "grad_norm": 0.07356125116348267, "learning_rate": 4.2082146248812915e-06, "loss": 0.0143, "step": 123940 }, { "epoch": 0.9162206912864788, "grad_norm": 0.06317978352308273, "learning_rate": 4.204504985754986e-06, "loss": 0.0179, "step": 123950 }, { "epoch": 0.9162946098577807, "grad_norm": 0.08833855390548706, "learning_rate": 4.20079534662868e-06, "loss": 0.0164, "step": 123960 }, { "epoch": 0.9163685284290826, "grad_norm": 0.07598882913589478, "learning_rate": 4.197085707502374e-06, "loss": 0.0166, "step": 123970 }, { "epoch": 0.9164424470003844, "grad_norm": 0.08852225542068481, "learning_rate": 4.193376068376069e-06, "loss": 0.0167, "step": 123980 }, { "epoch": 0.9165163655716863, "grad_norm": 0.08856448531150818, "learning_rate": 4.189666429249763e-06, "loss": 0.0187, "step": 123990 }, { "epoch": 0.916590284142988, "grad_norm": 0.08729839324951172, "learning_rate": 4.185956790123457e-06, "loss": 0.0182, "step": 124000 }, { "epoch": 0.9166642027142899, "grad_norm": 0.07654992491006851, "learning_rate": 4.1822471509971514e-06, "loss": 0.0146, "step": 124010 }, { "epoch": 0.9167381212855918, "grad_norm": 0.06819306313991547, "learning_rate": 4.178537511870846e-06, "loss": 0.017, "step": 124020 }, { "epoch": 0.9168120398568936, "grad_norm": 0.059470996260643005, "learning_rate": 4.17482787274454e-06, "loss": 0.0181, "step": 124030 }, { "epoch": 0.9168859584281955, "grad_norm": 0.0711124911904335, "learning_rate": 4.171118233618234e-06, "loss": 0.0163, "step": 124040 }, { "epoch": 0.9169598769994973, "grad_norm": 0.09192194789648056, "learning_rate": 4.1674085944919285e-06, "loss": 0.0171, "step": 124050 }, { "epoch": 0.9170337955707992, "grad_norm": 0.07689784467220306, "learning_rate": 4.163698955365622e-06, "loss": 0.0167, "step": 124060 }, { "epoch": 0.917107714142101, "grad_norm": 0.0563366636633873, "learning_rate": 4.159989316239316e-06, "loss": 0.016, "step": 124070 }, { "epoch": 0.9171816327134029, "grad_norm": 0.10087716579437256, "learning_rate": 4.1562796771130105e-06, "loss": 0.0161, "step": 124080 }, { "epoch": 0.9172555512847048, "grad_norm": 0.12589262425899506, "learning_rate": 4.152570037986705e-06, "loss": 0.0194, "step": 124090 }, { "epoch": 0.9173294698560066, "grad_norm": 0.06327742338180542, "learning_rate": 4.148860398860399e-06, "loss": 0.0153, "step": 124100 }, { "epoch": 0.9174033884273085, "grad_norm": 0.09386986494064331, "learning_rate": 4.145150759734093e-06, "loss": 0.0151, "step": 124110 }, { "epoch": 0.9174773069986103, "grad_norm": 0.07422593981027603, "learning_rate": 4.141441120607788e-06, "loss": 0.0186, "step": 124120 }, { "epoch": 0.9175512255699122, "grad_norm": 0.07547813653945923, "learning_rate": 4.137731481481482e-06, "loss": 0.0137, "step": 124130 }, { "epoch": 0.917625144141214, "grad_norm": 0.06815105676651001, "learning_rate": 4.134021842355175e-06, "loss": 0.0182, "step": 124140 }, { "epoch": 0.9176990627125159, "grad_norm": 0.08372704684734344, "learning_rate": 4.1303122032288696e-06, "loss": 0.0165, "step": 124150 }, { "epoch": 0.9177729812838178, "grad_norm": 0.06949080526828766, "learning_rate": 4.126602564102564e-06, "loss": 0.0171, "step": 124160 }, { "epoch": 0.9178468998551196, "grad_norm": 0.07713343948125839, "learning_rate": 4.122892924976258e-06, "loss": 0.0166, "step": 124170 }, { "epoch": 0.9179208184264215, "grad_norm": 0.05164538323879242, "learning_rate": 4.119183285849953e-06, "loss": 0.019, "step": 124180 }, { "epoch": 0.9179947369977233, "grad_norm": 0.08802802860736847, "learning_rate": 4.1154736467236475e-06, "loss": 0.019, "step": 124190 }, { "epoch": 0.9180686555690252, "grad_norm": 0.043701138347387314, "learning_rate": 4.111764007597342e-06, "loss": 0.0164, "step": 124200 }, { "epoch": 0.918142574140327, "grad_norm": 0.05825362727046013, "learning_rate": 4.108054368471035e-06, "loss": 0.0176, "step": 124210 }, { "epoch": 0.9182164927116289, "grad_norm": 0.07954519242048264, "learning_rate": 4.1043447293447295e-06, "loss": 0.0177, "step": 124220 }, { "epoch": 0.9182904112829308, "grad_norm": 0.0765024796128273, "learning_rate": 4.100635090218424e-06, "loss": 0.0178, "step": 124230 }, { "epoch": 0.9183643298542326, "grad_norm": 0.08571942150592804, "learning_rate": 4.096925451092118e-06, "loss": 0.0172, "step": 124240 }, { "epoch": 0.9184382484255345, "grad_norm": 0.046985041350126266, "learning_rate": 4.093215811965812e-06, "loss": 0.015, "step": 124250 }, { "epoch": 0.9185121669968362, "grad_norm": 0.0847211554646492, "learning_rate": 4.0895061728395066e-06, "loss": 0.0169, "step": 124260 }, { "epoch": 0.9185860855681381, "grad_norm": 0.0697716549038887, "learning_rate": 4.085796533713201e-06, "loss": 0.0185, "step": 124270 }, { "epoch": 0.91866000413944, "grad_norm": 0.06355872005224228, "learning_rate": 4.082086894586895e-06, "loss": 0.0153, "step": 124280 }, { "epoch": 0.9187339227107418, "grad_norm": 0.11508794873952866, "learning_rate": 4.0783772554605885e-06, "loss": 0.0168, "step": 124290 }, { "epoch": 0.9188078412820437, "grad_norm": 0.07200739532709122, "learning_rate": 4.074667616334283e-06, "loss": 0.0175, "step": 124300 }, { "epoch": 0.9188817598533455, "grad_norm": 0.09681064635515213, "learning_rate": 4.070957977207977e-06, "loss": 0.0173, "step": 124310 }, { "epoch": 0.9189556784246474, "grad_norm": 0.06718669086694717, "learning_rate": 4.067248338081671e-06, "loss": 0.0163, "step": 124320 }, { "epoch": 0.9190295969959492, "grad_norm": 0.05894205719232559, "learning_rate": 4.063538698955366e-06, "loss": 0.0152, "step": 124330 }, { "epoch": 0.9191035155672511, "grad_norm": 0.14502786099910736, "learning_rate": 4.05982905982906e-06, "loss": 0.0198, "step": 124340 }, { "epoch": 0.919177434138553, "grad_norm": 0.07610470056533813, "learning_rate": 4.056119420702754e-06, "loss": 0.0142, "step": 124350 }, { "epoch": 0.9192513527098548, "grad_norm": 0.07440314441919327, "learning_rate": 4.0524097815764484e-06, "loss": 0.0149, "step": 124360 }, { "epoch": 0.9193252712811567, "grad_norm": 0.05952728912234306, "learning_rate": 4.048700142450143e-06, "loss": 0.0153, "step": 124370 }, { "epoch": 0.9193991898524585, "grad_norm": 0.08123992383480072, "learning_rate": 4.044990503323837e-06, "loss": 0.0171, "step": 124380 }, { "epoch": 0.9194731084237604, "grad_norm": 0.08689317107200623, "learning_rate": 4.041280864197531e-06, "loss": 0.015, "step": 124390 }, { "epoch": 0.9195470269950622, "grad_norm": 0.06676415354013443, "learning_rate": 4.0375712250712255e-06, "loss": 0.0155, "step": 124400 }, { "epoch": 0.9196209455663641, "grad_norm": 0.06977392733097076, "learning_rate": 4.03386158594492e-06, "loss": 0.0163, "step": 124410 }, { "epoch": 0.919694864137666, "grad_norm": 0.07469875365495682, "learning_rate": 4.030151946818614e-06, "loss": 0.0127, "step": 124420 }, { "epoch": 0.9197687827089678, "grad_norm": 0.06375137716531754, "learning_rate": 4.026442307692308e-06, "loss": 0.0152, "step": 124430 }, { "epoch": 0.9198427012802697, "grad_norm": 0.06735789030790329, "learning_rate": 4.022732668566002e-06, "loss": 0.018, "step": 124440 }, { "epoch": 0.9199166198515715, "grad_norm": 0.09881290048360825, "learning_rate": 4.019023029439696e-06, "loss": 0.0198, "step": 124450 }, { "epoch": 0.9199905384228734, "grad_norm": 0.13130244612693787, "learning_rate": 4.01531339031339e-06, "loss": 0.0203, "step": 124460 }, { "epoch": 0.9200644569941752, "grad_norm": 0.08290501683950424, "learning_rate": 4.011603751187085e-06, "loss": 0.0196, "step": 124470 }, { "epoch": 0.9201383755654771, "grad_norm": 0.07831768691539764, "learning_rate": 4.007894112060779e-06, "loss": 0.0176, "step": 124480 }, { "epoch": 0.920212294136779, "grad_norm": 0.10180113464593887, "learning_rate": 4.004184472934473e-06, "loss": 0.0169, "step": 124490 }, { "epoch": 0.9202862127080808, "grad_norm": 0.05810206010937691, "learning_rate": 4.000474833808167e-06, "loss": 0.017, "step": 124500 }, { "epoch": 0.9203601312793827, "grad_norm": 0.06321538984775543, "learning_rate": 3.996765194681862e-06, "loss": 0.0154, "step": 124510 }, { "epoch": 0.9204340498506844, "grad_norm": 0.11284110695123672, "learning_rate": 3.993055555555555e-06, "loss": 0.0161, "step": 124520 }, { "epoch": 0.9205079684219863, "grad_norm": 0.07424329966306686, "learning_rate": 3.989345916429249e-06, "loss": 0.0159, "step": 124530 }, { "epoch": 0.9205818869932882, "grad_norm": 0.05204179137945175, "learning_rate": 3.985636277302944e-06, "loss": 0.016, "step": 124540 }, { "epoch": 0.92065580556459, "grad_norm": 0.0912189781665802, "learning_rate": 3.981926638176638e-06, "loss": 0.0176, "step": 124550 }, { "epoch": 0.9207297241358919, "grad_norm": 0.08071061968803406, "learning_rate": 3.978216999050333e-06, "loss": 0.0169, "step": 124560 }, { "epoch": 0.9208036427071937, "grad_norm": 0.08048860728740692, "learning_rate": 3.974507359924027e-06, "loss": 0.0162, "step": 124570 }, { "epoch": 0.9208775612784956, "grad_norm": 0.09860378503799438, "learning_rate": 3.970797720797722e-06, "loss": 0.0173, "step": 124580 }, { "epoch": 0.9209514798497974, "grad_norm": 0.06638229638338089, "learning_rate": 3.967088081671416e-06, "loss": 0.0176, "step": 124590 }, { "epoch": 0.9210253984210993, "grad_norm": 0.09299634397029877, "learning_rate": 3.963378442545109e-06, "loss": 0.0187, "step": 124600 }, { "epoch": 0.9210993169924012, "grad_norm": 0.07731974869966507, "learning_rate": 3.9596688034188036e-06, "loss": 0.0169, "step": 124610 }, { "epoch": 0.921173235563703, "grad_norm": 0.05448630824685097, "learning_rate": 3.955959164292498e-06, "loss": 0.0147, "step": 124620 }, { "epoch": 0.9212471541350049, "grad_norm": 0.0719091147184372, "learning_rate": 3.952249525166192e-06, "loss": 0.016, "step": 124630 }, { "epoch": 0.9213210727063067, "grad_norm": 0.09676989912986755, "learning_rate": 3.948539886039886e-06, "loss": 0.0157, "step": 124640 }, { "epoch": 0.9213949912776086, "grad_norm": 0.07981038093566895, "learning_rate": 3.944830246913581e-06, "loss": 0.0186, "step": 124650 }, { "epoch": 0.9214689098489104, "grad_norm": 0.06857098639011383, "learning_rate": 3.941120607787275e-06, "loss": 0.0188, "step": 124660 }, { "epoch": 0.9215428284202123, "grad_norm": 0.06626041978597641, "learning_rate": 3.937410968660968e-06, "loss": 0.0164, "step": 124670 }, { "epoch": 0.9216167469915142, "grad_norm": 0.08632654696702957, "learning_rate": 3.933701329534663e-06, "loss": 0.0155, "step": 124680 }, { "epoch": 0.921690665562816, "grad_norm": 0.06686916947364807, "learning_rate": 3.929991690408357e-06, "loss": 0.0158, "step": 124690 }, { "epoch": 0.9217645841341179, "grad_norm": 0.11379338055849075, "learning_rate": 3.926282051282051e-06, "loss": 0.0169, "step": 124700 }, { "epoch": 0.9218385027054197, "grad_norm": 0.05981620028614998, "learning_rate": 3.9225724121557454e-06, "loss": 0.018, "step": 124710 }, { "epoch": 0.9219124212767216, "grad_norm": 0.0945410206913948, "learning_rate": 3.91886277302944e-06, "loss": 0.0174, "step": 124720 }, { "epoch": 0.9219863398480235, "grad_norm": 0.08452748507261276, "learning_rate": 3.915153133903134e-06, "loss": 0.0148, "step": 124730 }, { "epoch": 0.9220602584193253, "grad_norm": 0.06958349049091339, "learning_rate": 3.911443494776828e-06, "loss": 0.0161, "step": 124740 }, { "epoch": 0.9221341769906272, "grad_norm": 0.061324093490839005, "learning_rate": 3.9077338556505225e-06, "loss": 0.0157, "step": 124750 }, { "epoch": 0.922208095561929, "grad_norm": 0.07830534875392914, "learning_rate": 3.904024216524217e-06, "loss": 0.0149, "step": 124760 }, { "epoch": 0.9222820141332309, "grad_norm": 0.0901099294424057, "learning_rate": 3.900314577397911e-06, "loss": 0.0148, "step": 124770 }, { "epoch": 0.9223559327045326, "grad_norm": 0.06344560533761978, "learning_rate": 3.896604938271605e-06, "loss": 0.016, "step": 124780 }, { "epoch": 0.9224298512758345, "grad_norm": 0.08566906303167343, "learning_rate": 3.8928952991453e-06, "loss": 0.016, "step": 124790 }, { "epoch": 0.9225037698471364, "grad_norm": 0.05252756178379059, "learning_rate": 3.889185660018994e-06, "loss": 0.0142, "step": 124800 }, { "epoch": 0.9225776884184382, "grad_norm": 0.07893887907266617, "learning_rate": 3.885476020892688e-06, "loss": 0.0194, "step": 124810 }, { "epoch": 0.9226516069897401, "grad_norm": 0.07250846922397614, "learning_rate": 3.8817663817663825e-06, "loss": 0.0144, "step": 124820 }, { "epoch": 0.9227255255610419, "grad_norm": 0.07483262568712234, "learning_rate": 3.878056742640076e-06, "loss": 0.0209, "step": 124830 }, { "epoch": 0.9227994441323438, "grad_norm": 0.09314266592264175, "learning_rate": 3.87434710351377e-06, "loss": 0.0191, "step": 124840 }, { "epoch": 0.9228733627036456, "grad_norm": 0.06463508307933807, "learning_rate": 3.8706374643874644e-06, "loss": 0.0194, "step": 124850 }, { "epoch": 0.9229472812749475, "grad_norm": 0.07391108572483063, "learning_rate": 3.866927825261159e-06, "loss": 0.0157, "step": 124860 }, { "epoch": 0.9230211998462494, "grad_norm": 0.06343664973974228, "learning_rate": 3.863218186134853e-06, "loss": 0.0191, "step": 124870 }, { "epoch": 0.9230951184175512, "grad_norm": 0.09996242076158524, "learning_rate": 3.859508547008547e-06, "loss": 0.016, "step": 124880 }, { "epoch": 0.9231690369888531, "grad_norm": 0.07135334610939026, "learning_rate": 3.8557989078822415e-06, "loss": 0.0198, "step": 124890 }, { "epoch": 0.9232429555601549, "grad_norm": 0.071023128926754, "learning_rate": 3.852089268755936e-06, "loss": 0.0176, "step": 124900 }, { "epoch": 0.9233168741314568, "grad_norm": 0.07268857210874557, "learning_rate": 3.848379629629629e-06, "loss": 0.0176, "step": 124910 }, { "epoch": 0.9233907927027586, "grad_norm": 0.10081833600997925, "learning_rate": 3.8446699905033235e-06, "loss": 0.0179, "step": 124920 }, { "epoch": 0.9234647112740605, "grad_norm": 0.08457359671592712, "learning_rate": 3.840960351377018e-06, "loss": 0.0157, "step": 124930 }, { "epoch": 0.9235386298453624, "grad_norm": 0.07194271683692932, "learning_rate": 3.837250712250712e-06, "loss": 0.0167, "step": 124940 }, { "epoch": 0.9236125484166642, "grad_norm": 0.06114126741886139, "learning_rate": 3.833541073124407e-06, "loss": 0.0164, "step": 124950 }, { "epoch": 0.9236864669879661, "grad_norm": 0.08034610003232956, "learning_rate": 3.8298314339981014e-06, "loss": 0.0191, "step": 124960 }, { "epoch": 0.9237603855592679, "grad_norm": 0.061103809624910355, "learning_rate": 3.826121794871796e-06, "loss": 0.0182, "step": 124970 }, { "epoch": 0.9238343041305698, "grad_norm": 0.0654354989528656, "learning_rate": 3.822412155745489e-06, "loss": 0.0166, "step": 124980 }, { "epoch": 0.9239082227018717, "grad_norm": 0.07981405407190323, "learning_rate": 3.818702516619183e-06, "loss": 0.0202, "step": 124990 }, { "epoch": 0.9239821412731735, "grad_norm": 0.08441367000341415, "learning_rate": 3.814992877492878e-06, "loss": 0.0169, "step": 125000 }, { "epoch": 0.9240560598444754, "grad_norm": 0.08189128339290619, "learning_rate": 3.811283238366572e-06, "loss": 0.0176, "step": 125010 }, { "epoch": 0.9241299784157772, "grad_norm": 0.08971022814512253, "learning_rate": 3.8075735992402662e-06, "loss": 0.0162, "step": 125020 }, { "epoch": 0.924203896987079, "grad_norm": 0.07982345670461655, "learning_rate": 3.8038639601139605e-06, "loss": 0.0153, "step": 125030 }, { "epoch": 0.9242778155583808, "grad_norm": 0.074432373046875, "learning_rate": 3.8001543209876548e-06, "loss": 0.0174, "step": 125040 }, { "epoch": 0.9243517341296827, "grad_norm": 0.08879147469997406, "learning_rate": 3.796444681861349e-06, "loss": 0.0159, "step": 125050 }, { "epoch": 0.9244256527009846, "grad_norm": 0.07412946224212646, "learning_rate": 3.7927350427350425e-06, "loss": 0.0178, "step": 125060 }, { "epoch": 0.9244995712722864, "grad_norm": 0.08697675913572311, "learning_rate": 3.7890254036087367e-06, "loss": 0.0187, "step": 125070 }, { "epoch": 0.9245734898435883, "grad_norm": 0.09308616816997528, "learning_rate": 3.7853157644824314e-06, "loss": 0.0169, "step": 125080 }, { "epoch": 0.9246474084148901, "grad_norm": 0.07256640493869781, "learning_rate": 3.7816061253561257e-06, "loss": 0.0193, "step": 125090 }, { "epoch": 0.924721326986192, "grad_norm": 0.09890392422676086, "learning_rate": 3.77789648622982e-06, "loss": 0.0173, "step": 125100 }, { "epoch": 0.9247952455574938, "grad_norm": 0.07536329329013824, "learning_rate": 3.7741868471035142e-06, "loss": 0.0182, "step": 125110 }, { "epoch": 0.9248691641287957, "grad_norm": 0.05913927033543587, "learning_rate": 3.7704772079772085e-06, "loss": 0.0164, "step": 125120 }, { "epoch": 0.9249430827000976, "grad_norm": 0.09645496308803558, "learning_rate": 3.766767568850903e-06, "loss": 0.02, "step": 125130 }, { "epoch": 0.9250170012713994, "grad_norm": 0.08335822820663452, "learning_rate": 3.7630579297245962e-06, "loss": 0.0162, "step": 125140 }, { "epoch": 0.9250909198427013, "grad_norm": 0.1134815365076065, "learning_rate": 3.7593482905982905e-06, "loss": 0.0183, "step": 125150 }, { "epoch": 0.9251648384140031, "grad_norm": 0.07296968251466751, "learning_rate": 3.7556386514719848e-06, "loss": 0.0156, "step": 125160 }, { "epoch": 0.925238756985305, "grad_norm": 0.06772264093160629, "learning_rate": 3.751929012345679e-06, "loss": 0.0155, "step": 125170 }, { "epoch": 0.9253126755566068, "grad_norm": 0.06695547699928284, "learning_rate": 3.7482193732193733e-06, "loss": 0.0163, "step": 125180 }, { "epoch": 0.9253865941279087, "grad_norm": 0.0516495443880558, "learning_rate": 3.7445097340930676e-06, "loss": 0.0177, "step": 125190 }, { "epoch": 0.9254605126992106, "grad_norm": 0.09707432985305786, "learning_rate": 3.7408000949667623e-06, "loss": 0.0153, "step": 125200 }, { "epoch": 0.9255344312705124, "grad_norm": 0.06112994998693466, "learning_rate": 3.7370904558404557e-06, "loss": 0.0163, "step": 125210 }, { "epoch": 0.9256083498418143, "grad_norm": 0.07663624733686447, "learning_rate": 3.73338081671415e-06, "loss": 0.0182, "step": 125220 }, { "epoch": 0.9256822684131161, "grad_norm": 0.05725317820906639, "learning_rate": 3.7296711775878443e-06, "loss": 0.0144, "step": 125230 }, { "epoch": 0.925756186984418, "grad_norm": 0.09255700558423996, "learning_rate": 3.7259615384615385e-06, "loss": 0.0183, "step": 125240 }, { "epoch": 0.9258301055557199, "grad_norm": 0.05431872233748436, "learning_rate": 3.722251899335233e-06, "loss": 0.0145, "step": 125250 }, { "epoch": 0.9259040241270217, "grad_norm": 0.06756569445133209, "learning_rate": 3.718542260208927e-06, "loss": 0.0152, "step": 125260 }, { "epoch": 0.9259779426983236, "grad_norm": 0.08588846772909164, "learning_rate": 3.7148326210826213e-06, "loss": 0.0165, "step": 125270 }, { "epoch": 0.9260518612696254, "grad_norm": 0.0836460068821907, "learning_rate": 3.7111229819563156e-06, "loss": 0.018, "step": 125280 }, { "epoch": 0.9261257798409273, "grad_norm": 0.08511676639318466, "learning_rate": 3.7074133428300095e-06, "loss": 0.0156, "step": 125290 }, { "epoch": 0.926199698412229, "grad_norm": 0.08848274499177933, "learning_rate": 3.7037037037037037e-06, "loss": 0.0172, "step": 125300 }, { "epoch": 0.926273616983531, "grad_norm": 0.08268193900585175, "learning_rate": 3.699994064577398e-06, "loss": 0.0157, "step": 125310 }, { "epoch": 0.9263475355548328, "grad_norm": 0.056754596531391144, "learning_rate": 3.6962844254510923e-06, "loss": 0.0169, "step": 125320 }, { "epoch": 0.9264214541261346, "grad_norm": 0.0771530270576477, "learning_rate": 3.6925747863247866e-06, "loss": 0.018, "step": 125330 }, { "epoch": 0.9264953726974365, "grad_norm": 0.08571093529462814, "learning_rate": 3.688865147198481e-06, "loss": 0.0193, "step": 125340 }, { "epoch": 0.9265692912687383, "grad_norm": 0.08762447535991669, "learning_rate": 3.685155508072175e-06, "loss": 0.0169, "step": 125350 }, { "epoch": 0.9266432098400402, "grad_norm": 0.0783274844288826, "learning_rate": 3.6814458689458694e-06, "loss": 0.0185, "step": 125360 }, { "epoch": 0.926717128411342, "grad_norm": 0.08024732768535614, "learning_rate": 3.6777362298195632e-06, "loss": 0.0143, "step": 125370 }, { "epoch": 0.9267910469826439, "grad_norm": 0.09043636173009872, "learning_rate": 3.6740265906932575e-06, "loss": 0.0176, "step": 125380 }, { "epoch": 0.9268649655539458, "grad_norm": 0.07826859503984451, "learning_rate": 3.6703169515669518e-06, "loss": 0.0164, "step": 125390 }, { "epoch": 0.9269388841252476, "grad_norm": 0.06709279119968414, "learning_rate": 3.666607312440646e-06, "loss": 0.0175, "step": 125400 }, { "epoch": 0.9270128026965495, "grad_norm": 0.0865020826458931, "learning_rate": 3.6628976733143403e-06, "loss": 0.0167, "step": 125410 }, { "epoch": 0.9270867212678513, "grad_norm": 0.09275317937135696, "learning_rate": 3.6591880341880346e-06, "loss": 0.0169, "step": 125420 }, { "epoch": 0.9271606398391532, "grad_norm": 0.06730522960424423, "learning_rate": 3.655478395061729e-06, "loss": 0.0197, "step": 125430 }, { "epoch": 0.927234558410455, "grad_norm": 0.06249605491757393, "learning_rate": 3.6517687559354223e-06, "loss": 0.015, "step": 125440 }, { "epoch": 0.9273084769817569, "grad_norm": 0.09848795086145401, "learning_rate": 3.6480591168091166e-06, "loss": 0.0178, "step": 125450 }, { "epoch": 0.9273823955530588, "grad_norm": 0.09894700348377228, "learning_rate": 3.644349477682811e-06, "loss": 0.0194, "step": 125460 }, { "epoch": 0.9274563141243606, "grad_norm": 0.0778171494603157, "learning_rate": 3.6406398385565055e-06, "loss": 0.017, "step": 125470 }, { "epoch": 0.9275302326956625, "grad_norm": 0.08636230230331421, "learning_rate": 3.6369301994302e-06, "loss": 0.0192, "step": 125480 }, { "epoch": 0.9276041512669643, "grad_norm": 0.06680939346551895, "learning_rate": 3.633220560303894e-06, "loss": 0.0165, "step": 125490 }, { "epoch": 0.9276780698382662, "grad_norm": 0.08700402081012726, "learning_rate": 3.6295109211775884e-06, "loss": 0.0161, "step": 125500 }, { "epoch": 0.9277519884095681, "grad_norm": 0.07302654534578323, "learning_rate": 3.6258012820512826e-06, "loss": 0.0169, "step": 125510 }, { "epoch": 0.9278259069808699, "grad_norm": 0.08297467976808548, "learning_rate": 3.622091642924976e-06, "loss": 0.0167, "step": 125520 }, { "epoch": 0.9278998255521718, "grad_norm": 0.12444033473730087, "learning_rate": 3.6183820037986703e-06, "loss": 0.0184, "step": 125530 }, { "epoch": 0.9279737441234736, "grad_norm": 0.09066224098205566, "learning_rate": 3.6146723646723646e-06, "loss": 0.0178, "step": 125540 }, { "epoch": 0.9280476626947755, "grad_norm": 0.08981047570705414, "learning_rate": 3.610962725546059e-06, "loss": 0.0184, "step": 125550 }, { "epoch": 0.9281215812660772, "grad_norm": 0.07104262709617615, "learning_rate": 3.607253086419753e-06, "loss": 0.0181, "step": 125560 }, { "epoch": 0.9281954998373791, "grad_norm": 0.08694016188383102, "learning_rate": 3.6035434472934474e-06, "loss": 0.0172, "step": 125570 }, { "epoch": 0.928269418408681, "grad_norm": 0.09042564779520035, "learning_rate": 3.599833808167142e-06, "loss": 0.0177, "step": 125580 }, { "epoch": 0.9283433369799828, "grad_norm": 0.054768797010183334, "learning_rate": 3.5961241690408364e-06, "loss": 0.0163, "step": 125590 }, { "epoch": 0.9284172555512847, "grad_norm": 0.07020504772663116, "learning_rate": 3.59241452991453e-06, "loss": 0.0174, "step": 125600 }, { "epoch": 0.9284911741225865, "grad_norm": 0.08846597373485565, "learning_rate": 3.588704890788224e-06, "loss": 0.0172, "step": 125610 }, { "epoch": 0.9285650926938884, "grad_norm": 0.07999665290117264, "learning_rate": 3.5849952516619184e-06, "loss": 0.0168, "step": 125620 }, { "epoch": 0.9286390112651902, "grad_norm": 0.06280770152807236, "learning_rate": 3.5812856125356126e-06, "loss": 0.0173, "step": 125630 }, { "epoch": 0.9287129298364921, "grad_norm": 0.07466577738523483, "learning_rate": 3.577575973409307e-06, "loss": 0.0164, "step": 125640 }, { "epoch": 0.928786848407794, "grad_norm": 0.0839623361825943, "learning_rate": 3.573866334283001e-06, "loss": 0.0168, "step": 125650 }, { "epoch": 0.9288607669790958, "grad_norm": 0.0837130919098854, "learning_rate": 3.5701566951566954e-06, "loss": 0.0172, "step": 125660 }, { "epoch": 0.9289346855503977, "grad_norm": 0.07399124652147293, "learning_rate": 3.5664470560303893e-06, "loss": 0.0178, "step": 125670 }, { "epoch": 0.9290086041216995, "grad_norm": 0.06902366876602173, "learning_rate": 3.5627374169040836e-06, "loss": 0.0178, "step": 125680 }, { "epoch": 0.9290825226930014, "grad_norm": 0.09093683958053589, "learning_rate": 3.559027777777778e-06, "loss": 0.0161, "step": 125690 }, { "epoch": 0.9291564412643032, "grad_norm": 0.09570712596178055, "learning_rate": 3.555318138651472e-06, "loss": 0.0185, "step": 125700 }, { "epoch": 0.9292303598356051, "grad_norm": 0.07997146993875504, "learning_rate": 3.5516084995251664e-06, "loss": 0.0148, "step": 125710 }, { "epoch": 0.929304278406907, "grad_norm": 0.058948636054992676, "learning_rate": 3.5478988603988607e-06, "loss": 0.0156, "step": 125720 }, { "epoch": 0.9293781969782088, "grad_norm": 0.09420224279165268, "learning_rate": 3.544189221272555e-06, "loss": 0.0148, "step": 125730 }, { "epoch": 0.9294521155495107, "grad_norm": 0.08917292952537537, "learning_rate": 3.540479582146249e-06, "loss": 0.0149, "step": 125740 }, { "epoch": 0.9295260341208125, "grad_norm": 0.058667514473199844, "learning_rate": 3.536769943019943e-06, "loss": 0.0165, "step": 125750 }, { "epoch": 0.9295999526921144, "grad_norm": 0.07369853556156158, "learning_rate": 3.5330603038936373e-06, "loss": 0.0171, "step": 125760 }, { "epoch": 0.9296738712634163, "grad_norm": 0.06561733782291412, "learning_rate": 3.5293506647673316e-06, "loss": 0.0152, "step": 125770 }, { "epoch": 0.9297477898347181, "grad_norm": 0.05797674506902695, "learning_rate": 3.525641025641026e-06, "loss": 0.0175, "step": 125780 }, { "epoch": 0.92982170840602, "grad_norm": 0.09540330618619919, "learning_rate": 3.52193138651472e-06, "loss": 0.0149, "step": 125790 }, { "epoch": 0.9298956269773218, "grad_norm": 0.08474097400903702, "learning_rate": 3.5182217473884144e-06, "loss": 0.0184, "step": 125800 }, { "epoch": 0.9299695455486237, "grad_norm": 0.0823434367775917, "learning_rate": 3.5145121082621087e-06, "loss": 0.0167, "step": 125810 }, { "epoch": 0.9300434641199254, "grad_norm": 0.10081304609775543, "learning_rate": 3.510802469135803e-06, "loss": 0.0173, "step": 125820 }, { "epoch": 0.9301173826912273, "grad_norm": 0.0666218101978302, "learning_rate": 3.5070928300094964e-06, "loss": 0.0164, "step": 125830 }, { "epoch": 0.9301913012625292, "grad_norm": 0.06705756485462189, "learning_rate": 3.5033831908831907e-06, "loss": 0.0147, "step": 125840 }, { "epoch": 0.930265219833831, "grad_norm": 0.0807531476020813, "learning_rate": 3.4996735517568854e-06, "loss": 0.0168, "step": 125850 }, { "epoch": 0.9303391384051329, "grad_norm": 0.08411208540201187, "learning_rate": 3.4959639126305796e-06, "loss": 0.0167, "step": 125860 }, { "epoch": 0.9304130569764347, "grad_norm": 0.085993692278862, "learning_rate": 3.492254273504274e-06, "loss": 0.017, "step": 125870 }, { "epoch": 0.9304869755477366, "grad_norm": 0.07423291355371475, "learning_rate": 3.488544634377968e-06, "loss": 0.0142, "step": 125880 }, { "epoch": 0.9305608941190384, "grad_norm": 0.05742659792304039, "learning_rate": 3.4848349952516625e-06, "loss": 0.0166, "step": 125890 }, { "epoch": 0.9306348126903403, "grad_norm": 0.06945046037435532, "learning_rate": 3.481125356125356e-06, "loss": 0.0193, "step": 125900 }, { "epoch": 0.9307087312616422, "grad_norm": 0.0680810883641243, "learning_rate": 3.47741571699905e-06, "loss": 0.0168, "step": 125910 }, { "epoch": 0.930782649832944, "grad_norm": 0.06685182452201843, "learning_rate": 3.4737060778727444e-06, "loss": 0.0167, "step": 125920 }, { "epoch": 0.9308565684042459, "grad_norm": 0.064430370926857, "learning_rate": 3.4699964387464387e-06, "loss": 0.0164, "step": 125930 }, { "epoch": 0.9309304869755477, "grad_norm": 0.06506548076868057, "learning_rate": 3.466286799620133e-06, "loss": 0.0155, "step": 125940 }, { "epoch": 0.9310044055468496, "grad_norm": 0.08693241328001022, "learning_rate": 3.4625771604938272e-06, "loss": 0.0187, "step": 125950 }, { "epoch": 0.9310783241181514, "grad_norm": 0.10978690534830093, "learning_rate": 3.458867521367522e-06, "loss": 0.0193, "step": 125960 }, { "epoch": 0.9311522426894533, "grad_norm": 0.09855156391859055, "learning_rate": 3.4551578822412162e-06, "loss": 0.0163, "step": 125970 }, { "epoch": 0.9312261612607552, "grad_norm": 0.06699176877737045, "learning_rate": 3.4514482431149096e-06, "loss": 0.0166, "step": 125980 }, { "epoch": 0.931300079832057, "grad_norm": 0.064228355884552, "learning_rate": 3.447738603988604e-06, "loss": 0.0139, "step": 125990 }, { "epoch": 0.9313739984033589, "grad_norm": 0.10097555071115494, "learning_rate": 3.444028964862298e-06, "loss": 0.0181, "step": 126000 }, { "epoch": 0.9314479169746607, "grad_norm": 0.07844749093055725, "learning_rate": 3.4403193257359925e-06, "loss": 0.017, "step": 126010 }, { "epoch": 0.9315218355459626, "grad_norm": 0.06773234158754349, "learning_rate": 3.4366096866096867e-06, "loss": 0.0182, "step": 126020 }, { "epoch": 0.9315957541172645, "grad_norm": 0.08484964072704315, "learning_rate": 3.432900047483381e-06, "loss": 0.0201, "step": 126030 }, { "epoch": 0.9316696726885663, "grad_norm": 0.05464823171496391, "learning_rate": 3.4291904083570753e-06, "loss": 0.0163, "step": 126040 }, { "epoch": 0.9317435912598682, "grad_norm": 0.07915545254945755, "learning_rate": 3.4254807692307695e-06, "loss": 0.0178, "step": 126050 }, { "epoch": 0.93181750983117, "grad_norm": 0.08858298510313034, "learning_rate": 3.4217711301044634e-06, "loss": 0.0167, "step": 126060 }, { "epoch": 0.9318914284024719, "grad_norm": 0.10065144300460815, "learning_rate": 3.4180614909781577e-06, "loss": 0.019, "step": 126070 }, { "epoch": 0.9319653469737736, "grad_norm": 0.04998321086168289, "learning_rate": 3.414351851851852e-06, "loss": 0.0144, "step": 126080 }, { "epoch": 0.9320392655450755, "grad_norm": 0.07834048569202423, "learning_rate": 3.4106422127255462e-06, "loss": 0.0191, "step": 126090 }, { "epoch": 0.9321131841163774, "grad_norm": 0.07724786549806595, "learning_rate": 3.4069325735992405e-06, "loss": 0.016, "step": 126100 }, { "epoch": 0.9321871026876792, "grad_norm": 0.06425752490758896, "learning_rate": 3.4032229344729348e-06, "loss": 0.0164, "step": 126110 }, { "epoch": 0.9322610212589811, "grad_norm": 0.0907018855214119, "learning_rate": 3.399513295346629e-06, "loss": 0.0186, "step": 126120 }, { "epoch": 0.9323349398302829, "grad_norm": 0.09153434634208679, "learning_rate": 3.395803656220323e-06, "loss": 0.0168, "step": 126130 }, { "epoch": 0.9324088584015848, "grad_norm": 0.09171310812234879, "learning_rate": 3.392094017094017e-06, "loss": 0.0177, "step": 126140 }, { "epoch": 0.9324827769728866, "grad_norm": 0.08220788836479187, "learning_rate": 3.3883843779677114e-06, "loss": 0.0167, "step": 126150 }, { "epoch": 0.9325566955441885, "grad_norm": 0.06586333364248276, "learning_rate": 3.3846747388414057e-06, "loss": 0.0159, "step": 126160 }, { "epoch": 0.9326306141154904, "grad_norm": 0.06101555377244949, "learning_rate": 3.3809650997151e-06, "loss": 0.017, "step": 126170 }, { "epoch": 0.9327045326867922, "grad_norm": 0.13610626757144928, "learning_rate": 3.3772554605887943e-06, "loss": 0.0181, "step": 126180 }, { "epoch": 0.9327784512580941, "grad_norm": 0.07935914397239685, "learning_rate": 3.3735458214624885e-06, "loss": 0.017, "step": 126190 }, { "epoch": 0.9328523698293959, "grad_norm": 0.08567145466804504, "learning_rate": 3.369836182336183e-06, "loss": 0.0168, "step": 126200 }, { "epoch": 0.9329262884006978, "grad_norm": 0.09398678690195084, "learning_rate": 3.3661265432098762e-06, "loss": 0.0182, "step": 126210 }, { "epoch": 0.9330002069719996, "grad_norm": 0.07996780425310135, "learning_rate": 3.3624169040835705e-06, "loss": 0.0172, "step": 126220 }, { "epoch": 0.9330741255433015, "grad_norm": 0.08147966116666794, "learning_rate": 3.358707264957265e-06, "loss": 0.0174, "step": 126230 }, { "epoch": 0.9331480441146034, "grad_norm": 0.06722401827573776, "learning_rate": 3.3549976258309595e-06, "loss": 0.014, "step": 126240 }, { "epoch": 0.9332219626859052, "grad_norm": 0.08256746828556061, "learning_rate": 3.3512879867046537e-06, "loss": 0.0161, "step": 126250 }, { "epoch": 0.9332958812572071, "grad_norm": 0.07081350684165955, "learning_rate": 3.347578347578348e-06, "loss": 0.0181, "step": 126260 }, { "epoch": 0.9333697998285089, "grad_norm": 0.06583075225353241, "learning_rate": 3.3438687084520423e-06, "loss": 0.0183, "step": 126270 }, { "epoch": 0.9334437183998108, "grad_norm": 0.0817515179514885, "learning_rate": 3.3401590693257366e-06, "loss": 0.015, "step": 126280 }, { "epoch": 0.9335176369711127, "grad_norm": 0.08005262911319733, "learning_rate": 3.33644943019943e-06, "loss": 0.0171, "step": 126290 }, { "epoch": 0.9335915555424145, "grad_norm": 0.09398916363716125, "learning_rate": 3.3327397910731243e-06, "loss": 0.0178, "step": 126300 }, { "epoch": 0.9336654741137164, "grad_norm": 0.07812084257602692, "learning_rate": 3.3290301519468185e-06, "loss": 0.0176, "step": 126310 }, { "epoch": 0.9337393926850182, "grad_norm": 0.06297401338815689, "learning_rate": 3.325320512820513e-06, "loss": 0.019, "step": 126320 }, { "epoch": 0.9338133112563201, "grad_norm": 0.054416362196207047, "learning_rate": 3.321610873694207e-06, "loss": 0.016, "step": 126330 }, { "epoch": 0.9338872298276218, "grad_norm": 0.08170922100543976, "learning_rate": 3.3179012345679013e-06, "loss": 0.017, "step": 126340 }, { "epoch": 0.9339611483989237, "grad_norm": 0.0713760256767273, "learning_rate": 3.314191595441596e-06, "loss": 0.0174, "step": 126350 }, { "epoch": 0.9340350669702256, "grad_norm": 0.10220906138420105, "learning_rate": 3.3104819563152903e-06, "loss": 0.0152, "step": 126360 }, { "epoch": 0.9341089855415274, "grad_norm": 0.081649549305439, "learning_rate": 3.3067723171889837e-06, "loss": 0.018, "step": 126370 }, { "epoch": 0.9341829041128293, "grad_norm": 0.09127277880907059, "learning_rate": 3.303062678062678e-06, "loss": 0.0169, "step": 126380 }, { "epoch": 0.9342568226841311, "grad_norm": 0.08659278601408005, "learning_rate": 3.2993530389363723e-06, "loss": 0.016, "step": 126390 }, { "epoch": 0.934330741255433, "grad_norm": 0.08412818610668182, "learning_rate": 3.2956433998100666e-06, "loss": 0.0161, "step": 126400 }, { "epoch": 0.9344046598267348, "grad_norm": 0.074937604367733, "learning_rate": 3.291933760683761e-06, "loss": 0.0147, "step": 126410 }, { "epoch": 0.9344785783980367, "grad_norm": 0.09494459629058838, "learning_rate": 3.288224121557455e-06, "loss": 0.015, "step": 126420 }, { "epoch": 0.9345524969693386, "grad_norm": 0.06063415855169296, "learning_rate": 3.2845144824311494e-06, "loss": 0.0139, "step": 126430 }, { "epoch": 0.9346264155406404, "grad_norm": 0.06621581315994263, "learning_rate": 3.2808048433048432e-06, "loss": 0.0155, "step": 126440 }, { "epoch": 0.9347003341119423, "grad_norm": 0.08298254758119583, "learning_rate": 3.2770952041785375e-06, "loss": 0.0153, "step": 126450 }, { "epoch": 0.9347742526832441, "grad_norm": 0.09773992747068405, "learning_rate": 3.2733855650522318e-06, "loss": 0.0157, "step": 126460 }, { "epoch": 0.934848171254546, "grad_norm": 0.07338134944438934, "learning_rate": 3.269675925925926e-06, "loss": 0.0192, "step": 126470 }, { "epoch": 0.9349220898258478, "grad_norm": 0.06597955524921417, "learning_rate": 3.2659662867996203e-06, "loss": 0.0197, "step": 126480 }, { "epoch": 0.9349960083971497, "grad_norm": 0.09893263131380081, "learning_rate": 3.2622566476733146e-06, "loss": 0.015, "step": 126490 }, { "epoch": 0.9350699269684516, "grad_norm": 0.0914556086063385, "learning_rate": 3.258547008547009e-06, "loss": 0.0168, "step": 126500 }, { "epoch": 0.9351438455397534, "grad_norm": 0.07962629944086075, "learning_rate": 3.254837369420703e-06, "loss": 0.0177, "step": 126510 }, { "epoch": 0.9352177641110553, "grad_norm": 0.08136551082134247, "learning_rate": 3.251127730294397e-06, "loss": 0.0176, "step": 126520 }, { "epoch": 0.9352916826823571, "grad_norm": 0.07951479405164719, "learning_rate": 3.2474180911680913e-06, "loss": 0.0183, "step": 126530 }, { "epoch": 0.935365601253659, "grad_norm": 0.06223934143781662, "learning_rate": 3.2437084520417855e-06, "loss": 0.015, "step": 126540 }, { "epoch": 0.9354395198249609, "grad_norm": 0.07506666332483292, "learning_rate": 3.23999881291548e-06, "loss": 0.0171, "step": 126550 }, { "epoch": 0.9355134383962627, "grad_norm": 0.10078860819339752, "learning_rate": 3.236289173789174e-06, "loss": 0.0139, "step": 126560 }, { "epoch": 0.9355873569675646, "grad_norm": 0.0667853131890297, "learning_rate": 3.2325795346628684e-06, "loss": 0.0168, "step": 126570 }, { "epoch": 0.9356612755388664, "grad_norm": 0.09738816320896149, "learning_rate": 3.2288698955365626e-06, "loss": 0.0165, "step": 126580 }, { "epoch": 0.9357351941101683, "grad_norm": 0.0639905333518982, "learning_rate": 3.225160256410257e-06, "loss": 0.0186, "step": 126590 }, { "epoch": 0.93580911268147, "grad_norm": 0.08258546143770218, "learning_rate": 3.2214506172839503e-06, "loss": 0.017, "step": 126600 }, { "epoch": 0.935883031252772, "grad_norm": 0.08823540061712265, "learning_rate": 3.2177409781576446e-06, "loss": 0.0199, "step": 126610 }, { "epoch": 0.9359569498240738, "grad_norm": 0.09814473241567612, "learning_rate": 3.2140313390313393e-06, "loss": 0.016, "step": 126620 }, { "epoch": 0.9360308683953756, "grad_norm": 0.07945670187473297, "learning_rate": 3.2103216999050336e-06, "loss": 0.0157, "step": 126630 }, { "epoch": 0.9361047869666775, "grad_norm": 0.098643459379673, "learning_rate": 3.206612060778728e-06, "loss": 0.0163, "step": 126640 }, { "epoch": 0.9361787055379793, "grad_norm": 0.08727847039699554, "learning_rate": 3.202902421652422e-06, "loss": 0.0185, "step": 126650 }, { "epoch": 0.9362526241092812, "grad_norm": 0.07201675325632095, "learning_rate": 3.1991927825261164e-06, "loss": 0.015, "step": 126660 }, { "epoch": 0.936326542680583, "grad_norm": 0.07368409633636475, "learning_rate": 3.19548314339981e-06, "loss": 0.0191, "step": 126670 }, { "epoch": 0.9364004612518849, "grad_norm": 0.09178756922483444, "learning_rate": 3.191773504273504e-06, "loss": 0.0182, "step": 126680 }, { "epoch": 0.9364743798231868, "grad_norm": 0.05671729892492294, "learning_rate": 3.1880638651471984e-06, "loss": 0.0155, "step": 126690 }, { "epoch": 0.9365482983944886, "grad_norm": 0.07169840484857559, "learning_rate": 3.1843542260208926e-06, "loss": 0.0169, "step": 126700 }, { "epoch": 0.9366222169657905, "grad_norm": 0.08545159548521042, "learning_rate": 3.180644586894587e-06, "loss": 0.0157, "step": 126710 }, { "epoch": 0.9366961355370923, "grad_norm": 0.0851563885807991, "learning_rate": 3.176934947768281e-06, "loss": 0.0168, "step": 126720 }, { "epoch": 0.9367700541083942, "grad_norm": 0.06378570944070816, "learning_rate": 3.173225308641976e-06, "loss": 0.0191, "step": 126730 }, { "epoch": 0.9368439726796961, "grad_norm": 0.07919839024543762, "learning_rate": 3.16951566951567e-06, "loss": 0.0187, "step": 126740 }, { "epoch": 0.9369178912509979, "grad_norm": 0.0702909529209137, "learning_rate": 3.1658060303893636e-06, "loss": 0.0143, "step": 126750 }, { "epoch": 0.9369918098222998, "grad_norm": 0.07548509538173676, "learning_rate": 3.162096391263058e-06, "loss": 0.0173, "step": 126760 }, { "epoch": 0.9370657283936016, "grad_norm": 0.09032569825649261, "learning_rate": 3.158386752136752e-06, "loss": 0.0203, "step": 126770 }, { "epoch": 0.9371396469649035, "grad_norm": 0.06742248684167862, "learning_rate": 3.1546771130104464e-06, "loss": 0.0161, "step": 126780 }, { "epoch": 0.9372135655362053, "grad_norm": 0.07519455999135971, "learning_rate": 3.1509674738841407e-06, "loss": 0.0172, "step": 126790 }, { "epoch": 0.9372874841075072, "grad_norm": 0.09951967000961304, "learning_rate": 3.147257834757835e-06, "loss": 0.0173, "step": 126800 }, { "epoch": 0.9373614026788091, "grad_norm": 0.11275292932987213, "learning_rate": 3.143548195631529e-06, "loss": 0.0186, "step": 126810 }, { "epoch": 0.9374353212501109, "grad_norm": 0.06716936826705933, "learning_rate": 3.1398385565052235e-06, "loss": 0.0174, "step": 126820 }, { "epoch": 0.9375092398214128, "grad_norm": 0.07388816773891449, "learning_rate": 3.1361289173789173e-06, "loss": 0.0173, "step": 126830 }, { "epoch": 0.9375831583927146, "grad_norm": 0.07803674042224884, "learning_rate": 3.1324192782526116e-06, "loss": 0.0186, "step": 126840 }, { "epoch": 0.9376570769640165, "grad_norm": 0.084476538002491, "learning_rate": 3.128709639126306e-06, "loss": 0.0174, "step": 126850 }, { "epoch": 0.9377309955353182, "grad_norm": 0.08462337404489517, "learning_rate": 3.125e-06, "loss": 0.0172, "step": 126860 }, { "epoch": 0.9378049141066201, "grad_norm": 0.09327008575201035, "learning_rate": 3.1212903608736944e-06, "loss": 0.0182, "step": 126870 }, { "epoch": 0.937878832677922, "grad_norm": 0.07979398220777512, "learning_rate": 3.1175807217473883e-06, "loss": 0.0163, "step": 126880 }, { "epoch": 0.9379527512492238, "grad_norm": 0.09190664440393448, "learning_rate": 3.1138710826210825e-06, "loss": 0.0177, "step": 126890 }, { "epoch": 0.9380266698205257, "grad_norm": 0.07389857620000839, "learning_rate": 3.110161443494777e-06, "loss": 0.0176, "step": 126900 }, { "epoch": 0.9381005883918275, "grad_norm": 0.06767547875642776, "learning_rate": 3.106451804368471e-06, "loss": 0.0154, "step": 126910 }, { "epoch": 0.9381745069631294, "grad_norm": 0.10601852089166641, "learning_rate": 3.1027421652421654e-06, "loss": 0.018, "step": 126920 }, { "epoch": 0.9382484255344312, "grad_norm": 0.06298653781414032, "learning_rate": 3.0990325261158596e-06, "loss": 0.0143, "step": 126930 }, { "epoch": 0.9383223441057331, "grad_norm": 0.06366831809282303, "learning_rate": 3.095322886989554e-06, "loss": 0.0147, "step": 126940 }, { "epoch": 0.938396262677035, "grad_norm": 0.06097415089607239, "learning_rate": 3.091613247863248e-06, "loss": 0.0149, "step": 126950 }, { "epoch": 0.9384701812483368, "grad_norm": 0.04413948208093643, "learning_rate": 3.087903608736942e-06, "loss": 0.0144, "step": 126960 }, { "epoch": 0.9385440998196387, "grad_norm": 0.07926429063081741, "learning_rate": 3.0841939696106363e-06, "loss": 0.0185, "step": 126970 }, { "epoch": 0.9386180183909405, "grad_norm": 0.1001443862915039, "learning_rate": 3.0804843304843306e-06, "loss": 0.0161, "step": 126980 }, { "epoch": 0.9386919369622424, "grad_norm": 0.11276740580797195, "learning_rate": 3.076774691358025e-06, "loss": 0.0169, "step": 126990 }, { "epoch": 0.9387658555335443, "grad_norm": 0.087139792740345, "learning_rate": 3.073065052231719e-06, "loss": 0.018, "step": 127000 }, { "epoch": 0.9388397741048461, "grad_norm": 0.06877847015857697, "learning_rate": 3.0693554131054134e-06, "loss": 0.0169, "step": 127010 }, { "epoch": 0.938913692676148, "grad_norm": 0.09786475449800491, "learning_rate": 3.0656457739791077e-06, "loss": 0.0156, "step": 127020 }, { "epoch": 0.9389876112474498, "grad_norm": 0.06537863612174988, "learning_rate": 3.061936134852802e-06, "loss": 0.0167, "step": 127030 }, { "epoch": 0.9390615298187517, "grad_norm": 0.06823769956827164, "learning_rate": 3.058226495726496e-06, "loss": 0.0177, "step": 127040 }, { "epoch": 0.9391354483900535, "grad_norm": 0.12075066566467285, "learning_rate": 3.05451685660019e-06, "loss": 0.0186, "step": 127050 }, { "epoch": 0.9392093669613554, "grad_norm": 0.06154416501522064, "learning_rate": 3.0508072174738843e-06, "loss": 0.0178, "step": 127060 }, { "epoch": 0.9392832855326573, "grad_norm": 0.0991363450884819, "learning_rate": 3.0470975783475786e-06, "loss": 0.0163, "step": 127070 }, { "epoch": 0.9393572041039591, "grad_norm": 0.07791083306074142, "learning_rate": 3.0433879392212725e-06, "loss": 0.0185, "step": 127080 }, { "epoch": 0.939431122675261, "grad_norm": 0.07921697944402695, "learning_rate": 3.0396783000949667e-06, "loss": 0.0163, "step": 127090 }, { "epoch": 0.9395050412465628, "grad_norm": 0.07544318586587906, "learning_rate": 3.035968660968661e-06, "loss": 0.0154, "step": 127100 }, { "epoch": 0.9395789598178647, "grad_norm": 0.06722431629896164, "learning_rate": 3.0322590218423553e-06, "loss": 0.0187, "step": 127110 }, { "epoch": 0.9396528783891664, "grad_norm": 0.06898238509893417, "learning_rate": 3.0285493827160496e-06, "loss": 0.0174, "step": 127120 }, { "epoch": 0.9397267969604683, "grad_norm": 0.10192032903432846, "learning_rate": 3.024839743589744e-06, "loss": 0.0182, "step": 127130 }, { "epoch": 0.9398007155317702, "grad_norm": 0.0944291204214096, "learning_rate": 3.021130104463438e-06, "loss": 0.0161, "step": 127140 }, { "epoch": 0.939874634103072, "grad_norm": 0.05713077634572983, "learning_rate": 3.017420465337132e-06, "loss": 0.0159, "step": 127150 }, { "epoch": 0.9399485526743739, "grad_norm": 0.09956385940313339, "learning_rate": 3.0137108262108262e-06, "loss": 0.0168, "step": 127160 }, { "epoch": 0.9400224712456757, "grad_norm": 0.05738692730665207, "learning_rate": 3.0100011870845205e-06, "loss": 0.0179, "step": 127170 }, { "epoch": 0.9400963898169776, "grad_norm": 0.11991485953330994, "learning_rate": 3.0062915479582148e-06, "loss": 0.0158, "step": 127180 }, { "epoch": 0.9401703083882794, "grad_norm": 0.07158111780881882, "learning_rate": 3.002581908831909e-06, "loss": 0.0162, "step": 127190 }, { "epoch": 0.9402442269595813, "grad_norm": 0.08031366765499115, "learning_rate": 2.9988722697056033e-06, "loss": 0.0188, "step": 127200 }, { "epoch": 0.9403181455308832, "grad_norm": 0.060922037810087204, "learning_rate": 2.9951626305792976e-06, "loss": 0.0164, "step": 127210 }, { "epoch": 0.940392064102185, "grad_norm": 0.0666850134730339, "learning_rate": 2.991452991452992e-06, "loss": 0.0172, "step": 127220 }, { "epoch": 0.9404659826734869, "grad_norm": 0.06392525881528854, "learning_rate": 2.9877433523266857e-06, "loss": 0.0155, "step": 127230 }, { "epoch": 0.9405399012447887, "grad_norm": 0.07114558666944504, "learning_rate": 2.98403371320038e-06, "loss": 0.0167, "step": 127240 }, { "epoch": 0.9406138198160906, "grad_norm": 0.10076846182346344, "learning_rate": 2.9803240740740743e-06, "loss": 0.0204, "step": 127250 }, { "epoch": 0.9406877383873925, "grad_norm": 0.08315786719322205, "learning_rate": 2.9766144349477685e-06, "loss": 0.0152, "step": 127260 }, { "epoch": 0.9407616569586943, "grad_norm": 0.07295054942369461, "learning_rate": 2.9729047958214624e-06, "loss": 0.0159, "step": 127270 }, { "epoch": 0.9408355755299962, "grad_norm": 0.07902075350284576, "learning_rate": 2.9691951566951566e-06, "loss": 0.0185, "step": 127280 }, { "epoch": 0.940909494101298, "grad_norm": 0.08938170969486237, "learning_rate": 2.965485517568851e-06, "loss": 0.0179, "step": 127290 }, { "epoch": 0.9409834126725999, "grad_norm": 0.07118421047925949, "learning_rate": 2.9617758784425456e-06, "loss": 0.0178, "step": 127300 }, { "epoch": 0.9410573312439017, "grad_norm": 0.09154824167490005, "learning_rate": 2.9580662393162395e-06, "loss": 0.018, "step": 127310 }, { "epoch": 0.9411312498152036, "grad_norm": 0.058051031082868576, "learning_rate": 2.9543566001899337e-06, "loss": 0.0185, "step": 127320 }, { "epoch": 0.9412051683865055, "grad_norm": 0.058608926832675934, "learning_rate": 2.950646961063628e-06, "loss": 0.0167, "step": 127330 }, { "epoch": 0.9412790869578073, "grad_norm": 0.08225753158330917, "learning_rate": 2.9469373219373223e-06, "loss": 0.0158, "step": 127340 }, { "epoch": 0.9413530055291092, "grad_norm": 0.06874485313892365, "learning_rate": 2.943227682811016e-06, "loss": 0.018, "step": 127350 }, { "epoch": 0.941426924100411, "grad_norm": 0.09509554505348206, "learning_rate": 2.9395180436847104e-06, "loss": 0.0172, "step": 127360 }, { "epoch": 0.9415008426717129, "grad_norm": 0.06457925587892532, "learning_rate": 2.9358084045584047e-06, "loss": 0.0183, "step": 127370 }, { "epoch": 0.9415747612430146, "grad_norm": 0.09607307612895966, "learning_rate": 2.932098765432099e-06, "loss": 0.0164, "step": 127380 }, { "epoch": 0.9416486798143165, "grad_norm": 0.08715007454156876, "learning_rate": 2.9283891263057932e-06, "loss": 0.0181, "step": 127390 }, { "epoch": 0.9417225983856184, "grad_norm": 0.10236938297748566, "learning_rate": 2.9246794871794875e-06, "loss": 0.0172, "step": 127400 }, { "epoch": 0.9417965169569202, "grad_norm": 0.11540023237466812, "learning_rate": 2.9209698480531818e-06, "loss": 0.0185, "step": 127410 }, { "epoch": 0.9418704355282221, "grad_norm": 0.06768453121185303, "learning_rate": 2.9172602089268756e-06, "loss": 0.0185, "step": 127420 }, { "epoch": 0.9419443540995239, "grad_norm": 0.05372827127575874, "learning_rate": 2.91355056980057e-06, "loss": 0.0144, "step": 127430 }, { "epoch": 0.9420182726708258, "grad_norm": 0.05831387639045715, "learning_rate": 2.909840930674264e-06, "loss": 0.0154, "step": 127440 }, { "epoch": 0.9420921912421276, "grad_norm": 0.0981128066778183, "learning_rate": 2.9061312915479584e-06, "loss": 0.0166, "step": 127450 }, { "epoch": 0.9421661098134295, "grad_norm": 0.07359275966882706, "learning_rate": 2.9024216524216523e-06, "loss": 0.0171, "step": 127460 }, { "epoch": 0.9422400283847314, "grad_norm": 0.06113864481449127, "learning_rate": 2.8987120132953466e-06, "loss": 0.0167, "step": 127470 }, { "epoch": 0.9423139469560332, "grad_norm": 0.053697239607572556, "learning_rate": 2.895002374169041e-06, "loss": 0.015, "step": 127480 }, { "epoch": 0.9423878655273351, "grad_norm": 0.07835987955331802, "learning_rate": 2.891292735042735e-06, "loss": 0.0156, "step": 127490 }, { "epoch": 0.9424617840986369, "grad_norm": 0.0714726373553276, "learning_rate": 2.8875830959164294e-06, "loss": 0.0173, "step": 127500 }, { "epoch": 0.9425357026699388, "grad_norm": 0.08675273507833481, "learning_rate": 2.8838734567901237e-06, "loss": 0.0148, "step": 127510 }, { "epoch": 0.9426096212412407, "grad_norm": 0.05766710638999939, "learning_rate": 2.880163817663818e-06, "loss": 0.0156, "step": 127520 }, { "epoch": 0.9426835398125425, "grad_norm": 0.07460221648216248, "learning_rate": 2.876454178537512e-06, "loss": 0.0168, "step": 127530 }, { "epoch": 0.9427574583838444, "grad_norm": 0.0762360617518425, "learning_rate": 2.872744539411206e-06, "loss": 0.0157, "step": 127540 }, { "epoch": 0.9428313769551462, "grad_norm": 0.06541568040847778, "learning_rate": 2.8690349002849003e-06, "loss": 0.0171, "step": 127550 }, { "epoch": 0.9429052955264481, "grad_norm": 0.07995308935642242, "learning_rate": 2.8653252611585946e-06, "loss": 0.0171, "step": 127560 }, { "epoch": 0.9429792140977499, "grad_norm": 0.08494476228952408, "learning_rate": 2.861615622032289e-06, "loss": 0.017, "step": 127570 }, { "epoch": 0.9430531326690518, "grad_norm": 0.05350199714303017, "learning_rate": 2.857905982905983e-06, "loss": 0.0174, "step": 127580 }, { "epoch": 0.9431270512403537, "grad_norm": 0.08649630844593048, "learning_rate": 2.8541963437796774e-06, "loss": 0.0187, "step": 127590 }, { "epoch": 0.9432009698116555, "grad_norm": 0.07305306941270828, "learning_rate": 2.8504867046533717e-06, "loss": 0.0143, "step": 127600 }, { "epoch": 0.9432748883829574, "grad_norm": 0.075900137424469, "learning_rate": 2.8467770655270655e-06, "loss": 0.0157, "step": 127610 }, { "epoch": 0.9433488069542592, "grad_norm": 0.05308043584227562, "learning_rate": 2.84306742640076e-06, "loss": 0.0152, "step": 127620 }, { "epoch": 0.9434227255255611, "grad_norm": 0.08873714506626129, "learning_rate": 2.839357787274454e-06, "loss": 0.0179, "step": 127630 }, { "epoch": 0.9434966440968628, "grad_norm": 0.08229392766952515, "learning_rate": 2.8356481481481484e-06, "loss": 0.019, "step": 127640 }, { "epoch": 0.9435705626681647, "grad_norm": 0.09823070466518402, "learning_rate": 2.831938509021842e-06, "loss": 0.0156, "step": 127650 }, { "epoch": 0.9436444812394666, "grad_norm": 0.07842585444450378, "learning_rate": 2.8282288698955365e-06, "loss": 0.0167, "step": 127660 }, { "epoch": 0.9437183998107684, "grad_norm": 0.08402416855096817, "learning_rate": 2.8245192307692307e-06, "loss": 0.0174, "step": 127670 }, { "epoch": 0.9437923183820703, "grad_norm": 0.07629430294036865, "learning_rate": 2.820809591642925e-06, "loss": 0.0154, "step": 127680 }, { "epoch": 0.9438662369533721, "grad_norm": 0.10126028209924698, "learning_rate": 2.8170999525166193e-06, "loss": 0.0165, "step": 127690 }, { "epoch": 0.943940155524674, "grad_norm": 0.10771343111991882, "learning_rate": 2.8133903133903136e-06, "loss": 0.0172, "step": 127700 }, { "epoch": 0.9440140740959758, "grad_norm": 0.11257972568273544, "learning_rate": 2.809680674264008e-06, "loss": 0.017, "step": 127710 }, { "epoch": 0.9440879926672777, "grad_norm": 0.06903264671564102, "learning_rate": 2.805971035137702e-06, "loss": 0.0179, "step": 127720 }, { "epoch": 0.9441619112385796, "grad_norm": 0.07695875316858292, "learning_rate": 2.802261396011396e-06, "loss": 0.0153, "step": 127730 }, { "epoch": 0.9442358298098814, "grad_norm": 0.06173216924071312, "learning_rate": 2.7985517568850902e-06, "loss": 0.0183, "step": 127740 }, { "epoch": 0.9443097483811833, "grad_norm": 0.06688038259744644, "learning_rate": 2.7948421177587845e-06, "loss": 0.0167, "step": 127750 }, { "epoch": 0.9443836669524851, "grad_norm": 0.08888844400644302, "learning_rate": 2.7911324786324788e-06, "loss": 0.0177, "step": 127760 }, { "epoch": 0.944457585523787, "grad_norm": 0.106315016746521, "learning_rate": 2.787422839506173e-06, "loss": 0.0185, "step": 127770 }, { "epoch": 0.9445315040950889, "grad_norm": 0.08033721894025803, "learning_rate": 2.7837132003798673e-06, "loss": 0.0162, "step": 127780 }, { "epoch": 0.9446054226663907, "grad_norm": 0.0840701088309288, "learning_rate": 2.7800035612535616e-06, "loss": 0.0197, "step": 127790 }, { "epoch": 0.9446793412376926, "grad_norm": 0.060849256813526154, "learning_rate": 2.776293922127256e-06, "loss": 0.0177, "step": 127800 }, { "epoch": 0.9447532598089944, "grad_norm": 0.08838897198438644, "learning_rate": 2.7725842830009497e-06, "loss": 0.0153, "step": 127810 }, { "epoch": 0.9448271783802963, "grad_norm": 0.06309714913368225, "learning_rate": 2.768874643874644e-06, "loss": 0.0161, "step": 127820 }, { "epoch": 0.9449010969515981, "grad_norm": 0.0807114914059639, "learning_rate": 2.7651650047483383e-06, "loss": 0.0151, "step": 127830 }, { "epoch": 0.9449750155229, "grad_norm": 0.06764519959688187, "learning_rate": 2.761455365622032e-06, "loss": 0.0162, "step": 127840 }, { "epoch": 0.9450489340942019, "grad_norm": 0.09021104127168655, "learning_rate": 2.7577457264957264e-06, "loss": 0.0171, "step": 127850 }, { "epoch": 0.9451228526655037, "grad_norm": 0.06262607127428055, "learning_rate": 2.7540360873694207e-06, "loss": 0.0194, "step": 127860 }, { "epoch": 0.9451967712368056, "grad_norm": 0.08771193772554398, "learning_rate": 2.750326448243115e-06, "loss": 0.0138, "step": 127870 }, { "epoch": 0.9452706898081074, "grad_norm": 0.10063749551773071, "learning_rate": 2.746616809116809e-06, "loss": 0.0216, "step": 127880 }, { "epoch": 0.9453446083794093, "grad_norm": 0.07008014619350433, "learning_rate": 2.7429071699905035e-06, "loss": 0.016, "step": 127890 }, { "epoch": 0.945418526950711, "grad_norm": 0.06852864474058151, "learning_rate": 2.7391975308641978e-06, "loss": 0.0171, "step": 127900 }, { "epoch": 0.945492445522013, "grad_norm": 0.056721948087215424, "learning_rate": 2.735487891737892e-06, "loss": 0.0145, "step": 127910 }, { "epoch": 0.9455663640933148, "grad_norm": 0.06805742532014847, "learning_rate": 2.731778252611586e-06, "loss": 0.0166, "step": 127920 }, { "epoch": 0.9456402826646166, "grad_norm": 0.06866194307804108, "learning_rate": 2.72806861348528e-06, "loss": 0.0179, "step": 127930 }, { "epoch": 0.9457142012359185, "grad_norm": 0.12696869671344757, "learning_rate": 2.7243589743589744e-06, "loss": 0.0174, "step": 127940 }, { "epoch": 0.9457881198072203, "grad_norm": 0.05472124367952347, "learning_rate": 2.7206493352326687e-06, "loss": 0.014, "step": 127950 }, { "epoch": 0.9458620383785222, "grad_norm": 0.08839566260576248, "learning_rate": 2.716939696106363e-06, "loss": 0.0159, "step": 127960 }, { "epoch": 0.945935956949824, "grad_norm": 0.09860622137784958, "learning_rate": 2.7132300569800572e-06, "loss": 0.02, "step": 127970 }, { "epoch": 0.9460098755211259, "grad_norm": 0.07679364830255508, "learning_rate": 2.7095204178537515e-06, "loss": 0.0189, "step": 127980 }, { "epoch": 0.9460837940924278, "grad_norm": 0.07669613510370255, "learning_rate": 2.705810778727446e-06, "loss": 0.0168, "step": 127990 }, { "epoch": 0.9461577126637296, "grad_norm": 0.13627739250659943, "learning_rate": 2.7021011396011396e-06, "loss": 0.0178, "step": 128000 }, { "epoch": 0.9462316312350315, "grad_norm": 0.09532894939184189, "learning_rate": 2.698391500474834e-06, "loss": 0.0186, "step": 128010 }, { "epoch": 0.9463055498063333, "grad_norm": 0.07109887897968292, "learning_rate": 2.694681861348528e-06, "loss": 0.0175, "step": 128020 }, { "epoch": 0.9463794683776352, "grad_norm": 0.08016806095838547, "learning_rate": 2.6909722222222225e-06, "loss": 0.0161, "step": 128030 }, { "epoch": 0.9464533869489371, "grad_norm": 0.10615555942058563, "learning_rate": 2.6872625830959163e-06, "loss": 0.0143, "step": 128040 }, { "epoch": 0.9465273055202389, "grad_norm": 0.0732378363609314, "learning_rate": 2.6835529439696106e-06, "loss": 0.0156, "step": 128050 }, { "epoch": 0.9466012240915408, "grad_norm": 0.06836659461259842, "learning_rate": 2.679843304843305e-06, "loss": 0.0177, "step": 128060 }, { "epoch": 0.9466751426628426, "grad_norm": 0.06310658156871796, "learning_rate": 2.6761336657169995e-06, "loss": 0.0178, "step": 128070 }, { "epoch": 0.9467490612341445, "grad_norm": 0.07476349174976349, "learning_rate": 2.6724240265906934e-06, "loss": 0.0153, "step": 128080 }, { "epoch": 0.9468229798054463, "grad_norm": 0.061532557010650635, "learning_rate": 2.6687143874643877e-06, "loss": 0.0154, "step": 128090 }, { "epoch": 0.9468968983767482, "grad_norm": 0.07229252904653549, "learning_rate": 2.665004748338082e-06, "loss": 0.0164, "step": 128100 }, { "epoch": 0.9469708169480501, "grad_norm": 0.09586562216281891, "learning_rate": 2.661295109211776e-06, "loss": 0.0187, "step": 128110 }, { "epoch": 0.9470447355193519, "grad_norm": 0.06675871461629868, "learning_rate": 2.65758547008547e-06, "loss": 0.0172, "step": 128120 }, { "epoch": 0.9471186540906538, "grad_norm": 0.05785641446709633, "learning_rate": 2.6538758309591643e-06, "loss": 0.0165, "step": 128130 }, { "epoch": 0.9471925726619556, "grad_norm": 0.08318644762039185, "learning_rate": 2.6501661918328586e-06, "loss": 0.0164, "step": 128140 }, { "epoch": 0.9472664912332575, "grad_norm": 0.08673836290836334, "learning_rate": 2.646456552706553e-06, "loss": 0.0183, "step": 128150 }, { "epoch": 0.9473404098045592, "grad_norm": 0.07311297208070755, "learning_rate": 2.642746913580247e-06, "loss": 0.0173, "step": 128160 }, { "epoch": 0.9474143283758611, "grad_norm": 0.09003617614507675, "learning_rate": 2.6390372744539414e-06, "loss": 0.0186, "step": 128170 }, { "epoch": 0.947488246947163, "grad_norm": 0.059183746576309204, "learning_rate": 2.6353276353276357e-06, "loss": 0.0159, "step": 128180 }, { "epoch": 0.9475621655184648, "grad_norm": 0.0824420377612114, "learning_rate": 2.6316179962013296e-06, "loss": 0.017, "step": 128190 }, { "epoch": 0.9476360840897667, "grad_norm": 0.07933705300092697, "learning_rate": 2.627908357075024e-06, "loss": 0.0156, "step": 128200 }, { "epoch": 0.9477100026610685, "grad_norm": 0.09012126922607422, "learning_rate": 2.624198717948718e-06, "loss": 0.02, "step": 128210 }, { "epoch": 0.9477839212323704, "grad_norm": 0.0675535500049591, "learning_rate": 2.6204890788224124e-06, "loss": 0.0142, "step": 128220 }, { "epoch": 0.9478578398036722, "grad_norm": 0.08034668117761612, "learning_rate": 2.6167794396961062e-06, "loss": 0.0162, "step": 128230 }, { "epoch": 0.9479317583749741, "grad_norm": 0.06311357021331787, "learning_rate": 2.6130698005698005e-06, "loss": 0.0191, "step": 128240 }, { "epoch": 0.948005676946276, "grad_norm": 0.0800691470503807, "learning_rate": 2.6093601614434948e-06, "loss": 0.0148, "step": 128250 }, { "epoch": 0.9480795955175778, "grad_norm": 0.10849236696958542, "learning_rate": 2.6056505223171895e-06, "loss": 0.0168, "step": 128260 }, { "epoch": 0.9481535140888797, "grad_norm": 0.08853597939014435, "learning_rate": 2.6019408831908833e-06, "loss": 0.0182, "step": 128270 }, { "epoch": 0.9482274326601815, "grad_norm": 0.09330402314662933, "learning_rate": 2.5982312440645776e-06, "loss": 0.0138, "step": 128280 }, { "epoch": 0.9483013512314834, "grad_norm": 0.06071847304701805, "learning_rate": 2.594521604938272e-06, "loss": 0.0162, "step": 128290 }, { "epoch": 0.9483752698027853, "grad_norm": 0.07208188623189926, "learning_rate": 2.590811965811966e-06, "loss": 0.0166, "step": 128300 }, { "epoch": 0.9484491883740871, "grad_norm": 0.0893167182803154, "learning_rate": 2.58710232668566e-06, "loss": 0.0155, "step": 128310 }, { "epoch": 0.948523106945389, "grad_norm": 0.07082314789295197, "learning_rate": 2.5833926875593543e-06, "loss": 0.0176, "step": 128320 }, { "epoch": 0.9485970255166908, "grad_norm": 0.08515604585409164, "learning_rate": 2.5796830484330485e-06, "loss": 0.0191, "step": 128330 }, { "epoch": 0.9486709440879927, "grad_norm": 0.11537264287471771, "learning_rate": 2.575973409306743e-06, "loss": 0.017, "step": 128340 }, { "epoch": 0.9487448626592945, "grad_norm": 0.07748724520206451, "learning_rate": 2.572263770180437e-06, "loss": 0.02, "step": 128350 }, { "epoch": 0.9488187812305964, "grad_norm": 0.06140293553471565, "learning_rate": 2.5685541310541313e-06, "loss": 0.0162, "step": 128360 }, { "epoch": 0.9488926998018983, "grad_norm": 0.08523867279291153, "learning_rate": 2.5648444919278256e-06, "loss": 0.0177, "step": 128370 }, { "epoch": 0.9489666183732001, "grad_norm": 0.06785237044095993, "learning_rate": 2.5611348528015195e-06, "loss": 0.0171, "step": 128380 }, { "epoch": 0.949040536944502, "grad_norm": 0.07030639052391052, "learning_rate": 2.5574252136752137e-06, "loss": 0.0174, "step": 128390 }, { "epoch": 0.9491144555158038, "grad_norm": 0.06864111125469208, "learning_rate": 2.553715574548908e-06, "loss": 0.0193, "step": 128400 }, { "epoch": 0.9491883740871057, "grad_norm": 0.09462801367044449, "learning_rate": 2.5500059354226023e-06, "loss": 0.0157, "step": 128410 }, { "epoch": 0.9492622926584074, "grad_norm": 0.08532844483852386, "learning_rate": 2.546296296296296e-06, "loss": 0.0181, "step": 128420 }, { "epoch": 0.9493362112297093, "grad_norm": 0.0970272496342659, "learning_rate": 2.5425866571699904e-06, "loss": 0.0181, "step": 128430 }, { "epoch": 0.9494101298010112, "grad_norm": 0.07665450870990753, "learning_rate": 2.5388770180436847e-06, "loss": 0.0179, "step": 128440 }, { "epoch": 0.949484048372313, "grad_norm": 0.07551456242799759, "learning_rate": 2.5351673789173794e-06, "loss": 0.0178, "step": 128450 }, { "epoch": 0.9495579669436149, "grad_norm": 0.0852522924542427, "learning_rate": 2.5314577397910732e-06, "loss": 0.0189, "step": 128460 }, { "epoch": 0.9496318855149167, "grad_norm": 0.07986302673816681, "learning_rate": 2.5277481006647675e-06, "loss": 0.0162, "step": 128470 }, { "epoch": 0.9497058040862186, "grad_norm": 0.06319686025381088, "learning_rate": 2.5240384615384618e-06, "loss": 0.0168, "step": 128480 }, { "epoch": 0.9497797226575205, "grad_norm": 0.09003529697656631, "learning_rate": 2.520328822412156e-06, "loss": 0.0172, "step": 128490 }, { "epoch": 0.9498536412288223, "grad_norm": 0.08166031539440155, "learning_rate": 2.51661918328585e-06, "loss": 0.0157, "step": 128500 }, { "epoch": 0.9499275598001242, "grad_norm": 0.07876944541931152, "learning_rate": 2.512909544159544e-06, "loss": 0.0153, "step": 128510 }, { "epoch": 0.950001478371426, "grad_norm": 0.08147630095481873, "learning_rate": 2.5091999050332384e-06, "loss": 0.0162, "step": 128520 }, { "epoch": 0.9500753969427279, "grad_norm": 0.07436820864677429, "learning_rate": 2.5054902659069327e-06, "loss": 0.0158, "step": 128530 }, { "epoch": 0.9501493155140297, "grad_norm": 0.07813509553670883, "learning_rate": 2.501780626780627e-06, "loss": 0.0162, "step": 128540 }, { "epoch": 0.9502232340853316, "grad_norm": 0.07872185111045837, "learning_rate": 2.4980709876543213e-06, "loss": 0.0147, "step": 128550 }, { "epoch": 0.9502971526566335, "grad_norm": 0.09251312166452408, "learning_rate": 2.4943613485280155e-06, "loss": 0.0183, "step": 128560 }, { "epoch": 0.9503710712279353, "grad_norm": 0.08193115890026093, "learning_rate": 2.49065170940171e-06, "loss": 0.0152, "step": 128570 }, { "epoch": 0.9504449897992372, "grad_norm": 0.07224851846694946, "learning_rate": 2.4869420702754037e-06, "loss": 0.0176, "step": 128580 }, { "epoch": 0.950518908370539, "grad_norm": 0.06449359655380249, "learning_rate": 2.483232431149098e-06, "loss": 0.016, "step": 128590 }, { "epoch": 0.9505928269418409, "grad_norm": 0.06900657713413239, "learning_rate": 2.479522792022792e-06, "loss": 0.0182, "step": 128600 }, { "epoch": 0.9506667455131427, "grad_norm": 0.08904869109392166, "learning_rate": 2.475813152896486e-06, "loss": 0.0172, "step": 128610 }, { "epoch": 0.9507406640844446, "grad_norm": 0.07366520911455154, "learning_rate": 2.4721035137701803e-06, "loss": 0.0165, "step": 128620 }, { "epoch": 0.9508145826557465, "grad_norm": 0.06534479558467865, "learning_rate": 2.4683938746438746e-06, "loss": 0.0184, "step": 128630 }, { "epoch": 0.9508885012270483, "grad_norm": 0.08039779961109161, "learning_rate": 2.464684235517569e-06, "loss": 0.0154, "step": 128640 }, { "epoch": 0.9509624197983502, "grad_norm": 0.08867663890123367, "learning_rate": 2.460974596391263e-06, "loss": 0.0169, "step": 128650 }, { "epoch": 0.951036338369652, "grad_norm": 0.0995791032910347, "learning_rate": 2.4572649572649574e-06, "loss": 0.0157, "step": 128660 }, { "epoch": 0.9511102569409539, "grad_norm": 0.07323886454105377, "learning_rate": 2.4535553181386517e-06, "loss": 0.0161, "step": 128670 }, { "epoch": 0.9511841755122556, "grad_norm": 0.06839929521083832, "learning_rate": 2.449845679012346e-06, "loss": 0.0166, "step": 128680 }, { "epoch": 0.9512580940835575, "grad_norm": 0.06438833475112915, "learning_rate": 2.44613603988604e-06, "loss": 0.0145, "step": 128690 }, { "epoch": 0.9513320126548594, "grad_norm": 0.07878058403730392, "learning_rate": 2.442426400759734e-06, "loss": 0.0159, "step": 128700 }, { "epoch": 0.9514059312261612, "grad_norm": 0.06943828612565994, "learning_rate": 2.4387167616334284e-06, "loss": 0.0176, "step": 128710 }, { "epoch": 0.9514798497974631, "grad_norm": 0.08369556069374084, "learning_rate": 2.4350071225071226e-06, "loss": 0.0164, "step": 128720 }, { "epoch": 0.9515537683687649, "grad_norm": 0.10918588936328888, "learning_rate": 2.431297483380817e-06, "loss": 0.0171, "step": 128730 }, { "epoch": 0.9516276869400668, "grad_norm": 0.06301585584878922, "learning_rate": 2.427587844254511e-06, "loss": 0.0156, "step": 128740 }, { "epoch": 0.9517016055113687, "grad_norm": 0.08701598644256592, "learning_rate": 2.4238782051282054e-06, "loss": 0.0164, "step": 128750 }, { "epoch": 0.9517755240826705, "grad_norm": 0.09200041741132736, "learning_rate": 2.4201685660018997e-06, "loss": 0.0186, "step": 128760 }, { "epoch": 0.9518494426539724, "grad_norm": 0.07510281354188919, "learning_rate": 2.4164589268755936e-06, "loss": 0.018, "step": 128770 }, { "epoch": 0.9519233612252742, "grad_norm": 0.07271352410316467, "learning_rate": 2.412749287749288e-06, "loss": 0.013, "step": 128780 }, { "epoch": 0.9519972797965761, "grad_norm": 0.09494227916002274, "learning_rate": 2.409039648622982e-06, "loss": 0.0191, "step": 128790 }, { "epoch": 0.9520711983678779, "grad_norm": 0.07326217740774155, "learning_rate": 2.4053300094966764e-06, "loss": 0.0157, "step": 128800 }, { "epoch": 0.9521451169391798, "grad_norm": 0.08859758824110031, "learning_rate": 2.4016203703703702e-06, "loss": 0.0159, "step": 128810 }, { "epoch": 0.9522190355104817, "grad_norm": 0.06138451024889946, "learning_rate": 2.3979107312440645e-06, "loss": 0.0147, "step": 128820 }, { "epoch": 0.9522929540817835, "grad_norm": 0.09262833744287491, "learning_rate": 2.3942010921177588e-06, "loss": 0.0164, "step": 128830 }, { "epoch": 0.9523668726530854, "grad_norm": 0.06121642515063286, "learning_rate": 2.390491452991453e-06, "loss": 0.016, "step": 128840 }, { "epoch": 0.9524407912243872, "grad_norm": 0.055575739592313766, "learning_rate": 2.3867818138651473e-06, "loss": 0.0149, "step": 128850 }, { "epoch": 0.9525147097956891, "grad_norm": 0.06918497383594513, "learning_rate": 2.3830721747388416e-06, "loss": 0.0144, "step": 128860 }, { "epoch": 0.9525886283669909, "grad_norm": 0.12187926471233368, "learning_rate": 2.379362535612536e-06, "loss": 0.0206, "step": 128870 }, { "epoch": 0.9526625469382928, "grad_norm": 0.054340194910764694, "learning_rate": 2.3756528964862297e-06, "loss": 0.0168, "step": 128880 }, { "epoch": 0.9527364655095947, "grad_norm": 0.07291319966316223, "learning_rate": 2.371943257359924e-06, "loss": 0.0182, "step": 128890 }, { "epoch": 0.9528103840808965, "grad_norm": 0.05469479411840439, "learning_rate": 2.3682336182336183e-06, "loss": 0.0156, "step": 128900 }, { "epoch": 0.9528843026521984, "grad_norm": 0.07560256868600845, "learning_rate": 2.3645239791073125e-06, "loss": 0.0154, "step": 128910 }, { "epoch": 0.9529582212235002, "grad_norm": 0.08988160640001297, "learning_rate": 2.360814339981007e-06, "loss": 0.017, "step": 128920 }, { "epoch": 0.9530321397948021, "grad_norm": 0.08384236693382263, "learning_rate": 2.357104700854701e-06, "loss": 0.0198, "step": 128930 }, { "epoch": 0.9531060583661038, "grad_norm": 0.09252948313951492, "learning_rate": 2.3533950617283954e-06, "loss": 0.0167, "step": 128940 }, { "epoch": 0.9531799769374057, "grad_norm": 0.10098661482334137, "learning_rate": 2.3496854226020896e-06, "loss": 0.0164, "step": 128950 }, { "epoch": 0.9532538955087076, "grad_norm": 0.07959005981683731, "learning_rate": 2.3459757834757835e-06, "loss": 0.0164, "step": 128960 }, { "epoch": 0.9533278140800094, "grad_norm": 0.07650546729564667, "learning_rate": 2.3422661443494778e-06, "loss": 0.0177, "step": 128970 }, { "epoch": 0.9534017326513113, "grad_norm": 0.07977327704429626, "learning_rate": 2.338556505223172e-06, "loss": 0.0167, "step": 128980 }, { "epoch": 0.9534756512226131, "grad_norm": 0.09248465299606323, "learning_rate": 2.3348468660968663e-06, "loss": 0.019, "step": 128990 }, { "epoch": 0.953549569793915, "grad_norm": 0.06973160803318024, "learning_rate": 2.33113722697056e-06, "loss": 0.0169, "step": 129000 }, { "epoch": 0.9536234883652169, "grad_norm": 0.0796571671962738, "learning_rate": 2.3274275878442544e-06, "loss": 0.015, "step": 129010 }, { "epoch": 0.9536974069365187, "grad_norm": 0.07682634890079498, "learning_rate": 2.3237179487179487e-06, "loss": 0.0152, "step": 129020 }, { "epoch": 0.9537713255078206, "grad_norm": 0.06715840101242065, "learning_rate": 2.3200083095916434e-06, "loss": 0.0136, "step": 129030 }, { "epoch": 0.9538452440791224, "grad_norm": 0.06069952994585037, "learning_rate": 2.3162986704653372e-06, "loss": 0.017, "step": 129040 }, { "epoch": 0.9539191626504243, "grad_norm": 0.08695065975189209, "learning_rate": 2.3125890313390315e-06, "loss": 0.0189, "step": 129050 }, { "epoch": 0.9539930812217261, "grad_norm": 0.08549745380878448, "learning_rate": 2.308879392212726e-06, "loss": 0.0178, "step": 129060 }, { "epoch": 0.954066999793028, "grad_norm": 0.0622650645673275, "learning_rate": 2.3051697530864196e-06, "loss": 0.0157, "step": 129070 }, { "epoch": 0.9541409183643299, "grad_norm": 0.057839758694171906, "learning_rate": 2.301460113960114e-06, "loss": 0.0168, "step": 129080 }, { "epoch": 0.9542148369356317, "grad_norm": 0.10336287319660187, "learning_rate": 2.297750474833808e-06, "loss": 0.0202, "step": 129090 }, { "epoch": 0.9542887555069336, "grad_norm": 0.10132849961519241, "learning_rate": 2.2940408357075025e-06, "loss": 0.019, "step": 129100 }, { "epoch": 0.9543626740782354, "grad_norm": 0.0783289447426796, "learning_rate": 2.2903311965811967e-06, "loss": 0.0167, "step": 129110 }, { "epoch": 0.9544365926495373, "grad_norm": 0.07380151748657227, "learning_rate": 2.286621557454891e-06, "loss": 0.0179, "step": 129120 }, { "epoch": 0.9545105112208391, "grad_norm": 0.06636784225702286, "learning_rate": 2.2829119183285853e-06, "loss": 0.0182, "step": 129130 }, { "epoch": 0.954584429792141, "grad_norm": 0.07031899690628052, "learning_rate": 2.2792022792022796e-06, "loss": 0.018, "step": 129140 }, { "epoch": 0.9546583483634429, "grad_norm": 0.08217660337686539, "learning_rate": 2.2754926400759734e-06, "loss": 0.0181, "step": 129150 }, { "epoch": 0.9547322669347447, "grad_norm": 0.08827585726976395, "learning_rate": 2.2717830009496677e-06, "loss": 0.0161, "step": 129160 }, { "epoch": 0.9548061855060466, "grad_norm": 0.07113460451364517, "learning_rate": 2.268073361823362e-06, "loss": 0.0165, "step": 129170 }, { "epoch": 0.9548801040773484, "grad_norm": 0.09373348206281662, "learning_rate": 2.2643637226970562e-06, "loss": 0.0162, "step": 129180 }, { "epoch": 0.9549540226486503, "grad_norm": 0.07932069897651672, "learning_rate": 2.26065408357075e-06, "loss": 0.0176, "step": 129190 }, { "epoch": 0.955027941219952, "grad_norm": 0.06112619861960411, "learning_rate": 2.2569444444444443e-06, "loss": 0.0152, "step": 129200 }, { "epoch": 0.955101859791254, "grad_norm": 0.07475372403860092, "learning_rate": 2.2532348053181386e-06, "loss": 0.0149, "step": 129210 }, { "epoch": 0.9551757783625558, "grad_norm": 0.09839701652526855, "learning_rate": 2.2495251661918333e-06, "loss": 0.0179, "step": 129220 }, { "epoch": 0.9552496969338576, "grad_norm": 0.08877590298652649, "learning_rate": 2.245815527065527e-06, "loss": 0.0172, "step": 129230 }, { "epoch": 0.9553236155051595, "grad_norm": 0.09224338829517365, "learning_rate": 2.2421058879392214e-06, "loss": 0.0176, "step": 129240 }, { "epoch": 0.9553975340764613, "grad_norm": 0.07588019222021103, "learning_rate": 2.2383962488129157e-06, "loss": 0.0174, "step": 129250 }, { "epoch": 0.9554714526477632, "grad_norm": 0.06307175010442734, "learning_rate": 2.23468660968661e-06, "loss": 0.0171, "step": 129260 }, { "epoch": 0.9555453712190651, "grad_norm": 0.1095893457531929, "learning_rate": 2.230976970560304e-06, "loss": 0.0171, "step": 129270 }, { "epoch": 0.9556192897903669, "grad_norm": 0.07829178124666214, "learning_rate": 2.227267331433998e-06, "loss": 0.016, "step": 129280 }, { "epoch": 0.9556932083616688, "grad_norm": 0.09236520528793335, "learning_rate": 2.2235576923076924e-06, "loss": 0.0157, "step": 129290 }, { "epoch": 0.9557671269329706, "grad_norm": 0.09746488928794861, "learning_rate": 2.2198480531813866e-06, "loss": 0.0202, "step": 129300 }, { "epoch": 0.9558410455042725, "grad_norm": 0.09018874168395996, "learning_rate": 2.216138414055081e-06, "loss": 0.0164, "step": 129310 }, { "epoch": 0.9559149640755743, "grad_norm": 0.06953372806310654, "learning_rate": 2.212428774928775e-06, "loss": 0.0175, "step": 129320 }, { "epoch": 0.9559888826468762, "grad_norm": 0.08356118947267532, "learning_rate": 2.2087191358024695e-06, "loss": 0.0177, "step": 129330 }, { "epoch": 0.9560628012181781, "grad_norm": 0.06745917350053787, "learning_rate": 2.2050094966761633e-06, "loss": 0.0139, "step": 129340 }, { "epoch": 0.9561367197894799, "grad_norm": 0.06323149800300598, "learning_rate": 2.2012998575498576e-06, "loss": 0.0168, "step": 129350 }, { "epoch": 0.9562106383607818, "grad_norm": 0.06741084158420563, "learning_rate": 2.197590218423552e-06, "loss": 0.0168, "step": 129360 }, { "epoch": 0.9562845569320836, "grad_norm": 0.06664817035198212, "learning_rate": 2.193880579297246e-06, "loss": 0.0184, "step": 129370 }, { "epoch": 0.9563584755033855, "grad_norm": 0.10696565359830856, "learning_rate": 2.19017094017094e-06, "loss": 0.0192, "step": 129380 }, { "epoch": 0.9564323940746873, "grad_norm": 0.09820018708705902, "learning_rate": 2.1864613010446343e-06, "loss": 0.017, "step": 129390 }, { "epoch": 0.9565063126459892, "grad_norm": 0.07728125154972076, "learning_rate": 2.1827516619183285e-06, "loss": 0.0156, "step": 129400 }, { "epoch": 0.9565802312172911, "grad_norm": 0.046400558203458786, "learning_rate": 2.1790420227920232e-06, "loss": 0.0156, "step": 129410 }, { "epoch": 0.9566541497885929, "grad_norm": 0.0884847566485405, "learning_rate": 2.175332383665717e-06, "loss": 0.0162, "step": 129420 }, { "epoch": 0.9567280683598948, "grad_norm": 0.11267104744911194, "learning_rate": 2.1716227445394113e-06, "loss": 0.0196, "step": 129430 }, { "epoch": 0.9568019869311966, "grad_norm": 0.0674358531832695, "learning_rate": 2.1679131054131056e-06, "loss": 0.0162, "step": 129440 }, { "epoch": 0.9568759055024985, "grad_norm": 0.041945770382881165, "learning_rate": 2.1642034662868e-06, "loss": 0.0159, "step": 129450 }, { "epoch": 0.9569498240738002, "grad_norm": 0.10006273537874222, "learning_rate": 2.1604938271604937e-06, "loss": 0.0153, "step": 129460 }, { "epoch": 0.9570237426451021, "grad_norm": 0.06909745931625366, "learning_rate": 2.156784188034188e-06, "loss": 0.0172, "step": 129470 }, { "epoch": 0.957097661216404, "grad_norm": 0.08743210136890411, "learning_rate": 2.1530745489078823e-06, "loss": 0.014, "step": 129480 }, { "epoch": 0.9571715797877058, "grad_norm": 0.10677314549684525, "learning_rate": 2.1493649097815766e-06, "loss": 0.0165, "step": 129490 }, { "epoch": 0.9572454983590077, "grad_norm": 0.0807448998093605, "learning_rate": 2.145655270655271e-06, "loss": 0.017, "step": 129500 }, { "epoch": 0.9573194169303095, "grad_norm": 0.08446327596902847, "learning_rate": 2.141945631528965e-06, "loss": 0.0156, "step": 129510 }, { "epoch": 0.9573933355016114, "grad_norm": 0.08753930777311325, "learning_rate": 2.1382359924026594e-06, "loss": 0.0165, "step": 129520 }, { "epoch": 0.9574672540729133, "grad_norm": 0.061072368174791336, "learning_rate": 2.1345263532763537e-06, "loss": 0.0149, "step": 129530 }, { "epoch": 0.9575411726442151, "grad_norm": 0.06465945392847061, "learning_rate": 2.1308167141500475e-06, "loss": 0.0168, "step": 129540 }, { "epoch": 0.957615091215517, "grad_norm": 0.10568177700042725, "learning_rate": 2.1271070750237418e-06, "loss": 0.0168, "step": 129550 }, { "epoch": 0.9576890097868188, "grad_norm": 0.05580895394086838, "learning_rate": 2.123397435897436e-06, "loss": 0.0152, "step": 129560 }, { "epoch": 0.9577629283581207, "grad_norm": 0.07329615205526352, "learning_rate": 2.11968779677113e-06, "loss": 0.015, "step": 129570 }, { "epoch": 0.9578368469294225, "grad_norm": 0.0981418713927269, "learning_rate": 2.115978157644824e-06, "loss": 0.0178, "step": 129580 }, { "epoch": 0.9579107655007244, "grad_norm": 0.06811246275901794, "learning_rate": 2.1122685185185184e-06, "loss": 0.0156, "step": 129590 }, { "epoch": 0.9579846840720263, "grad_norm": 0.08827519416809082, "learning_rate": 2.108558879392213e-06, "loss": 0.0151, "step": 129600 }, { "epoch": 0.9580586026433281, "grad_norm": 0.06045457348227501, "learning_rate": 2.104849240265907e-06, "loss": 0.0192, "step": 129610 }, { "epoch": 0.95813252121463, "grad_norm": 0.07158850133419037, "learning_rate": 2.1011396011396013e-06, "loss": 0.0175, "step": 129620 }, { "epoch": 0.9582064397859318, "grad_norm": 0.11043514311313629, "learning_rate": 2.0974299620132955e-06, "loss": 0.0176, "step": 129630 }, { "epoch": 0.9582803583572337, "grad_norm": 0.07115330547094345, "learning_rate": 2.09372032288699e-06, "loss": 0.0171, "step": 129640 }, { "epoch": 0.9583542769285355, "grad_norm": 0.07935353368520737, "learning_rate": 2.0900106837606837e-06, "loss": 0.0169, "step": 129650 }, { "epoch": 0.9584281954998374, "grad_norm": 0.08272580802440643, "learning_rate": 2.086301044634378e-06, "loss": 0.0187, "step": 129660 }, { "epoch": 0.9585021140711393, "grad_norm": 0.0728270560503006, "learning_rate": 2.082591405508072e-06, "loss": 0.0185, "step": 129670 }, { "epoch": 0.9585760326424411, "grad_norm": 0.09612071514129639, "learning_rate": 2.0788817663817665e-06, "loss": 0.0191, "step": 129680 }, { "epoch": 0.958649951213743, "grad_norm": 0.08082949370145798, "learning_rate": 2.0751721272554607e-06, "loss": 0.0177, "step": 129690 }, { "epoch": 0.9587238697850448, "grad_norm": 0.09920556843280792, "learning_rate": 2.071462488129155e-06, "loss": 0.0161, "step": 129700 }, { "epoch": 0.9587977883563467, "grad_norm": 0.056050848215818405, "learning_rate": 2.0677528490028493e-06, "loss": 0.0175, "step": 129710 }, { "epoch": 0.9588717069276484, "grad_norm": 0.09262334555387497, "learning_rate": 2.0640432098765436e-06, "loss": 0.0189, "step": 129720 }, { "epoch": 0.9589456254989503, "grad_norm": 0.0797777995467186, "learning_rate": 2.0603335707502374e-06, "loss": 0.0151, "step": 129730 }, { "epoch": 0.9590195440702523, "grad_norm": 0.07810277491807938, "learning_rate": 2.0566239316239317e-06, "loss": 0.0157, "step": 129740 }, { "epoch": 0.959093462641554, "grad_norm": 0.088081493973732, "learning_rate": 2.052914292497626e-06, "loss": 0.0176, "step": 129750 }, { "epoch": 0.9591673812128559, "grad_norm": 0.07194243371486664, "learning_rate": 2.0492046533713202e-06, "loss": 0.0153, "step": 129760 }, { "epoch": 0.9592412997841577, "grad_norm": 0.05726609006524086, "learning_rate": 2.045495014245014e-06, "loss": 0.0152, "step": 129770 }, { "epoch": 0.9593152183554596, "grad_norm": 0.08413214981555939, "learning_rate": 2.0417853751187084e-06, "loss": 0.0187, "step": 129780 }, { "epoch": 0.9593891369267615, "grad_norm": 0.07600409537553787, "learning_rate": 2.0380757359924026e-06, "loss": 0.0155, "step": 129790 }, { "epoch": 0.9594630554980633, "grad_norm": 0.11079154163599014, "learning_rate": 2.034366096866097e-06, "loss": 0.0184, "step": 129800 }, { "epoch": 0.9595369740693652, "grad_norm": 0.07840550690889359, "learning_rate": 2.030656457739791e-06, "loss": 0.0155, "step": 129810 }, { "epoch": 0.959610892640667, "grad_norm": 0.07995374500751495, "learning_rate": 2.0269468186134854e-06, "loss": 0.017, "step": 129820 }, { "epoch": 0.9596848112119689, "grad_norm": 0.06168173998594284, "learning_rate": 2.0232371794871797e-06, "loss": 0.0154, "step": 129830 }, { "epoch": 0.9597587297832707, "grad_norm": 0.057839542627334595, "learning_rate": 2.0195275403608736e-06, "loss": 0.0152, "step": 129840 }, { "epoch": 0.9598326483545726, "grad_norm": 0.0621907114982605, "learning_rate": 2.015817901234568e-06, "loss": 0.017, "step": 129850 }, { "epoch": 0.9599065669258745, "grad_norm": 0.07333408296108246, "learning_rate": 2.012108262108262e-06, "loss": 0.0181, "step": 129860 }, { "epoch": 0.9599804854971763, "grad_norm": 0.09220781177282333, "learning_rate": 2.0083986229819564e-06, "loss": 0.0194, "step": 129870 }, { "epoch": 0.9600544040684782, "grad_norm": 0.07996411621570587, "learning_rate": 2.0046889838556507e-06, "loss": 0.0159, "step": 129880 }, { "epoch": 0.96012832263978, "grad_norm": 0.07438327372074127, "learning_rate": 2.000979344729345e-06, "loss": 0.0175, "step": 129890 }, { "epoch": 0.9602022412110819, "grad_norm": 0.06726469844579697, "learning_rate": 1.997269705603039e-06, "loss": 0.0177, "step": 129900 }, { "epoch": 0.9602761597823837, "grad_norm": 0.14511814713478088, "learning_rate": 1.9935600664767335e-06, "loss": 0.0167, "step": 129910 }, { "epoch": 0.9603500783536856, "grad_norm": 0.07616829872131348, "learning_rate": 1.9898504273504273e-06, "loss": 0.0181, "step": 129920 }, { "epoch": 0.9604239969249875, "grad_norm": 0.07339037954807281, "learning_rate": 1.9861407882241216e-06, "loss": 0.0173, "step": 129930 }, { "epoch": 0.9604979154962893, "grad_norm": 0.07685656100511551, "learning_rate": 1.982431149097816e-06, "loss": 0.0177, "step": 129940 }, { "epoch": 0.9605718340675912, "grad_norm": 0.06435885280370712, "learning_rate": 1.97872150997151e-06, "loss": 0.019, "step": 129950 }, { "epoch": 0.960645752638893, "grad_norm": 0.07235367596149445, "learning_rate": 1.975011870845204e-06, "loss": 0.0158, "step": 129960 }, { "epoch": 0.9607196712101949, "grad_norm": 0.10989423841238022, "learning_rate": 1.9713022317188983e-06, "loss": 0.0161, "step": 129970 }, { "epoch": 0.9607935897814966, "grad_norm": 0.08029764890670776, "learning_rate": 1.9675925925925925e-06, "loss": 0.0154, "step": 129980 }, { "epoch": 0.9608675083527985, "grad_norm": 0.06860236823558807, "learning_rate": 1.9638829534662872e-06, "loss": 0.0179, "step": 129990 }, { "epoch": 0.9609414269241005, "grad_norm": 0.07281980663537979, "learning_rate": 1.960173314339981e-06, "loss": 0.0179, "step": 130000 }, { "epoch": 0.9609414269241005, "eval_f1": 0.63648960197002, "eval_loss": 0.01656239666044712, "eval_precision": 0.5083805166236832, "eval_recall": 0.8509156514550863, "eval_runtime": 2907.7863, "eval_samples_per_second": 186.098, "eval_steps_per_second": 2.908, "step": 130000 } ], "logging_steps": 10, "max_steps": 135284, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.7620045586432e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }