{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 42462, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023550468654326222, "grad_norm": 3.2309679985046387, "learning_rate": 0.00019995760915642223, "loss": 3.1368, "step": 10 }, { "epoch": 0.00047100937308652445, "grad_norm": 2.1174814701080322, "learning_rate": 0.00019991050821911358, "loss": 2.6966, "step": 20 }, { "epoch": 0.0007065140596297867, "grad_norm": 2.325835704803467, "learning_rate": 0.00019986340728180493, "loss": 2.4996, "step": 30 }, { "epoch": 0.0009420187461730489, "grad_norm": 1.9865907430648804, "learning_rate": 0.00019981630634449625, "loss": 2.7315, "step": 40 }, { "epoch": 0.0011775234327163111, "grad_norm": 1.900598406791687, "learning_rate": 0.00019976920540718763, "loss": 2.6127, "step": 50 }, { "epoch": 0.0014130281192595733, "grad_norm": 1.5695827007293701, "learning_rate": 0.00019972210446987895, "loss": 2.5491, "step": 60 }, { "epoch": 0.0016485328058028356, "grad_norm": 2.3850433826446533, "learning_rate": 0.0001996750035325703, "loss": 2.5412, "step": 70 }, { "epoch": 0.0018840374923460978, "grad_norm": 2.0210142135620117, "learning_rate": 0.00019962790259526165, "loss": 2.485, "step": 80 }, { "epoch": 0.00211954217888936, "grad_norm": 1.617475152015686, "learning_rate": 0.000199580801657953, "loss": 2.5976, "step": 90 }, { "epoch": 0.0023550468654326222, "grad_norm": 1.6759183406829834, "learning_rate": 0.00019953370072064435, "loss": 2.3563, "step": 100 }, { "epoch": 0.0025905515519758842, "grad_norm": 1.495890498161316, "learning_rate": 0.0001994865997833357, "loss": 2.6193, "step": 110 }, { "epoch": 0.0028260562385191467, "grad_norm": 2.131957769393921, "learning_rate": 0.00019943949884602705, "loss": 2.5216, "step": 120 }, { "epoch": 0.0030615609250624087, "grad_norm": 1.818166732788086, "learning_rate": 
0.0001993923979087184, "loss": 2.4632, "step": 130 }, { "epoch": 0.003297065611605671, "grad_norm": 1.854960560798645, "learning_rate": 0.00019934529697140973, "loss": 2.4897, "step": 140 }, { "epoch": 0.003532570298148933, "grad_norm": 1.624773383140564, "learning_rate": 0.0001992981960341011, "loss": 2.614, "step": 150 }, { "epoch": 0.0037680749846921956, "grad_norm": 1.9114423990249634, "learning_rate": 0.00019925109509679243, "loss": 2.1351, "step": 160 }, { "epoch": 0.004003579671235458, "grad_norm": 1.608393669128418, "learning_rate": 0.00019920399415948378, "loss": 2.5292, "step": 170 }, { "epoch": 0.00423908435777872, "grad_norm": 1.6805859804153442, "learning_rate": 0.00019915689322217513, "loss": 2.5797, "step": 180 }, { "epoch": 0.004474589044321982, "grad_norm": 1.54912269115448, "learning_rate": 0.00019910979228486648, "loss": 2.6075, "step": 190 }, { "epoch": 0.0047100937308652445, "grad_norm": 1.4087010622024536, "learning_rate": 0.00019906269134755783, "loss": 2.3585, "step": 200 }, { "epoch": 0.004945598417408506, "grad_norm": 1.7299461364746094, "learning_rate": 0.00019901559041024918, "loss": 2.3914, "step": 210 }, { "epoch": 0.0051811031039517685, "grad_norm": 1.4757139682769775, "learning_rate": 0.0001989684894729405, "loss": 2.3994, "step": 220 }, { "epoch": 0.005416607790495031, "grad_norm": 1.5432946681976318, "learning_rate": 0.00019892138853563188, "loss": 2.5407, "step": 230 }, { "epoch": 0.005652112477038293, "grad_norm": 1.5481261014938354, "learning_rate": 0.0001988742875983232, "loss": 2.4017, "step": 240 }, { "epoch": 0.005887617163581555, "grad_norm": 1.5139665603637695, "learning_rate": 0.00019882718666101455, "loss": 2.2548, "step": 250 }, { "epoch": 0.006123121850124817, "grad_norm": 1.4921780824661255, "learning_rate": 0.00019878008572370593, "loss": 2.293, "step": 260 }, { "epoch": 0.00635862653666808, "grad_norm": 1.9517822265625, "learning_rate": 0.00019873298478639725, "loss": 2.3931, "step": 270 }, { "epoch": 
0.006594131223211342, "grad_norm": 1.6674144268035889, "learning_rate": 0.00019868588384908863, "loss": 2.6797, "step": 280 }, { "epoch": 0.006829635909754604, "grad_norm": 1.8776614665985107, "learning_rate": 0.00019863878291177995, "loss": 2.3755, "step": 290 }, { "epoch": 0.007065140596297866, "grad_norm": 1.704877257347107, "learning_rate": 0.0001985916819744713, "loss": 2.577, "step": 300 }, { "epoch": 0.007300645282841129, "grad_norm": 1.7447631359100342, "learning_rate": 0.00019854458103716265, "loss": 2.6707, "step": 310 }, { "epoch": 0.007536149969384391, "grad_norm": 1.7486447095870972, "learning_rate": 0.000198497480099854, "loss": 2.4179, "step": 320 }, { "epoch": 0.007771654655927653, "grad_norm": 1.5924261808395386, "learning_rate": 0.00019845037916254535, "loss": 2.3293, "step": 330 }, { "epoch": 0.008007159342470916, "grad_norm": 2.1535685062408447, "learning_rate": 0.0001984032782252367, "loss": 2.4561, "step": 340 }, { "epoch": 0.008242664029014177, "grad_norm": 1.8906278610229492, "learning_rate": 0.00019835617728792803, "loss": 2.5716, "step": 350 }, { "epoch": 0.00847816871555744, "grad_norm": 2.237260580062866, "learning_rate": 0.0001983090763506194, "loss": 2.4573, "step": 360 }, { "epoch": 0.008713673402100702, "grad_norm": 1.506896734237671, "learning_rate": 0.00019826197541331073, "loss": 2.4239, "step": 370 }, { "epoch": 0.008949178088643964, "grad_norm": 1.8521124124526978, "learning_rate": 0.00019821487447600208, "loss": 2.518, "step": 380 }, { "epoch": 0.009184682775187226, "grad_norm": 1.684022307395935, "learning_rate": 0.00019816777353869343, "loss": 2.2849, "step": 390 }, { "epoch": 0.009420187461730489, "grad_norm": 1.6935802698135376, "learning_rate": 0.00019812067260138478, "loss": 2.5127, "step": 400 }, { "epoch": 0.009655692148273751, "grad_norm": 2.0532476902008057, "learning_rate": 0.00019807357166407613, "loss": 2.2883, "step": 410 }, { "epoch": 0.009891196834817012, "grad_norm": 1.579057216644287, "learning_rate": 
0.00019802647072676748, "loss": 2.3682, "step": 420 }, { "epoch": 0.010126701521360275, "grad_norm": 1.471557855606079, "learning_rate": 0.0001979793697894588, "loss": 2.4559, "step": 430 }, { "epoch": 0.010362206207903537, "grad_norm": 1.4747744798660278, "learning_rate": 0.00019793226885215018, "loss": 2.3266, "step": 440 }, { "epoch": 0.0105977108944468, "grad_norm": 1.7954132556915283, "learning_rate": 0.0001978851679148415, "loss": 2.4466, "step": 450 }, { "epoch": 0.010833215580990062, "grad_norm": 1.8787152767181396, "learning_rate": 0.00019783806697753285, "loss": 2.2744, "step": 460 }, { "epoch": 0.011068720267533324, "grad_norm": 2.769911527633667, "learning_rate": 0.0001977909660402242, "loss": 2.4138, "step": 470 }, { "epoch": 0.011304224954076587, "grad_norm": 1.7480900287628174, "learning_rate": 0.00019774386510291555, "loss": 2.3967, "step": 480 }, { "epoch": 0.01153972964061985, "grad_norm": 1.5646064281463623, "learning_rate": 0.0001976967641656069, "loss": 2.3035, "step": 490 }, { "epoch": 0.01177523432716311, "grad_norm": 2.1122241020202637, "learning_rate": 0.00019764966322829825, "loss": 2.4634, "step": 500 }, { "epoch": 0.012010739013706372, "grad_norm": 1.5615967512130737, "learning_rate": 0.00019760256229098958, "loss": 2.4727, "step": 510 }, { "epoch": 0.012246243700249635, "grad_norm": 1.6492537260055542, "learning_rate": 0.00019755546135368095, "loss": 2.309, "step": 520 }, { "epoch": 0.012481748386792897, "grad_norm": 1.8484909534454346, "learning_rate": 0.00019750836041637228, "loss": 2.5138, "step": 530 }, { "epoch": 0.01271725307333616, "grad_norm": 1.2929155826568604, "learning_rate": 0.00019746125947906365, "loss": 2.1399, "step": 540 }, { "epoch": 0.012952757759879422, "grad_norm": 2.6417770385742188, "learning_rate": 0.000197414158541755, "loss": 2.5062, "step": 550 }, { "epoch": 0.013188262446422684, "grad_norm": 1.4375731945037842, "learning_rate": 0.00019736705760444633, "loss": 2.218, "step": 560 }, { "epoch": 
0.013423767132965945, "grad_norm": 1.6705974340438843, "learning_rate": 0.0001973199566671377, "loss": 2.1678, "step": 570 }, { "epoch": 0.013659271819509208, "grad_norm": 1.7592695951461792, "learning_rate": 0.00019727285572982903, "loss": 2.1822, "step": 580 }, { "epoch": 0.01389477650605247, "grad_norm": 1.7886682748794556, "learning_rate": 0.00019722575479252038, "loss": 2.3795, "step": 590 }, { "epoch": 0.014130281192595733, "grad_norm": 1.9300228357315063, "learning_rate": 0.00019717865385521173, "loss": 2.3488, "step": 600 }, { "epoch": 0.014365785879138995, "grad_norm": 1.589822769165039, "learning_rate": 0.00019713155291790308, "loss": 2.4307, "step": 610 }, { "epoch": 0.014601290565682257, "grad_norm": 1.503128170967102, "learning_rate": 0.00019708445198059443, "loss": 2.277, "step": 620 }, { "epoch": 0.01483679525222552, "grad_norm": 1.7925318479537964, "learning_rate": 0.00019703735104328578, "loss": 2.3734, "step": 630 }, { "epoch": 0.015072299938768782, "grad_norm": 1.6709870100021362, "learning_rate": 0.0001969902501059771, "loss": 2.1709, "step": 640 }, { "epoch": 0.015307804625312043, "grad_norm": 1.6741284132003784, "learning_rate": 0.00019694314916866848, "loss": 2.2209, "step": 650 }, { "epoch": 0.015543309311855305, "grad_norm": 1.6825313568115234, "learning_rate": 0.0001968960482313598, "loss": 2.255, "step": 660 }, { "epoch": 0.015778813998398568, "grad_norm": 1.544534683227539, "learning_rate": 0.00019684894729405115, "loss": 2.3872, "step": 670 }, { "epoch": 0.016014318684941832, "grad_norm": 2.33375883102417, "learning_rate": 0.0001968018463567425, "loss": 2.4629, "step": 680 }, { "epoch": 0.016249823371485093, "grad_norm": 1.4496179819107056, "learning_rate": 0.00019675474541943385, "loss": 2.3638, "step": 690 }, { "epoch": 0.016485328058028353, "grad_norm": 1.7347285747528076, "learning_rate": 0.0001967076444821252, "loss": 2.4611, "step": 700 }, { "epoch": 0.016720832744571618, "grad_norm": 1.5036282539367676, "learning_rate": 
0.00019666054354481655, "loss": 2.3117, "step": 710 }, { "epoch": 0.01695633743111488, "grad_norm": 2.0738086700439453, "learning_rate": 0.0001966134426075079, "loss": 2.3374, "step": 720 }, { "epoch": 0.017191842117658142, "grad_norm": 2.5572900772094727, "learning_rate": 0.00019656634167019926, "loss": 2.3246, "step": 730 }, { "epoch": 0.017427346804201403, "grad_norm": 1.6567786931991577, "learning_rate": 0.00019651924073289058, "loss": 2.4771, "step": 740 }, { "epoch": 0.017662851490744667, "grad_norm": 2.6085476875305176, "learning_rate": 0.00019647213979558196, "loss": 2.3132, "step": 750 }, { "epoch": 0.017898356177287928, "grad_norm": 1.939334511756897, "learning_rate": 0.00019642503885827328, "loss": 2.4239, "step": 760 }, { "epoch": 0.01813386086383119, "grad_norm": 1.840398907661438, "learning_rate": 0.00019637793792096463, "loss": 2.3915, "step": 770 }, { "epoch": 0.018369365550374453, "grad_norm": 1.8545604944229126, "learning_rate": 0.00019633083698365598, "loss": 2.3342, "step": 780 }, { "epoch": 0.018604870236917714, "grad_norm": 1.8592214584350586, "learning_rate": 0.00019628373604634733, "loss": 2.4354, "step": 790 }, { "epoch": 0.018840374923460978, "grad_norm": 1.6031461954116821, "learning_rate": 0.00019623663510903868, "loss": 2.3723, "step": 800 }, { "epoch": 0.01907587961000424, "grad_norm": 2.243943214416504, "learning_rate": 0.00019618953417173003, "loss": 2.5432, "step": 810 }, { "epoch": 0.019311384296547503, "grad_norm": 1.997883677482605, "learning_rate": 0.00019614243323442135, "loss": 2.2411, "step": 820 }, { "epoch": 0.019546888983090763, "grad_norm": 1.7321577072143555, "learning_rate": 0.00019609533229711273, "loss": 2.2699, "step": 830 }, { "epoch": 0.019782393669634024, "grad_norm": 1.544219732284546, "learning_rate": 0.00019604823135980408, "loss": 2.265, "step": 840 }, { "epoch": 0.02001789835617729, "grad_norm": 1.6504725217819214, "learning_rate": 0.0001960011304224954, "loss": 2.4386, "step": 850 }, { "epoch": 
0.02025340304272055, "grad_norm": 2.053715705871582, "learning_rate": 0.00019595402948518678, "loss": 2.2794, "step": 860 }, { "epoch": 0.020488907729263813, "grad_norm": 1.469041347503662, "learning_rate": 0.0001959069285478781, "loss": 2.4016, "step": 870 }, { "epoch": 0.020724412415807074, "grad_norm": 1.7059731483459473, "learning_rate": 0.00019585982761056948, "loss": 2.3508, "step": 880 }, { "epoch": 0.020959917102350338, "grad_norm": 1.9896105527877808, "learning_rate": 0.0001958127266732608, "loss": 2.3903, "step": 890 }, { "epoch": 0.0211954217888936, "grad_norm": 2.041901111602783, "learning_rate": 0.00019576562573595216, "loss": 2.4199, "step": 900 }, { "epoch": 0.02143092647543686, "grad_norm": 1.4538580179214478, "learning_rate": 0.0001957185247986435, "loss": 2.2481, "step": 910 }, { "epoch": 0.021666431161980124, "grad_norm": 2.297203302383423, "learning_rate": 0.00019567142386133486, "loss": 2.3375, "step": 920 }, { "epoch": 0.021901935848523384, "grad_norm": 2.172999143600464, "learning_rate": 0.0001956243229240262, "loss": 2.223, "step": 930 }, { "epoch": 0.02213744053506665, "grad_norm": 1.5507631301879883, "learning_rate": 0.00019557722198671756, "loss": 2.2435, "step": 940 }, { "epoch": 0.02237294522160991, "grad_norm": 2.088479518890381, "learning_rate": 0.00019553012104940888, "loss": 2.1769, "step": 950 }, { "epoch": 0.022608449908153173, "grad_norm": 1.3809598684310913, "learning_rate": 0.00019548302011210026, "loss": 2.3431, "step": 960 }, { "epoch": 0.022843954594696434, "grad_norm": 1.5656312704086304, "learning_rate": 0.00019543591917479158, "loss": 2.2484, "step": 970 }, { "epoch": 0.0230794592812397, "grad_norm": 1.9035502672195435, "learning_rate": 0.00019538881823748293, "loss": 2.1907, "step": 980 }, { "epoch": 0.02331496396778296, "grad_norm": 2.3373143672943115, "learning_rate": 0.00019534171730017428, "loss": 2.2871, "step": 990 }, { "epoch": 0.02355046865432622, "grad_norm": 1.9408587217330933, "learning_rate": 
0.00019529461636286563, "loss": 1.9912, "step": 1000 }, { "epoch": 0.023785973340869484, "grad_norm": 2.7549495697021484, "learning_rate": 0.00019524751542555698, "loss": 2.4807, "step": 1010 }, { "epoch": 0.024021478027412745, "grad_norm": 1.6193737983703613, "learning_rate": 0.00019520041448824833, "loss": 2.2551, "step": 1020 }, { "epoch": 0.02425698271395601, "grad_norm": 2.092519760131836, "learning_rate": 0.00019515331355093965, "loss": 2.0965, "step": 1030 }, { "epoch": 0.02449248740049927, "grad_norm": 2.3660218715667725, "learning_rate": 0.00019510621261363103, "loss": 2.3436, "step": 1040 }, { "epoch": 0.024727992087042534, "grad_norm": 2.076950788497925, "learning_rate": 0.00019505911167632235, "loss": 2.0685, "step": 1050 }, { "epoch": 0.024963496773585794, "grad_norm": 1.881016731262207, "learning_rate": 0.0001950120107390137, "loss": 2.524, "step": 1060 }, { "epoch": 0.025199001460129055, "grad_norm": 1.6038298606872559, "learning_rate": 0.00019496490980170506, "loss": 2.3772, "step": 1070 }, { "epoch": 0.02543450614667232, "grad_norm": 1.5274938344955444, "learning_rate": 0.0001949178088643964, "loss": 2.3456, "step": 1080 }, { "epoch": 0.02567001083321558, "grad_norm": 1.5426788330078125, "learning_rate": 0.00019487070792708776, "loss": 2.4562, "step": 1090 }, { "epoch": 0.025905515519758844, "grad_norm": 1.9040606021881104, "learning_rate": 0.0001948236069897791, "loss": 2.2722, "step": 1100 }, { "epoch": 0.026141020206302105, "grad_norm": 2.3787999153137207, "learning_rate": 0.00019477650605247046, "loss": 2.4295, "step": 1110 }, { "epoch": 0.02637652489284537, "grad_norm": 1.6062979698181152, "learning_rate": 0.0001947294051151618, "loss": 2.3269, "step": 1120 }, { "epoch": 0.02661202957938863, "grad_norm": 1.6376535892486572, "learning_rate": 0.00019468230417785316, "loss": 2.3202, "step": 1130 }, { "epoch": 0.02684753426593189, "grad_norm": 1.78897225856781, "learning_rate": 0.0001946352032405445, "loss": 2.4023, "step": 1140 }, { "epoch": 
0.027083038952475155, "grad_norm": 1.784119963645935, "learning_rate": 0.00019458810230323586, "loss": 2.177, "step": 1150 }, { "epoch": 0.027318543639018415, "grad_norm": 2.4228804111480713, "learning_rate": 0.00019454100136592718, "loss": 2.4059, "step": 1160 }, { "epoch": 0.02755404832556168, "grad_norm": 2.120675802230835, "learning_rate": 0.00019449390042861856, "loss": 2.2732, "step": 1170 }, { "epoch": 0.02778955301210494, "grad_norm": 1.370073676109314, "learning_rate": 0.00019444679949130988, "loss": 2.2269, "step": 1180 }, { "epoch": 0.028025057698648204, "grad_norm": 1.6620279550552368, "learning_rate": 0.00019439969855400123, "loss": 2.2124, "step": 1190 }, { "epoch": 0.028260562385191465, "grad_norm": 2.0860698223114014, "learning_rate": 0.00019435259761669258, "loss": 2.1872, "step": 1200 }, { "epoch": 0.02849606707173473, "grad_norm": 1.944350004196167, "learning_rate": 0.00019430549667938393, "loss": 2.3289, "step": 1210 }, { "epoch": 0.02873157175827799, "grad_norm": 1.8327244520187378, "learning_rate": 0.00019425839574207528, "loss": 2.2747, "step": 1220 }, { "epoch": 0.02896707644482125, "grad_norm": 1.52091383934021, "learning_rate": 0.00019421129480476663, "loss": 2.3557, "step": 1230 }, { "epoch": 0.029202581131364515, "grad_norm": 1.7364777326583862, "learning_rate": 0.00019416419386745796, "loss": 2.2268, "step": 1240 }, { "epoch": 0.029438085817907775, "grad_norm": 1.7765967845916748, "learning_rate": 0.00019411709293014933, "loss": 2.1311, "step": 1250 }, { "epoch": 0.02967359050445104, "grad_norm": 2.070098876953125, "learning_rate": 0.00019406999199284066, "loss": 2.2586, "step": 1260 }, { "epoch": 0.0299090951909943, "grad_norm": 1.7408677339553833, "learning_rate": 0.000194022891055532, "loss": 2.2579, "step": 1270 }, { "epoch": 0.030144599877537565, "grad_norm": 1.849951982498169, "learning_rate": 0.00019397579011822336, "loss": 2.3557, "step": 1280 }, { "epoch": 0.030380104564080825, "grad_norm": 1.4832192659378052, "learning_rate": 
0.0001939286891809147, "loss": 2.0548, "step": 1290 }, { "epoch": 0.030615609250624086, "grad_norm": 2.5369913578033447, "learning_rate": 0.00019388158824360606, "loss": 2.2137, "step": 1300 }, { "epoch": 0.03085111393716735, "grad_norm": 1.648578405380249, "learning_rate": 0.0001938344873062974, "loss": 2.4065, "step": 1310 }, { "epoch": 0.03108661862371061, "grad_norm": 1.6784148216247559, "learning_rate": 0.00019378738636898876, "loss": 2.1999, "step": 1320 }, { "epoch": 0.03132212331025387, "grad_norm": 2.132889747619629, "learning_rate": 0.0001937402854316801, "loss": 2.3155, "step": 1330 }, { "epoch": 0.031557627996797136, "grad_norm": 1.9378546476364136, "learning_rate": 0.00019369318449437143, "loss": 2.2407, "step": 1340 }, { "epoch": 0.0317931326833404, "grad_norm": 4.045490741729736, "learning_rate": 0.0001936460835570628, "loss": 2.2026, "step": 1350 }, { "epoch": 0.032028637369883664, "grad_norm": 1.7972216606140137, "learning_rate": 0.00019359898261975413, "loss": 2.3747, "step": 1360 }, { "epoch": 0.03226414205642692, "grad_norm": 1.7621222734451294, "learning_rate": 0.00019355188168244548, "loss": 2.1652, "step": 1370 }, { "epoch": 0.032499646742970185, "grad_norm": 2.10644268989563, "learning_rate": 0.00019350478074513683, "loss": 2.4651, "step": 1380 }, { "epoch": 0.03273515142951345, "grad_norm": 1.6137688159942627, "learning_rate": 0.00019345767980782818, "loss": 2.1893, "step": 1390 }, { "epoch": 0.03297065611605671, "grad_norm": 2.142392873764038, "learning_rate": 0.00019341057887051953, "loss": 2.3385, "step": 1400 }, { "epoch": 0.03320616080259997, "grad_norm": 2.0509915351867676, "learning_rate": 0.00019336347793321088, "loss": 2.2966, "step": 1410 }, { "epoch": 0.033441665489143235, "grad_norm": 1.7054039239883423, "learning_rate": 0.00019331637699590223, "loss": 2.2535, "step": 1420 }, { "epoch": 0.0336771701756865, "grad_norm": 1.9987514019012451, "learning_rate": 0.00019326927605859358, "loss": 2.2129, "step": 1430 }, { "epoch": 
0.03391267486222976, "grad_norm": 1.558950424194336, "learning_rate": 0.00019322217512128493, "loss": 2.3284, "step": 1440 }, { "epoch": 0.03414817954877302, "grad_norm": 2.1738619804382324, "learning_rate": 0.00019317507418397626, "loss": 2.2122, "step": 1450 }, { "epoch": 0.034383684235316285, "grad_norm": 2.3210151195526123, "learning_rate": 0.00019312797324666763, "loss": 2.1787, "step": 1460 }, { "epoch": 0.03461918892185954, "grad_norm": 2.7615976333618164, "learning_rate": 0.00019308087230935896, "loss": 2.1883, "step": 1470 }, { "epoch": 0.034854693608402806, "grad_norm": 1.751855731010437, "learning_rate": 0.00019303377137205033, "loss": 2.2706, "step": 1480 }, { "epoch": 0.03509019829494607, "grad_norm": 1.46992027759552, "learning_rate": 0.00019298667043474166, "loss": 2.1759, "step": 1490 }, { "epoch": 0.035325702981489335, "grad_norm": 1.7911906242370605, "learning_rate": 0.000192939569497433, "loss": 2.0725, "step": 1500 }, { "epoch": 0.03556120766803259, "grad_norm": 1.5120116472244263, "learning_rate": 0.00019289246856012436, "loss": 2.3206, "step": 1510 }, { "epoch": 0.035796712354575856, "grad_norm": 1.7404943704605103, "learning_rate": 0.0001928453676228157, "loss": 2.1382, "step": 1520 }, { "epoch": 0.03603221704111912, "grad_norm": 1.7643208503723145, "learning_rate": 0.00019279826668550706, "loss": 2.0674, "step": 1530 }, { "epoch": 0.03626772172766238, "grad_norm": 2.0491485595703125, "learning_rate": 0.0001927511657481984, "loss": 2.378, "step": 1540 }, { "epoch": 0.03650322641420564, "grad_norm": 1.71403169631958, "learning_rate": 0.00019270406481088973, "loss": 2.3117, "step": 1550 }, { "epoch": 0.036738731100748906, "grad_norm": 1.729164719581604, "learning_rate": 0.0001926569638735811, "loss": 2.2056, "step": 1560 }, { "epoch": 0.03697423578729217, "grad_norm": 2.1834709644317627, "learning_rate": 0.00019260986293627243, "loss": 2.265, "step": 1570 }, { "epoch": 0.03720974047383543, "grad_norm": 1.5792131423950195, "learning_rate": 
0.00019256276199896378, "loss": 2.2569, "step": 1580 }, { "epoch": 0.03744524516037869, "grad_norm": 2.3097856044769287, "learning_rate": 0.00019251566106165513, "loss": 2.2394, "step": 1590 }, { "epoch": 0.037680749846921956, "grad_norm": 1.9641480445861816, "learning_rate": 0.00019246856012434648, "loss": 2.3833, "step": 1600 }, { "epoch": 0.03791625453346521, "grad_norm": 2.1574530601501465, "learning_rate": 0.00019242145918703783, "loss": 2.1989, "step": 1610 }, { "epoch": 0.03815175922000848, "grad_norm": 2.2117180824279785, "learning_rate": 0.00019237435824972918, "loss": 2.286, "step": 1620 }, { "epoch": 0.03838726390655174, "grad_norm": 1.692057728767395, "learning_rate": 0.0001923272573124205, "loss": 2.3328, "step": 1630 }, { "epoch": 0.038622768593095005, "grad_norm": 2.3093721866607666, "learning_rate": 0.00019228015637511188, "loss": 2.284, "step": 1640 }, { "epoch": 0.03885827327963826, "grad_norm": 1.6981611251831055, "learning_rate": 0.0001922330554378032, "loss": 2.1517, "step": 1650 }, { "epoch": 0.03909377796618153, "grad_norm": 2.144850730895996, "learning_rate": 0.00019218595450049456, "loss": 2.3897, "step": 1660 }, { "epoch": 0.03932928265272479, "grad_norm": 1.5663037300109863, "learning_rate": 0.00019213885356318593, "loss": 2.2038, "step": 1670 }, { "epoch": 0.03956478733926805, "grad_norm": 2.053711175918579, "learning_rate": 0.00019209175262587726, "loss": 2.1427, "step": 1680 }, { "epoch": 0.03980029202581131, "grad_norm": 1.7935670614242554, "learning_rate": 0.00019204465168856863, "loss": 2.3839, "step": 1690 }, { "epoch": 0.04003579671235458, "grad_norm": 2.1188805103302, "learning_rate": 0.00019199755075125996, "loss": 2.2879, "step": 1700 }, { "epoch": 0.04027130139889784, "grad_norm": 1.8197453022003174, "learning_rate": 0.0001919504498139513, "loss": 2.361, "step": 1710 }, { "epoch": 0.0405068060854411, "grad_norm": 1.9226861000061035, "learning_rate": 0.00019190334887664266, "loss": 2.3839, "step": 1720 }, { "epoch": 
0.04074231077198436, "grad_norm": 1.8571581840515137, "learning_rate": 0.000191856247939334, "loss": 2.2876, "step": 1730 }, { "epoch": 0.040977815458527626, "grad_norm": 1.9353306293487549, "learning_rate": 0.00019180914700202536, "loss": 2.1383, "step": 1740 }, { "epoch": 0.041213320145070884, "grad_norm": 2.7373709678649902, "learning_rate": 0.0001917620460647167, "loss": 2.1032, "step": 1750 }, { "epoch": 0.04144882483161415, "grad_norm": 1.6866846084594727, "learning_rate": 0.00019171494512740803, "loss": 2.3656, "step": 1760 }, { "epoch": 0.04168432951815741, "grad_norm": 1.7577126026153564, "learning_rate": 0.0001916678441900994, "loss": 2.2786, "step": 1770 }, { "epoch": 0.041919834204700676, "grad_norm": 2.247462272644043, "learning_rate": 0.00019162074325279073, "loss": 2.252, "step": 1780 }, { "epoch": 0.04215533889124393, "grad_norm": 1.7799142599105835, "learning_rate": 0.00019157364231548208, "loss": 2.2314, "step": 1790 }, { "epoch": 0.0423908435777872, "grad_norm": 2.1439526081085205, "learning_rate": 0.00019152654137817343, "loss": 2.4094, "step": 1800 }, { "epoch": 0.04262634826433046, "grad_norm": 2.395840644836426, "learning_rate": 0.00019147944044086478, "loss": 2.1436, "step": 1810 }, { "epoch": 0.04286185295087372, "grad_norm": 1.6308691501617432, "learning_rate": 0.00019143233950355613, "loss": 2.2567, "step": 1820 }, { "epoch": 0.04309735763741698, "grad_norm": 2.162234306335449, "learning_rate": 0.00019138523856624748, "loss": 2.3629, "step": 1830 }, { "epoch": 0.04333286232396025, "grad_norm": 2.5992627143859863, "learning_rate": 0.0001913381376289388, "loss": 2.192, "step": 1840 }, { "epoch": 0.04356836701050351, "grad_norm": 1.7631303071975708, "learning_rate": 0.00019129103669163018, "loss": 2.2224, "step": 1850 }, { "epoch": 0.04380387169704677, "grad_norm": 1.678625464439392, "learning_rate": 0.0001912439357543215, "loss": 2.2322, "step": 1860 }, { "epoch": 0.04403937638359003, "grad_norm": 2.3097527027130127, "learning_rate": 
0.00019119683481701286, "loss": 2.3262, "step": 1870 }, { "epoch": 0.0442748810701333, "grad_norm": 1.3386963605880737, "learning_rate": 0.0001911497338797042, "loss": 2.1346, "step": 1880 }, { "epoch": 0.04451038575667656, "grad_norm": 2.4035725593566895, "learning_rate": 0.00019110263294239556, "loss": 2.2836, "step": 1890 }, { "epoch": 0.04474589044321982, "grad_norm": 1.9946550130844116, "learning_rate": 0.0001910555320050869, "loss": 2.2045, "step": 1900 }, { "epoch": 0.04498139512976308, "grad_norm": 1.757352590560913, "learning_rate": 0.00019100843106777826, "loss": 1.9896, "step": 1910 }, { "epoch": 0.04521689981630635, "grad_norm": 1.8178822994232178, "learning_rate": 0.0001909613301304696, "loss": 2.2243, "step": 1920 }, { "epoch": 0.045452404502849604, "grad_norm": 2.1335227489471436, "learning_rate": 0.00019091422919316096, "loss": 2.4226, "step": 1930 }, { "epoch": 0.04568790918939287, "grad_norm": 2.0507328510284424, "learning_rate": 0.00019086712825585228, "loss": 2.3667, "step": 1940 }, { "epoch": 0.04592341387593613, "grad_norm": 2.5613133907318115, "learning_rate": 0.00019082002731854366, "loss": 2.4204, "step": 1950 }, { "epoch": 0.0461589185624794, "grad_norm": 2.1812310218811035, "learning_rate": 0.000190772926381235, "loss": 2.1989, "step": 1960 }, { "epoch": 0.046394423249022654, "grad_norm": 2.316821336746216, "learning_rate": 0.00019072582544392633, "loss": 2.4335, "step": 1970 }, { "epoch": 0.04662992793556592, "grad_norm": 2.272609233856201, "learning_rate": 0.0001906787245066177, "loss": 2.2962, "step": 1980 }, { "epoch": 0.04686543262210918, "grad_norm": 2.0263876914978027, "learning_rate": 0.00019063162356930903, "loss": 2.2607, "step": 1990 }, { "epoch": 0.04710093730865244, "grad_norm": 2.9174461364746094, "learning_rate": 0.00019058452263200038, "loss": 2.2248, "step": 2000 }, { "epoch": 0.047336441995195704, "grad_norm": 2.3031280040740967, "learning_rate": 0.00019053742169469173, "loss": 2.3064, "step": 2010 }, { "epoch": 
0.04757194668173897, "grad_norm": 1.9068163633346558, "learning_rate": 0.00019049032075738308, "loss": 2.1707, "step": 2020 }, { "epoch": 0.04780745136828223, "grad_norm": 1.6343876123428345, "learning_rate": 0.00019044321982007443, "loss": 2.1399, "step": 2030 }, { "epoch": 0.04804295605482549, "grad_norm": 2.0931334495544434, "learning_rate": 0.00019039611888276578, "loss": 2.1365, "step": 2040 }, { "epoch": 0.04827846074136875, "grad_norm": 2.3466007709503174, "learning_rate": 0.0001903490179454571, "loss": 2.2243, "step": 2050 }, { "epoch": 0.04851396542791202, "grad_norm": 1.7455843687057495, "learning_rate": 0.00019030191700814849, "loss": 2.2342, "step": 2060 }, { "epoch": 0.048749470114455275, "grad_norm": 2.6780898571014404, "learning_rate": 0.0001902548160708398, "loss": 2.3248, "step": 2070 }, { "epoch": 0.04898497480099854, "grad_norm": 2.0855660438537598, "learning_rate": 0.00019020771513353119, "loss": 2.3911, "step": 2080 }, { "epoch": 0.0492204794875418, "grad_norm": 1.9923450946807861, "learning_rate": 0.0001901606141962225, "loss": 2.2859, "step": 2090 }, { "epoch": 0.04945598417408507, "grad_norm": 1.83405601978302, "learning_rate": 0.00019011351325891386, "loss": 2.0618, "step": 2100 }, { "epoch": 0.049691488860628324, "grad_norm": 2.5639121532440186, "learning_rate": 0.0001900664123216052, "loss": 2.3821, "step": 2110 }, { "epoch": 0.04992699354717159, "grad_norm": 1.992323875427246, "learning_rate": 0.00019001931138429656, "loss": 2.2083, "step": 2120 }, { "epoch": 0.05016249823371485, "grad_norm": 2.1780548095703125, "learning_rate": 0.0001899722104469879, "loss": 2.2257, "step": 2130 }, { "epoch": 0.05039800292025811, "grad_norm": 2.254702091217041, "learning_rate": 0.00018992510950967926, "loss": 2.175, "step": 2140 }, { "epoch": 0.050633507606801374, "grad_norm": 2.233217477798462, "learning_rate": 0.00018987800857237058, "loss": 2.2972, "step": 2150 }, { "epoch": 0.05086901229334464, "grad_norm": 2.3001439571380615, "learning_rate": 
0.00018983090763506196, "loss": 2.3244, "step": 2160 }, { "epoch": 0.0511045169798879, "grad_norm": 2.1131844520568848, "learning_rate": 0.00018978851679148415, "loss": 2.4339, "step": 2170 }, { "epoch": 0.05134002166643116, "grad_norm": 1.8972138166427612, "learning_rate": 0.0001897414158541755, "loss": 2.1409, "step": 2180 }, { "epoch": 0.051575526352974424, "grad_norm": 2.831122875213623, "learning_rate": 0.00018969431491686685, "loss": 2.4679, "step": 2190 }, { "epoch": 0.05181103103951769, "grad_norm": 1.9056183099746704, "learning_rate": 0.0001896472139795582, "loss": 2.2394, "step": 2200 }, { "epoch": 0.052046535726060945, "grad_norm": 2.218238115310669, "learning_rate": 0.00018960011304224955, "loss": 2.0855, "step": 2210 }, { "epoch": 0.05228204041260421, "grad_norm": 1.6970261335372925, "learning_rate": 0.0001895530121049409, "loss": 2.3911, "step": 2220 }, { "epoch": 0.052517545099147474, "grad_norm": 1.9589953422546387, "learning_rate": 0.00018950591116763225, "loss": 2.2206, "step": 2230 }, { "epoch": 0.05275304978569074, "grad_norm": 1.9307024478912354, "learning_rate": 0.0001894588102303236, "loss": 2.3854, "step": 2240 }, { "epoch": 0.052988554472233995, "grad_norm": 1.5521360635757446, "learning_rate": 0.00018941170929301495, "loss": 2.3013, "step": 2250 }, { "epoch": 0.05322405915877726, "grad_norm": 2.1857752799987793, "learning_rate": 0.00018936460835570628, "loss": 2.3844, "step": 2260 }, { "epoch": 0.053459563845320524, "grad_norm": 2.2541661262512207, "learning_rate": 0.00018931750741839765, "loss": 2.1811, "step": 2270 }, { "epoch": 0.05369506853186378, "grad_norm": 2.868990421295166, "learning_rate": 0.00018927040648108898, "loss": 2.391, "step": 2280 }, { "epoch": 0.053930573218407045, "grad_norm": 1.7669481039047241, "learning_rate": 0.00018922330554378033, "loss": 2.2318, "step": 2290 }, { "epoch": 0.05416607790495031, "grad_norm": 2.0685200691223145, "learning_rate": 0.00018917620460647168, "loss": 2.2587, "step": 2300 }, { "epoch": 
0.05440158259149357, "grad_norm": 2.7001943588256836, "learning_rate": 0.00018912910366916303, "loss": 2.2861, "step": 2310 }, { "epoch": 0.05463708727803683, "grad_norm": 1.7368396520614624, "learning_rate": 0.00018908200273185438, "loss": 2.119, "step": 2320 }, { "epoch": 0.054872591964580095, "grad_norm": 2.3278701305389404, "learning_rate": 0.00018903490179454573, "loss": 2.1874, "step": 2330 }, { "epoch": 0.05510809665112336, "grad_norm": 2.0269453525543213, "learning_rate": 0.00018898780085723705, "loss": 2.3772, "step": 2340 }, { "epoch": 0.055343601337666616, "grad_norm": 1.5783114433288574, "learning_rate": 0.00018894069991992843, "loss": 2.1799, "step": 2350 }, { "epoch": 0.05557910602420988, "grad_norm": 2.1277246475219727, "learning_rate": 0.00018889359898261975, "loss": 2.1011, "step": 2360 }, { "epoch": 0.055814610710753144, "grad_norm": 1.865946650505066, "learning_rate": 0.0001888464980453111, "loss": 2.2497, "step": 2370 }, { "epoch": 0.05605011539729641, "grad_norm": 2.0735392570495605, "learning_rate": 0.00018879939710800245, "loss": 2.2633, "step": 2380 }, { "epoch": 0.056285620083839666, "grad_norm": 1.9640772342681885, "learning_rate": 0.0001887522961706938, "loss": 2.2787, "step": 2390 }, { "epoch": 0.05652112477038293, "grad_norm": 1.4566375017166138, "learning_rate": 0.00018870519523338515, "loss": 2.1823, "step": 2400 }, { "epoch": 0.056756629456926194, "grad_norm": 1.715774416923523, "learning_rate": 0.0001886580942960765, "loss": 2.1143, "step": 2410 }, { "epoch": 0.05699213414346946, "grad_norm": 4.055193901062012, "learning_rate": 0.00018861099335876785, "loss": 2.4697, "step": 2420 }, { "epoch": 0.057227638830012716, "grad_norm": 2.1839892864227295, "learning_rate": 0.0001885638924214592, "loss": 2.0825, "step": 2430 }, { "epoch": 0.05746314351655598, "grad_norm": 1.8288459777832031, "learning_rate": 0.00018851679148415053, "loss": 2.1559, "step": 2440 }, { "epoch": 0.057698648203099244, "grad_norm": 1.7593369483947754, 
"learning_rate": 0.0001884696905468419, "loss": 2.1335, "step": 2450 }, { "epoch": 0.0579341528896425, "grad_norm": 1.703242540359497, "learning_rate": 0.00018842258960953323, "loss": 2.2544, "step": 2460 }, { "epoch": 0.058169657576185765, "grad_norm": 2.0734493732452393, "learning_rate": 0.00018837548867222458, "loss": 2.2457, "step": 2470 }, { "epoch": 0.05840516226272903, "grad_norm": 1.746780514717102, "learning_rate": 0.00018832838773491593, "loss": 2.3449, "step": 2480 }, { "epoch": 0.058640666949272294, "grad_norm": 2.1612048149108887, "learning_rate": 0.00018828128679760728, "loss": 2.37, "step": 2490 }, { "epoch": 0.05887617163581555, "grad_norm": 1.9451191425323486, "learning_rate": 0.00018823418586029863, "loss": 2.0534, "step": 2500 }, { "epoch": 0.059111676322358815, "grad_norm": 1.6598166227340698, "learning_rate": 0.00018818708492298998, "loss": 2.2115, "step": 2510 }, { "epoch": 0.05934718100890208, "grad_norm": 1.8540189266204834, "learning_rate": 0.00018813998398568133, "loss": 2.2075, "step": 2520 }, { "epoch": 0.05958268569544534, "grad_norm": 1.914438009262085, "learning_rate": 0.00018809288304837268, "loss": 2.4852, "step": 2530 }, { "epoch": 0.0598181903819886, "grad_norm": 1.941332459449768, "learning_rate": 0.00018804578211106403, "loss": 2.2285, "step": 2540 }, { "epoch": 0.060053695068531865, "grad_norm": 1.4906508922576904, "learning_rate": 0.00018799868117375535, "loss": 2.2549, "step": 2550 }, { "epoch": 0.06028919975507513, "grad_norm": 2.5642271041870117, "learning_rate": 0.00018795158023644673, "loss": 2.3277, "step": 2560 }, { "epoch": 0.060524704441618386, "grad_norm": 2.510061264038086, "learning_rate": 0.00018790447929913805, "loss": 2.2822, "step": 2570 }, { "epoch": 0.06076020912816165, "grad_norm": 1.7272487878799438, "learning_rate": 0.00018785737836182943, "loss": 2.1391, "step": 2580 }, { "epoch": 0.060995713814704915, "grad_norm": 1.9994730949401855, "learning_rate": 0.00018781027742452075, "loss": 2.2, "step": 2590 }, { 
"epoch": 0.06123121850124817, "grad_norm": 1.5051331520080566, "learning_rate": 0.0001877631764872121, "loss": 2.3043, "step": 2600 }, { "epoch": 0.061466723187791436, "grad_norm": 1.6909672021865845, "learning_rate": 0.00018771607554990345, "loss": 2.0515, "step": 2610 }, { "epoch": 0.0617022278743347, "grad_norm": 1.6561975479125977, "learning_rate": 0.0001876689746125948, "loss": 2.3871, "step": 2620 }, { "epoch": 0.061937732560877964, "grad_norm": 1.6686687469482422, "learning_rate": 0.00018762187367528615, "loss": 2.2119, "step": 2630 }, { "epoch": 0.06217323724742122, "grad_norm": 1.7708022594451904, "learning_rate": 0.0001875747727379775, "loss": 2.3067, "step": 2640 }, { "epoch": 0.062408741933964486, "grad_norm": 1.873261570930481, "learning_rate": 0.00018752767180066883, "loss": 2.3496, "step": 2650 }, { "epoch": 0.06264424662050774, "grad_norm": 1.7290661334991455, "learning_rate": 0.0001874805708633602, "loss": 2.2128, "step": 2660 }, { "epoch": 0.06287975130705101, "grad_norm": 2.2264304161071777, "learning_rate": 0.00018743346992605153, "loss": 2.1921, "step": 2670 }, { "epoch": 0.06311525599359427, "grad_norm": 1.793036699295044, "learning_rate": 0.00018738636898874288, "loss": 2.352, "step": 2680 }, { "epoch": 0.06335076068013754, "grad_norm": 2.034780502319336, "learning_rate": 0.00018733926805143423, "loss": 2.3553, "step": 2690 }, { "epoch": 0.0635862653666808, "grad_norm": 1.685520887374878, "learning_rate": 0.00018729216711412558, "loss": 2.1674, "step": 2700 }, { "epoch": 0.06382177005322406, "grad_norm": 2.387843370437622, "learning_rate": 0.00018724506617681693, "loss": 2.2459, "step": 2710 }, { "epoch": 0.06405727473976733, "grad_norm": 2.0628840923309326, "learning_rate": 0.00018719796523950828, "loss": 2.3385, "step": 2720 }, { "epoch": 0.06429277942631058, "grad_norm": 1.780592679977417, "learning_rate": 0.0001871508643021996, "loss": 2.0707, "step": 2730 }, { "epoch": 0.06452828411285384, "grad_norm": 1.9920777082443237, 
"learning_rate": 0.00018710376336489098, "loss": 2.175, "step": 2740 }, { "epoch": 0.0647637887993971, "grad_norm": 1.7009214162826538, "learning_rate": 0.0001870566624275823, "loss": 2.0857, "step": 2750 }, { "epoch": 0.06499929348594037, "grad_norm": 2.2230193614959717, "learning_rate": 0.00018700956149027365, "loss": 2.4301, "step": 2760 }, { "epoch": 0.06523479817248364, "grad_norm": 1.7082926034927368, "learning_rate": 0.000186962460552965, "loss": 2.2562, "step": 2770 }, { "epoch": 0.0654703028590269, "grad_norm": 1.9396960735321045, "learning_rate": 0.00018691535961565635, "loss": 2.3725, "step": 2780 }, { "epoch": 0.06570580754557016, "grad_norm": 1.9210023880004883, "learning_rate": 0.00018686825867834773, "loss": 2.197, "step": 2790 }, { "epoch": 0.06594131223211341, "grad_norm": 1.7751202583312988, "learning_rate": 0.00018682115774103905, "loss": 2.4285, "step": 2800 }, { "epoch": 0.06617681691865668, "grad_norm": 2.46057391166687, "learning_rate": 0.0001867740568037304, "loss": 2.0906, "step": 2810 }, { "epoch": 0.06641232160519994, "grad_norm": 1.982324481010437, "learning_rate": 0.00018672695586642175, "loss": 2.3326, "step": 2820 }, { "epoch": 0.0666478262917432, "grad_norm": 2.0937414169311523, "learning_rate": 0.0001866798549291131, "loss": 2.1654, "step": 2830 }, { "epoch": 0.06688333097828647, "grad_norm": 1.8799653053283691, "learning_rate": 0.00018663275399180446, "loss": 2.178, "step": 2840 }, { "epoch": 0.06711883566482973, "grad_norm": 2.4298360347747803, "learning_rate": 0.0001865856530544958, "loss": 2.3419, "step": 2850 }, { "epoch": 0.067354340351373, "grad_norm": 1.9179294109344482, "learning_rate": 0.00018653855211718713, "loss": 2.3138, "step": 2860 }, { "epoch": 0.06758984503791625, "grad_norm": 1.727042317390442, "learning_rate": 0.0001864914511798785, "loss": 2.1584, "step": 2870 }, { "epoch": 0.06782534972445951, "grad_norm": 5.538887023925781, "learning_rate": 0.00018644435024256983, "loss": 2.1706, "step": 2880 }, { "epoch": 
0.06806085441100278, "grad_norm": 1.6711513996124268, "learning_rate": 0.00018639724930526118, "loss": 2.1269, "step": 2890 }, { "epoch": 0.06829635909754604, "grad_norm": 3.3184592723846436, "learning_rate": 0.00018635014836795253, "loss": 2.201, "step": 2900 }, { "epoch": 0.0685318637840893, "grad_norm": 2.0365543365478516, "learning_rate": 0.00018630304743064388, "loss": 2.203, "step": 2910 }, { "epoch": 0.06876736847063257, "grad_norm": 2.007431745529175, "learning_rate": 0.00018625594649333523, "loss": 2.1961, "step": 2920 }, { "epoch": 0.06900287315717583, "grad_norm": 5.275245189666748, "learning_rate": 0.00018620884555602658, "loss": 1.9558, "step": 2930 }, { "epoch": 0.06923837784371908, "grad_norm": 1.7514394521713257, "learning_rate": 0.0001861617446187179, "loss": 2.2311, "step": 2940 }, { "epoch": 0.06947388253026235, "grad_norm": 1.685663104057312, "learning_rate": 0.00018611464368140928, "loss": 2.4731, "step": 2950 }, { "epoch": 0.06970938721680561, "grad_norm": 1.8519153594970703, "learning_rate": 0.0001860675427441006, "loss": 2.3555, "step": 2960 }, { "epoch": 0.06994489190334888, "grad_norm": 1.6579869985580444, "learning_rate": 0.00018602515190052282, "loss": 2.1312, "step": 2970 }, { "epoch": 0.07018039658989214, "grad_norm": 1.7049615383148193, "learning_rate": 0.00018597805096321417, "loss": 2.1154, "step": 2980 }, { "epoch": 0.0704159012764354, "grad_norm": 2.0084738731384277, "learning_rate": 0.00018593095002590552, "loss": 2.1103, "step": 2990 }, { "epoch": 0.07065140596297867, "grad_norm": 2.9388327598571777, "learning_rate": 0.00018588384908859687, "loss": 2.2574, "step": 3000 }, { "epoch": 0.07088691064952192, "grad_norm": 1.6756362915039062, "learning_rate": 0.00018583674815128822, "loss": 2.1279, "step": 3010 }, { "epoch": 0.07112241533606518, "grad_norm": 2.137333393096924, "learning_rate": 0.00018578964721397955, "loss": 2.3244, "step": 3020 }, { "epoch": 0.07135792002260845, "grad_norm": 1.6805435419082642, "learning_rate": 
0.00018574254627667092, "loss": 2.3783, "step": 3030 }, { "epoch": 0.07159342470915171, "grad_norm": 1.4257123470306396, "learning_rate": 0.00018569544533936227, "loss": 2.1998, "step": 3040 }, { "epoch": 0.07182892939569498, "grad_norm": 2.6023623943328857, "learning_rate": 0.0001856483444020536, "loss": 2.2162, "step": 3050 }, { "epoch": 0.07206443408223824, "grad_norm": 1.765554666519165, "learning_rate": 0.00018560124346474497, "loss": 2.1655, "step": 3060 }, { "epoch": 0.0722999387687815, "grad_norm": 1.852608561515808, "learning_rate": 0.0001855541425274363, "loss": 2.1649, "step": 3070 }, { "epoch": 0.07253544345532476, "grad_norm": 1.706581950187683, "learning_rate": 0.00018550704159012767, "loss": 2.1517, "step": 3080 }, { "epoch": 0.07277094814186802, "grad_norm": 2.3597655296325684, "learning_rate": 0.000185459940652819, "loss": 2.0139, "step": 3090 }, { "epoch": 0.07300645282841128, "grad_norm": 2.2678277492523193, "learning_rate": 0.00018541283971551035, "loss": 2.0053, "step": 3100 }, { "epoch": 0.07324195751495455, "grad_norm": 2.10081148147583, "learning_rate": 0.0001853657387782017, "loss": 2.2485, "step": 3110 }, { "epoch": 0.07347746220149781, "grad_norm": 1.7612814903259277, "learning_rate": 0.00018531863784089305, "loss": 2.2997, "step": 3120 }, { "epoch": 0.07371296688804108, "grad_norm": 1.8904122114181519, "learning_rate": 0.0001852715369035844, "loss": 2.1279, "step": 3130 }, { "epoch": 0.07394847157458434, "grad_norm": 1.769583821296692, "learning_rate": 0.00018522443596627575, "loss": 2.2626, "step": 3140 }, { "epoch": 0.07418397626112759, "grad_norm": 2.3224611282348633, "learning_rate": 0.00018517733502896707, "loss": 2.3991, "step": 3150 }, { "epoch": 0.07441948094767085, "grad_norm": 1.9851993322372437, "learning_rate": 0.00018513023409165845, "loss": 2.1701, "step": 3160 }, { "epoch": 0.07465498563421412, "grad_norm": 2.010765790939331, "learning_rate": 0.00018508313315434977, "loss": 2.2649, "step": 3170 }, { "epoch": 
0.07489049032075738, "grad_norm": 1.9443213939666748, "learning_rate": 0.00018503603221704112, "loss": 2.2097, "step": 3180 }, { "epoch": 0.07512599500730065, "grad_norm": 2.350292682647705, "learning_rate": 0.00018498893127973247, "loss": 2.2244, "step": 3190 }, { "epoch": 0.07536149969384391, "grad_norm": 1.83067786693573, "learning_rate": 0.00018494183034242382, "loss": 2.0924, "step": 3200 }, { "epoch": 0.07559700438038718, "grad_norm": 2.046450614929199, "learning_rate": 0.00018489472940511517, "loss": 2.214, "step": 3210 }, { "epoch": 0.07583250906693043, "grad_norm": 2.111867666244507, "learning_rate": 0.00018484762846780652, "loss": 2.5115, "step": 3220 }, { "epoch": 0.07606801375347369, "grad_norm": 1.992246150970459, "learning_rate": 0.00018480052753049785, "loss": 2.1997, "step": 3230 }, { "epoch": 0.07630351844001695, "grad_norm": 2.763298511505127, "learning_rate": 0.00018475342659318922, "loss": 2.299, "step": 3240 }, { "epoch": 0.07653902312656022, "grad_norm": 1.8757209777832031, "learning_rate": 0.00018470632565588055, "loss": 2.323, "step": 3250 }, { "epoch": 0.07677452781310348, "grad_norm": 1.779782772064209, "learning_rate": 0.0001846592247185719, "loss": 2.0727, "step": 3260 }, { "epoch": 0.07701003249964675, "grad_norm": 2.201812267303467, "learning_rate": 0.00018461212378126325, "loss": 2.4871, "step": 3270 }, { "epoch": 0.07724553718619001, "grad_norm": 2.0720343589782715, "learning_rate": 0.0001845650228439546, "loss": 2.1938, "step": 3280 }, { "epoch": 0.07748104187273326, "grad_norm": 1.743800163269043, "learning_rate": 0.00018451792190664595, "loss": 2.2414, "step": 3290 }, { "epoch": 0.07771654655927653, "grad_norm": 1.9348535537719727, "learning_rate": 0.0001844708209693373, "loss": 2.0799, "step": 3300 }, { "epoch": 0.07795205124581979, "grad_norm": 2.4140963554382324, "learning_rate": 0.00018442372003202862, "loss": 2.1721, "step": 3310 }, { "epoch": 0.07818755593236305, "grad_norm": 1.6908040046691895, "learning_rate": 
0.00018437661909472, "loss": 2.0753, "step": 3320 }, { "epoch": 0.07842306061890632, "grad_norm": 2.733171224594116, "learning_rate": 0.00018432951815741135, "loss": 2.1139, "step": 3330 }, { "epoch": 0.07865856530544958, "grad_norm": 2.480011463165283, "learning_rate": 0.0001842824172201027, "loss": 2.2136, "step": 3340 }, { "epoch": 0.07889406999199285, "grad_norm": 2.223186731338501, "learning_rate": 0.00018423531628279405, "loss": 1.8241, "step": 3350 }, { "epoch": 0.0791295746785361, "grad_norm": 2.359806537628174, "learning_rate": 0.00018418821534548537, "loss": 2.2171, "step": 3360 }, { "epoch": 0.07936507936507936, "grad_norm": 1.6184040307998657, "learning_rate": 0.00018414111440817675, "loss": 2.3467, "step": 3370 }, { "epoch": 0.07960058405162262, "grad_norm": 1.9266674518585205, "learning_rate": 0.00018409401347086807, "loss": 2.092, "step": 3380 }, { "epoch": 0.07983608873816589, "grad_norm": 3.338719129562378, "learning_rate": 0.00018404691253355942, "loss": 2.2806, "step": 3390 }, { "epoch": 0.08007159342470915, "grad_norm": 2.559915781021118, "learning_rate": 0.00018399981159625077, "loss": 2.346, "step": 3400 }, { "epoch": 0.08030709811125242, "grad_norm": 2.17266845703125, "learning_rate": 0.00018395271065894212, "loss": 2.3376, "step": 3410 }, { "epoch": 0.08054260279779568, "grad_norm": 1.8615611791610718, "learning_rate": 0.00018390560972163347, "loss": 2.204, "step": 3420 }, { "epoch": 0.08077810748433893, "grad_norm": 1.8173818588256836, "learning_rate": 0.00018385850878432482, "loss": 2.1612, "step": 3430 }, { "epoch": 0.0810136121708822, "grad_norm": 2.4427809715270996, "learning_rate": 0.00018381140784701615, "loss": 2.379, "step": 3440 }, { "epoch": 0.08124911685742546, "grad_norm": 1.7036974430084229, "learning_rate": 0.00018376430690970753, "loss": 2.3024, "step": 3450 }, { "epoch": 0.08148462154396872, "grad_norm": 2.6452062129974365, "learning_rate": 0.00018371720597239885, "loss": 2.0425, "step": 3460 }, { "epoch": 
0.08172012623051199, "grad_norm": 1.7686418294906616, "learning_rate": 0.0001836701050350902, "loss": 2.1656, "step": 3470 }, { "epoch": 0.08195563091705525, "grad_norm": 1.9566192626953125, "learning_rate": 0.00018362300409778155, "loss": 2.3603, "step": 3480 }, { "epoch": 0.08219113560359852, "grad_norm": 1.8868483304977417, "learning_rate": 0.0001835759031604729, "loss": 2.2435, "step": 3490 }, { "epoch": 0.08242664029014177, "grad_norm": 2.2280383110046387, "learning_rate": 0.00018352880222316425, "loss": 2.2404, "step": 3500 }, { "epoch": 0.08266214497668503, "grad_norm": 2.1244733333587646, "learning_rate": 0.0001834817012858556, "loss": 2.0227, "step": 3510 }, { "epoch": 0.0828976496632283, "grad_norm": 1.9479572772979736, "learning_rate": 0.00018343460034854695, "loss": 2.0774, "step": 3520 }, { "epoch": 0.08313315434977156, "grad_norm": 1.8838822841644287, "learning_rate": 0.0001833874994112383, "loss": 2.3049, "step": 3530 }, { "epoch": 0.08336865903631482, "grad_norm": 2.044419288635254, "learning_rate": 0.00018334039847392962, "loss": 2.0416, "step": 3540 }, { "epoch": 0.08360416372285809, "grad_norm": 2.2245490550994873, "learning_rate": 0.000183293297536621, "loss": 2.13, "step": 3550 }, { "epoch": 0.08383966840940135, "grad_norm": 2.318081855773926, "learning_rate": 0.00018324619659931232, "loss": 2.1373, "step": 3560 }, { "epoch": 0.0840751730959446, "grad_norm": 1.8568158149719238, "learning_rate": 0.00018319909566200367, "loss": 2.2176, "step": 3570 }, { "epoch": 0.08431067778248787, "grad_norm": 2.3757684230804443, "learning_rate": 0.00018315199472469502, "loss": 2.299, "step": 3580 }, { "epoch": 0.08454618246903113, "grad_norm": 3.102747917175293, "learning_rate": 0.00018310489378738637, "loss": 2.1751, "step": 3590 }, { "epoch": 0.0847816871555744, "grad_norm": 2.0704846382141113, "learning_rate": 0.00018305779285007772, "loss": 2.2471, "step": 3600 }, { "epoch": 0.08501719184211766, "grad_norm": 1.9258019924163818, "learning_rate": 
0.00018301069191276908, "loss": 2.1349, "step": 3610 }, { "epoch": 0.08525269652866092, "grad_norm": 2.328375816345215, "learning_rate": 0.00018296359097546043, "loss": 2.3323, "step": 3620 }, { "epoch": 0.08548820121520419, "grad_norm": 1.6533223390579224, "learning_rate": 0.00018291649003815178, "loss": 2.3557, "step": 3630 }, { "epoch": 0.08572370590174744, "grad_norm": 1.7346618175506592, "learning_rate": 0.00018286938910084313, "loss": 2.1023, "step": 3640 }, { "epoch": 0.0859592105882907, "grad_norm": 2.2735652923583984, "learning_rate": 0.00018282228816353445, "loss": 2.2031, "step": 3650 }, { "epoch": 0.08619471527483397, "grad_norm": 1.8555744886398315, "learning_rate": 0.00018277518722622583, "loss": 2.2743, "step": 3660 }, { "epoch": 0.08643021996137723, "grad_norm": 1.919329285621643, "learning_rate": 0.00018272808628891715, "loss": 2.2808, "step": 3670 }, { "epoch": 0.0866657246479205, "grad_norm": 2.246380090713501, "learning_rate": 0.00018268098535160853, "loss": 2.3911, "step": 3680 }, { "epoch": 0.08690122933446376, "grad_norm": 1.6249585151672363, "learning_rate": 0.00018263388441429985, "loss": 2.0467, "step": 3690 }, { "epoch": 0.08713673402100702, "grad_norm": 1.8340191841125488, "learning_rate": 0.0001825867834769912, "loss": 2.188, "step": 3700 }, { "epoch": 0.08737223870755029, "grad_norm": 2.3175435066223145, "learning_rate": 0.00018253968253968255, "loss": 2.2183, "step": 3710 }, { "epoch": 0.08760774339409354, "grad_norm": 2.404165267944336, "learning_rate": 0.0001824925816023739, "loss": 2.0468, "step": 3720 }, { "epoch": 0.0878432480806368, "grad_norm": 2.1523420810699463, "learning_rate": 0.00018244548066506525, "loss": 2.1458, "step": 3730 }, { "epoch": 0.08807875276718007, "grad_norm": 2.125816583633423, "learning_rate": 0.0001823983797277566, "loss": 2.3276, "step": 3740 }, { "epoch": 0.08831425745372333, "grad_norm": 2.124753713607788, "learning_rate": 0.00018235127879044792, "loss": 2.222, "step": 3750 }, { "epoch": 
0.0885497621402666, "grad_norm": 2.3880615234375, "learning_rate": 0.0001823041778531393, "loss": 2.1973, "step": 3760 }, { "epoch": 0.08878526682680986, "grad_norm": 1.9938665628433228, "learning_rate": 0.00018225707691583062, "loss": 2.0841, "step": 3770 }, { "epoch": 0.08902077151335312, "grad_norm": 2.1049611568450928, "learning_rate": 0.00018220997597852197, "loss": 2.364, "step": 3780 }, { "epoch": 0.08925627619989637, "grad_norm": 2.110180139541626, "learning_rate": 0.00018216287504121333, "loss": 2.346, "step": 3790 }, { "epoch": 0.08949178088643964, "grad_norm": 1.598199725151062, "learning_rate": 0.00018211577410390468, "loss": 2.0503, "step": 3800 }, { "epoch": 0.0897272855729829, "grad_norm": 2.412278175354004, "learning_rate": 0.00018206867316659603, "loss": 2.2061, "step": 3810 }, { "epoch": 0.08996279025952617, "grad_norm": 2.158580780029297, "learning_rate": 0.00018202157222928738, "loss": 2.1454, "step": 3820 }, { "epoch": 0.09019829494606943, "grad_norm": 1.819800853729248, "learning_rate": 0.0001819744712919787, "loss": 2.4598, "step": 3830 }, { "epoch": 0.0904337996326127, "grad_norm": 1.7876063585281372, "learning_rate": 0.00018192737035467008, "loss": 2.1973, "step": 3840 }, { "epoch": 0.09066930431915596, "grad_norm": 2.018787384033203, "learning_rate": 0.0001818802694173614, "loss": 2.209, "step": 3850 }, { "epoch": 0.09090480900569921, "grad_norm": 1.9427393674850464, "learning_rate": 0.00018183316848005275, "loss": 2.064, "step": 3860 }, { "epoch": 0.09114031369224247, "grad_norm": 2.2805685997009277, "learning_rate": 0.0001817860675427441, "loss": 2.2278, "step": 3870 }, { "epoch": 0.09137581837878574, "grad_norm": 1.9191609621047974, "learning_rate": 0.00018173896660543545, "loss": 2.1242, "step": 3880 }, { "epoch": 0.091611323065329, "grad_norm": 2.0127153396606445, "learning_rate": 0.00018169186566812683, "loss": 2.1634, "step": 3890 }, { "epoch": 0.09184682775187226, "grad_norm": 1.8756777048110962, "learning_rate": 
0.00018164476473081815, "loss": 2.0746, "step": 3900 }, { "epoch": 0.09208233243841553, "grad_norm": 1.7346595525741577, "learning_rate": 0.0001815976637935095, "loss": 2.1975, "step": 3910 }, { "epoch": 0.0923178371249588, "grad_norm": 2.1969356536865234, "learning_rate": 0.00018155056285620085, "loss": 2.1953, "step": 3920 }, { "epoch": 0.09255334181150204, "grad_norm": 2.3179337978363037, "learning_rate": 0.0001815034619188922, "loss": 2.2919, "step": 3930 }, { "epoch": 0.09278884649804531, "grad_norm": 2.3929030895233154, "learning_rate": 0.00018145636098158355, "loss": 2.184, "step": 3940 }, { "epoch": 0.09302435118458857, "grad_norm": 2.515260934829712, "learning_rate": 0.0001814092600442749, "loss": 2.2885, "step": 3950 }, { "epoch": 0.09325985587113184, "grad_norm": 2.0467185974121094, "learning_rate": 0.00018136215910696623, "loss": 2.2213, "step": 3960 }, { "epoch": 0.0934953605576751, "grad_norm": 1.7174040079116821, "learning_rate": 0.0001813150581696576, "loss": 2.0542, "step": 3970 }, { "epoch": 0.09373086524421836, "grad_norm": 2.1928911209106445, "learning_rate": 0.00018126795723234893, "loss": 2.0091, "step": 3980 }, { "epoch": 0.09396636993076163, "grad_norm": 2.9600062370300293, "learning_rate": 0.00018122085629504028, "loss": 2.3568, "step": 3990 }, { "epoch": 0.09420187461730488, "grad_norm": 1.843605637550354, "learning_rate": 0.00018117375535773163, "loss": 2.1123, "step": 4000 }, { "epoch": 0.09443737930384814, "grad_norm": 1.823076844215393, "learning_rate": 0.00018112665442042298, "loss": 2.5867, "step": 4010 }, { "epoch": 0.09467288399039141, "grad_norm": 2.1632487773895264, "learning_rate": 0.00018107955348311433, "loss": 2.1294, "step": 4020 }, { "epoch": 0.09490838867693467, "grad_norm": 1.726159930229187, "learning_rate": 0.00018103245254580568, "loss": 2.2689, "step": 4030 }, { "epoch": 0.09514389336347794, "grad_norm": 2.210451364517212, "learning_rate": 0.000180985351608497, "loss": 2.044, "step": 4040 }, { "epoch": 
0.0953793980500212, "grad_norm": 2.0674283504486084, "learning_rate": 0.00018093825067118838, "loss": 2.2215, "step": 4050 }, { "epoch": 0.09561490273656446, "grad_norm": 2.5523428916931152, "learning_rate": 0.0001808911497338797, "loss": 1.9213, "step": 4060 }, { "epoch": 0.09585040742310771, "grad_norm": 2.207009792327881, "learning_rate": 0.00018084404879657105, "loss": 2.4428, "step": 4070 }, { "epoch": 0.09608591210965098, "grad_norm": 1.6344585418701172, "learning_rate": 0.0001807969478592624, "loss": 2.0769, "step": 4080 }, { "epoch": 0.09632141679619424, "grad_norm": 1.8036699295043945, "learning_rate": 0.00018074984692195375, "loss": 2.3486, "step": 4090 }, { "epoch": 0.0965569214827375, "grad_norm": 1.708190679550171, "learning_rate": 0.0001807027459846451, "loss": 1.9418, "step": 4100 }, { "epoch": 0.09679242616928077, "grad_norm": 1.9009705781936646, "learning_rate": 0.00018065564504733645, "loss": 2.2658, "step": 4110 }, { "epoch": 0.09702793085582403, "grad_norm": 1.9692081212997437, "learning_rate": 0.0001806085441100278, "loss": 1.9867, "step": 4120 }, { "epoch": 0.0972634355423673, "grad_norm": 1.6455093622207642, "learning_rate": 0.00018056144317271915, "loss": 2.2132, "step": 4130 }, { "epoch": 0.09749894022891055, "grad_norm": 1.9711624383926392, "learning_rate": 0.00018051434223541048, "loss": 2.3335, "step": 4140 }, { "epoch": 0.09773444491545381, "grad_norm": 2.410085678100586, "learning_rate": 0.00018046724129810185, "loss": 2.1966, "step": 4150 }, { "epoch": 0.09796994960199708, "grad_norm": 1.9884003400802612, "learning_rate": 0.0001804201403607932, "loss": 2.2239, "step": 4160 }, { "epoch": 0.09820545428854034, "grad_norm": 1.7518419027328491, "learning_rate": 0.00018037303942348453, "loss": 2.2178, "step": 4170 }, { "epoch": 0.0984409589750836, "grad_norm": 2.0874931812286377, "learning_rate": 0.0001803259384861759, "loss": 2.0643, "step": 4180 }, { "epoch": 0.09867646366162687, "grad_norm": 2.409583568572998, "learning_rate": 
0.00018027883754886723, "loss": 2.2727, "step": 4190 }, { "epoch": 0.09891196834817013, "grad_norm": 2.025693655014038, "learning_rate": 0.00018023173661155858, "loss": 2.0157, "step": 4200 }, { "epoch": 0.09914747303471338, "grad_norm": 1.8936916589736938, "learning_rate": 0.00018018463567424993, "loss": 2.0757, "step": 4210 }, { "epoch": 0.09938297772125665, "grad_norm": 1.8962174654006958, "learning_rate": 0.00018013753473694128, "loss": 2.1957, "step": 4220 }, { "epoch": 0.09961848240779991, "grad_norm": 2.055262565612793, "learning_rate": 0.00018009043379963263, "loss": 2.1943, "step": 4230 }, { "epoch": 0.09985398709434318, "grad_norm": 2.352290153503418, "learning_rate": 0.00018004333286232398, "loss": 2.3158, "step": 4240 }, { "epoch": 0.10008949178088644, "grad_norm": 1.8247345685958862, "learning_rate": 0.0001799962319250153, "loss": 2.1219, "step": 4250 }, { "epoch": 0.1003249964674297, "grad_norm": 2.0612590312957764, "learning_rate": 0.00017994913098770668, "loss": 2.1914, "step": 4260 }, { "epoch": 0.10056050115397297, "grad_norm": 2.3253748416900635, "learning_rate": 0.000179902030050398, "loss": 2.2833, "step": 4270 }, { "epoch": 0.10079600584051622, "grad_norm": 2.5230417251586914, "learning_rate": 0.00017985492911308938, "loss": 2.2253, "step": 4280 }, { "epoch": 0.10103151052705948, "grad_norm": 2.022261142730713, "learning_rate": 0.0001798078281757807, "loss": 2.0184, "step": 4290 }, { "epoch": 0.10126701521360275, "grad_norm": 3.5165741443634033, "learning_rate": 0.00017976072723847205, "loss": 2.2235, "step": 4300 }, { "epoch": 0.10150251990014601, "grad_norm": 2.481456756591797, "learning_rate": 0.0001797136263011634, "loss": 2.3874, "step": 4310 }, { "epoch": 0.10173802458668928, "grad_norm": 1.780819058418274, "learning_rate": 0.00017966652536385475, "loss": 2.2076, "step": 4320 }, { "epoch": 0.10197352927323254, "grad_norm": 1.6852662563323975, "learning_rate": 0.0001796194244265461, "loss": 2.1545, "step": 4330 }, { "epoch": 
0.1022090339597758, "grad_norm": 2.13392972946167, "learning_rate": 0.00017957232348923745, "loss": 2.2223, "step": 4340 }, { "epoch": 0.10244453864631906, "grad_norm": 1.8714747428894043, "learning_rate": 0.00017952522255192878, "loss": 2.0202, "step": 4350 }, { "epoch": 0.10268004333286232, "grad_norm": 1.8120633363723755, "learning_rate": 0.00017947812161462015, "loss": 2.2459, "step": 4360 }, { "epoch": 0.10291554801940558, "grad_norm": 2.3447976112365723, "learning_rate": 0.00017943102067731148, "loss": 2.1867, "step": 4370 }, { "epoch": 0.10315105270594885, "grad_norm": 2.4707727432250977, "learning_rate": 0.00017938391974000283, "loss": 2.3336, "step": 4380 }, { "epoch": 0.10338655739249211, "grad_norm": 1.8624794483184814, "learning_rate": 0.00017933681880269418, "loss": 2.2378, "step": 4390 }, { "epoch": 0.10362206207903538, "grad_norm": 1.687395691871643, "learning_rate": 0.00017928971786538553, "loss": 2.1106, "step": 4400 }, { "epoch": 0.10385756676557864, "grad_norm": 1.6846327781677246, "learning_rate": 0.00017924261692807688, "loss": 2.0948, "step": 4410 }, { "epoch": 0.10409307145212189, "grad_norm": 1.7584487199783325, "learning_rate": 0.00017919551599076823, "loss": 2.2761, "step": 4420 }, { "epoch": 0.10432857613866516, "grad_norm": 2.1012890338897705, "learning_rate": 0.00017914841505345955, "loss": 2.3109, "step": 4430 }, { "epoch": 0.10456408082520842, "grad_norm": 1.6275670528411865, "learning_rate": 0.00017910131411615093, "loss": 2.1511, "step": 4440 }, { "epoch": 0.10479958551175168, "grad_norm": 1.744971513748169, "learning_rate": 0.00017905421317884228, "loss": 2.3064, "step": 4450 }, { "epoch": 0.10503509019829495, "grad_norm": 1.8816249370574951, "learning_rate": 0.0001790071122415336, "loss": 2.4038, "step": 4460 }, { "epoch": 0.10527059488483821, "grad_norm": 2.0134501457214355, "learning_rate": 0.00017896001130422498, "loss": 2.3979, "step": 4470 }, { "epoch": 0.10550609957138148, "grad_norm": 2.5849740505218506, "learning_rate": 
0.0001789129103669163, "loss": 2.4298, "step": 4480 }, { "epoch": 0.10574160425792473, "grad_norm": 1.6863932609558105, "learning_rate": 0.00017886580942960768, "loss": 2.0796, "step": 4490 }, { "epoch": 0.10597710894446799, "grad_norm": 2.486143112182617, "learning_rate": 0.000178818708492299, "loss": 2.1871, "step": 4500 }, { "epoch": 0.10621261363101125, "grad_norm": 1.7740905284881592, "learning_rate": 0.00017877160755499035, "loss": 1.9941, "step": 4510 }, { "epoch": 0.10644811831755452, "grad_norm": 2.0685887336730957, "learning_rate": 0.0001787245066176817, "loss": 2.2425, "step": 4520 }, { "epoch": 0.10668362300409778, "grad_norm": 1.8682607412338257, "learning_rate": 0.00017867740568037305, "loss": 2.4737, "step": 4530 }, { "epoch": 0.10691912769064105, "grad_norm": 3.012972116470337, "learning_rate": 0.0001786303047430644, "loss": 2.2102, "step": 4540 }, { "epoch": 0.10715463237718431, "grad_norm": 2.1660654544830322, "learning_rate": 0.00017858320380575575, "loss": 1.9413, "step": 4550 }, { "epoch": 0.10739013706372756, "grad_norm": 2.0424931049346924, "learning_rate": 0.00017853610286844708, "loss": 2.1046, "step": 4560 }, { "epoch": 0.10762564175027083, "grad_norm": 2.7950327396392822, "learning_rate": 0.00017848900193113845, "loss": 2.0865, "step": 4570 }, { "epoch": 0.10786114643681409, "grad_norm": 1.6264240741729736, "learning_rate": 0.00017844190099382978, "loss": 2.1237, "step": 4580 }, { "epoch": 0.10809665112335735, "grad_norm": 1.7138210535049438, "learning_rate": 0.00017839480005652113, "loss": 2.2916, "step": 4590 }, { "epoch": 0.10833215580990062, "grad_norm": 1.89190673828125, "learning_rate": 0.00017834769911921248, "loss": 2.3419, "step": 4600 }, { "epoch": 0.10856766049644388, "grad_norm": 1.813306212425232, "learning_rate": 0.00017830059818190383, "loss": 2.3194, "step": 4610 }, { "epoch": 0.10880316518298715, "grad_norm": 1.9797568321228027, "learning_rate": 0.00017825349724459518, "loss": 2.242, "step": 4620 }, { "epoch": 
0.1090386698695304, "grad_norm": 1.423639178276062, "learning_rate": 0.00017820639630728653, "loss": 2.1681, "step": 4630 }, { "epoch": 0.10927417455607366, "grad_norm": 2.0920000076293945, "learning_rate": 0.00017815929536997785, "loss": 2.155, "step": 4640 }, { "epoch": 0.10950967924261693, "grad_norm": 2.133775234222412, "learning_rate": 0.00017811219443266923, "loss": 2.1228, "step": 4650 }, { "epoch": 0.10974518392916019, "grad_norm": 1.449283242225647, "learning_rate": 0.00017806509349536055, "loss": 2.2264, "step": 4660 }, { "epoch": 0.10998068861570345, "grad_norm": 2.10311222076416, "learning_rate": 0.0001780179925580519, "loss": 2.1021, "step": 4670 }, { "epoch": 0.11021619330224672, "grad_norm": 1.8965257406234741, "learning_rate": 0.00017797089162074325, "loss": 2.0347, "step": 4680 }, { "epoch": 0.11045169798878998, "grad_norm": 1.989089846611023, "learning_rate": 0.0001779237906834346, "loss": 2.3597, "step": 4690 }, { "epoch": 0.11068720267533323, "grad_norm": 2.173595666885376, "learning_rate": 0.00017787668974612595, "loss": 2.3751, "step": 4700 }, { "epoch": 0.1109227073618765, "grad_norm": 2.043771743774414, "learning_rate": 0.0001778295888088173, "loss": 2.1705, "step": 4710 }, { "epoch": 0.11115821204841976, "grad_norm": 2.4140965938568115, "learning_rate": 0.00017778248787150865, "loss": 2.2574, "step": 4720 }, { "epoch": 0.11139371673496302, "grad_norm": 1.8277655839920044, "learning_rate": 0.0001777353869342, "loss": 2.1179, "step": 4730 }, { "epoch": 0.11162922142150629, "grad_norm": 1.941874623298645, "learning_rate": 0.00017768828599689135, "loss": 2.0465, "step": 4740 }, { "epoch": 0.11186472610804955, "grad_norm": 1.754234790802002, "learning_rate": 0.0001776411850595827, "loss": 2.2535, "step": 4750 }, { "epoch": 0.11210023079459282, "grad_norm": 1.6321264505386353, "learning_rate": 0.00017759408412227405, "loss": 2.041, "step": 4760 }, { "epoch": 0.11233573548113608, "grad_norm": 2.81117582321167, "learning_rate": 
0.00017754698318496538, "loss": 2.2673, "step": 4770 }, { "epoch": 0.11257124016767933, "grad_norm": 1.9449315071105957, "learning_rate": 0.00017749988224765676, "loss": 2.1277, "step": 4780 }, { "epoch": 0.1128067448542226, "grad_norm": 1.9723138809204102, "learning_rate": 0.00017745278131034808, "loss": 2.141, "step": 4790 }, { "epoch": 0.11304224954076586, "grad_norm": 2.3215932846069336, "learning_rate": 0.00017740568037303943, "loss": 2.1263, "step": 4800 }, { "epoch": 0.11327775422730912, "grad_norm": 2.240413188934326, "learning_rate": 0.00017735857943573078, "loss": 2.1786, "step": 4810 }, { "epoch": 0.11351325891385239, "grad_norm": 2.177074432373047, "learning_rate": 0.00017731147849842213, "loss": 2.1403, "step": 4820 }, { "epoch": 0.11374876360039565, "grad_norm": 1.7291984558105469, "learning_rate": 0.00017726437756111348, "loss": 2.2137, "step": 4830 }, { "epoch": 0.11398426828693892, "grad_norm": 1.975380539894104, "learning_rate": 0.00017721727662380483, "loss": 2.2869, "step": 4840 }, { "epoch": 0.11421977297348217, "grad_norm": 2.778428554534912, "learning_rate": 0.00017717017568649615, "loss": 2.151, "step": 4850 }, { "epoch": 0.11445527766002543, "grad_norm": 2.3732733726501465, "learning_rate": 0.00017712307474918753, "loss": 2.1637, "step": 4860 }, { "epoch": 0.1146907823465687, "grad_norm": 3.130542516708374, "learning_rate": 0.00017707597381187885, "loss": 2.2621, "step": 4870 }, { "epoch": 0.11492628703311196, "grad_norm": 2.3386194705963135, "learning_rate": 0.00017702887287457023, "loss": 2.2108, "step": 4880 }, { "epoch": 0.11516179171965522, "grad_norm": 1.9925601482391357, "learning_rate": 0.00017698177193726155, "loss": 2.2729, "step": 4890 }, { "epoch": 0.11539729640619849, "grad_norm": 2.1639785766601562, "learning_rate": 0.0001769346709999529, "loss": 2.2079, "step": 4900 }, { "epoch": 0.11563280109274175, "grad_norm": 2.6207916736602783, "learning_rate": 0.00017688757006264425, "loss": 2.1154, "step": 4910 }, { "epoch": 
0.115868305779285, "grad_norm": 2.0274581909179688, "learning_rate": 0.0001768404691253356, "loss": 2.2918, "step": 4920 }, { "epoch": 0.11610381046582827, "grad_norm": 2.088710069656372, "learning_rate": 0.00017679336818802695, "loss": 2.2056, "step": 4930 }, { "epoch": 0.11633931515237153, "grad_norm": 2.373544931411743, "learning_rate": 0.0001767462672507183, "loss": 2.1674, "step": 4940 }, { "epoch": 0.1165748198389148, "grad_norm": 2.527215003967285, "learning_rate": 0.00017669916631340963, "loss": 1.9731, "step": 4950 }, { "epoch": 0.11681032452545806, "grad_norm": 2.6474502086639404, "learning_rate": 0.000176652065376101, "loss": 2.0882, "step": 4960 }, { "epoch": 0.11704582921200132, "grad_norm": 2.4764654636383057, "learning_rate": 0.00017660496443879233, "loss": 2.3867, "step": 4970 }, { "epoch": 0.11728133389854459, "grad_norm": 1.8438410758972168, "learning_rate": 0.00017655786350148368, "loss": 2.3935, "step": 4980 }, { "epoch": 0.11751683858508784, "grad_norm": 2.0502359867095947, "learning_rate": 0.00017651076256417503, "loss": 2.1566, "step": 4990 }, { "epoch": 0.1177523432716311, "grad_norm": 2.705681562423706, "learning_rate": 0.00017646366162686638, "loss": 2.2442, "step": 5000 }, { "epoch": 0.11798784795817437, "grad_norm": 1.7821354866027832, "learning_rate": 0.00017641656068955773, "loss": 2.095, "step": 5010 }, { "epoch": 0.11822335264471763, "grad_norm": 2.171154022216797, "learning_rate": 0.00017636945975224908, "loss": 2.1128, "step": 5020 }, { "epoch": 0.1184588573312609, "grad_norm": 1.8381446599960327, "learning_rate": 0.00017632235881494043, "loss": 2.2702, "step": 5030 }, { "epoch": 0.11869436201780416, "grad_norm": 2.394317865371704, "learning_rate": 0.00017627525787763178, "loss": 2.2467, "step": 5040 }, { "epoch": 0.11892986670434742, "grad_norm": 2.100402593612671, "learning_rate": 0.00017622815694032313, "loss": 2.1857, "step": 5050 }, { "epoch": 0.11916537139089067, "grad_norm": 1.8596925735473633, "learning_rate": 
0.00017618105600301445, "loss": 2.0607, "step": 5060 }, { "epoch": 0.11940087607743394, "grad_norm": 2.523073196411133, "learning_rate": 0.00017613395506570583, "loss": 2.2643, "step": 5070 }, { "epoch": 0.1196363807639772, "grad_norm": 1.6431152820587158, "learning_rate": 0.00017608685412839715, "loss": 2.3096, "step": 5080 }, { "epoch": 0.11987188545052047, "grad_norm": 1.8515095710754395, "learning_rate": 0.00017603975319108853, "loss": 2.2366, "step": 5090 }, { "epoch": 0.12010739013706373, "grad_norm": 2.4143874645233154, "learning_rate": 0.00017599265225377985, "loss": 2.2266, "step": 5100 }, { "epoch": 0.120342894823607, "grad_norm": 2.4558234214782715, "learning_rate": 0.0001759455513164712, "loss": 2.3298, "step": 5110 }, { "epoch": 0.12057839951015026, "grad_norm": 1.5276613235473633, "learning_rate": 0.00017589845037916256, "loss": 2.1767, "step": 5120 }, { "epoch": 0.12081390419669351, "grad_norm": 2.6570372581481934, "learning_rate": 0.0001758513494418539, "loss": 2.3225, "step": 5130 }, { "epoch": 0.12104940888323677, "grad_norm": 2.54841685295105, "learning_rate": 0.00017580424850454526, "loss": 2.3588, "step": 5140 }, { "epoch": 0.12128491356978004, "grad_norm": 1.772416114807129, "learning_rate": 0.0001757571475672366, "loss": 2.3906, "step": 5150 }, { "epoch": 0.1215204182563233, "grad_norm": 1.865113615989685, "learning_rate": 0.00017571004662992793, "loss": 2.2392, "step": 5160 }, { "epoch": 0.12175592294286657, "grad_norm": 2.2344613075256348, "learning_rate": 0.0001756629456926193, "loss": 2.127, "step": 5170 }, { "epoch": 0.12199142762940983, "grad_norm": 2.441580057144165, "learning_rate": 0.00017561584475531063, "loss": 2.2126, "step": 5180 }, { "epoch": 0.1222269323159531, "grad_norm": 1.9913771152496338, "learning_rate": 0.00017556874381800198, "loss": 2.22, "step": 5190 }, { "epoch": 0.12246243700249634, "grad_norm": 2.1991429328918457, "learning_rate": 0.00017552164288069333, "loss": 2.2766, "step": 5200 }, { "epoch": 
0.12269794168903961, "grad_norm": 2.120680809020996, "learning_rate": 0.00017547454194338468, "loss": 2.2078, "step": 5210 }, { "epoch": 0.12293344637558287, "grad_norm": 1.788309931755066, "learning_rate": 0.00017542744100607603, "loss": 2.2587, "step": 5220 }, { "epoch": 0.12316895106212614, "grad_norm": 2.2423155307769775, "learning_rate": 0.00017538034006876738, "loss": 2.2241, "step": 5230 }, { "epoch": 0.1234044557486694, "grad_norm": 1.9412622451782227, "learning_rate": 0.0001753332391314587, "loss": 2.1715, "step": 5240 }, { "epoch": 0.12363996043521266, "grad_norm": 2.0444014072418213, "learning_rate": 0.00017528613819415008, "loss": 2.1813, "step": 5250 }, { "epoch": 0.12387546512175593, "grad_norm": 1.9657073020935059, "learning_rate": 0.0001752390372568414, "loss": 2.3767, "step": 5260 }, { "epoch": 0.12411096980829918, "grad_norm": 2.156477928161621, "learning_rate": 0.00017519193631953275, "loss": 1.8883, "step": 5270 }, { "epoch": 0.12434647449484244, "grad_norm": 2.735069513320923, "learning_rate": 0.0001751448353822241, "loss": 2.2131, "step": 5280 }, { "epoch": 0.12458197918138571, "grad_norm": 1.98786199092865, "learning_rate": 0.00017509773444491546, "loss": 2.0419, "step": 5290 }, { "epoch": 0.12481748386792897, "grad_norm": 2.619561195373535, "learning_rate": 0.00017505063350760683, "loss": 2.0047, "step": 5300 }, { "epoch": 0.12505298855447222, "grad_norm": 2.2264404296875, "learning_rate": 0.00017500353257029816, "loss": 2.1834, "step": 5310 }, { "epoch": 0.1252884932410155, "grad_norm": 2.1874210834503174, "learning_rate": 0.0001749564316329895, "loss": 2.1197, "step": 5320 }, { "epoch": 0.12552399792755875, "grad_norm": 2.7099130153656006, "learning_rate": 0.00017490933069568086, "loss": 1.9282, "step": 5330 }, { "epoch": 0.12575950261410201, "grad_norm": 2.1280713081359863, "learning_rate": 0.0001748622297583722, "loss": 2.0907, "step": 5340 }, { "epoch": 0.12599500730064528, "grad_norm": 2.1866750717163086, "learning_rate": 
0.00017481512882106356, "loss": 1.8259, "step": 5350 }, { "epoch": 0.12623051198718854, "grad_norm": 1.9257513284683228, "learning_rate": 0.0001747680278837549, "loss": 2.1086, "step": 5360 }, { "epoch": 0.1264660166737318, "grad_norm": 1.8964312076568604, "learning_rate": 0.00017472092694644623, "loss": 2.0995, "step": 5370 }, { "epoch": 0.12670152136027507, "grad_norm": 2.228306770324707, "learning_rate": 0.0001746738260091376, "loss": 2.0373, "step": 5380 }, { "epoch": 0.12693702604681834, "grad_norm": 2.723195791244507, "learning_rate": 0.00017462672507182893, "loss": 2.0848, "step": 5390 }, { "epoch": 0.1271725307333616, "grad_norm": 2.1091506481170654, "learning_rate": 0.00017457962413452028, "loss": 2.3576, "step": 5400 }, { "epoch": 0.12740803541990486, "grad_norm": 2.01425838470459, "learning_rate": 0.00017453252319721163, "loss": 2.1692, "step": 5410 }, { "epoch": 0.12764354010644813, "grad_norm": 2.0158488750457764, "learning_rate": 0.00017448542225990298, "loss": 2.1845, "step": 5420 }, { "epoch": 0.1278790447929914, "grad_norm": 1.796004056930542, "learning_rate": 0.00017443832132259433, "loss": 2.1808, "step": 5430 }, { "epoch": 0.12811454947953466, "grad_norm": 2.268319845199585, "learning_rate": 0.00017439122038528568, "loss": 2.1978, "step": 5440 }, { "epoch": 0.1283500541660779, "grad_norm": 2.573340654373169, "learning_rate": 0.000174344119447977, "loss": 2.2889, "step": 5450 }, { "epoch": 0.12858555885262116, "grad_norm": 2.2684388160705566, "learning_rate": 0.00017429701851066838, "loss": 2.0348, "step": 5460 }, { "epoch": 0.12882106353916442, "grad_norm": 2.271415948867798, "learning_rate": 0.0001742499175733597, "loss": 2.3534, "step": 5470 }, { "epoch": 0.12905656822570769, "grad_norm": 1.900991439819336, "learning_rate": 0.00017420281663605108, "loss": 2.1018, "step": 5480 }, { "epoch": 0.12929207291225095, "grad_norm": 2.262239456176758, "learning_rate": 0.0001741557156987424, "loss": 2.2768, "step": 5490 }, { "epoch": 0.1295275775987942, 
"grad_norm": 2.041602849960327, "learning_rate": 0.00017410861476143376, "loss": 2.3032, "step": 5500 }, { "epoch": 0.12976308228533748, "grad_norm": 1.4872256517410278, "learning_rate": 0.0001740615138241251, "loss": 2.3222, "step": 5510 }, { "epoch": 0.12999858697188074, "grad_norm": 1.8539739847183228, "learning_rate": 0.00017401441288681646, "loss": 2.132, "step": 5520 }, { "epoch": 0.130234091658424, "grad_norm": 2.243079900741577, "learning_rate": 0.0001739673119495078, "loss": 2.1824, "step": 5530 }, { "epoch": 0.13046959634496727, "grad_norm": 2.0111191272735596, "learning_rate": 0.00017392021101219916, "loss": 2.1091, "step": 5540 }, { "epoch": 0.13070510103151053, "grad_norm": 2.0866692066192627, "learning_rate": 0.00017387311007489048, "loss": 2.2156, "step": 5550 }, { "epoch": 0.1309406057180538, "grad_norm": 1.6180267333984375, "learning_rate": 0.00017382600913758186, "loss": 2.2403, "step": 5560 }, { "epoch": 0.13117611040459706, "grad_norm": 1.926936388015747, "learning_rate": 0.00017378361829400408, "loss": 2.268, "step": 5570 }, { "epoch": 0.13141161509114033, "grad_norm": 1.732120156288147, "learning_rate": 0.0001737365173566954, "loss": 1.943, "step": 5580 }, { "epoch": 0.13164711977768356, "grad_norm": 2.2308902740478516, "learning_rate": 0.00017368941641938678, "loss": 2.3538, "step": 5590 }, { "epoch": 0.13188262446422683, "grad_norm": 2.0219004154205322, "learning_rate": 0.0001736423154820781, "loss": 2.1096, "step": 5600 }, { "epoch": 0.1321181291507701, "grad_norm": 2.223156213760376, "learning_rate": 0.00017359521454476945, "loss": 2.138, "step": 5610 }, { "epoch": 0.13235363383731336, "grad_norm": 2.6512413024902344, "learning_rate": 0.0001735481136074608, "loss": 1.9548, "step": 5620 }, { "epoch": 0.13258913852385662, "grad_norm": 1.974619746208191, "learning_rate": 0.00017350101267015215, "loss": 2.1642, "step": 5630 }, { "epoch": 0.13282464321039988, "grad_norm": 2.279487133026123, "learning_rate": 0.0001734539117328435, "loss": 
2.1445, "step": 5640 }, { "epoch": 0.13306014789694315, "grad_norm": 1.9778087139129639, "learning_rate": 0.00017340681079553485, "loss": 2.2169, "step": 5650 }, { "epoch": 0.1332956525834864, "grad_norm": 1.9893101453781128, "learning_rate": 0.00017335970985822617, "loss": 2.2353, "step": 5660 }, { "epoch": 0.13353115727002968, "grad_norm": 2.7806546688079834, "learning_rate": 0.00017331260892091755, "loss": 2.1214, "step": 5670 }, { "epoch": 0.13376666195657294, "grad_norm": 2.2366230487823486, "learning_rate": 0.00017326550798360887, "loss": 2.1004, "step": 5680 }, { "epoch": 0.1340021666431162, "grad_norm": 3.1434006690979004, "learning_rate": 0.00017321840704630022, "loss": 2.3682, "step": 5690 }, { "epoch": 0.13423767132965947, "grad_norm": 1.862724781036377, "learning_rate": 0.00017317130610899157, "loss": 2.1701, "step": 5700 }, { "epoch": 0.13447317601620273, "grad_norm": 2.2287230491638184, "learning_rate": 0.00017312420517168292, "loss": 2.2376, "step": 5710 }, { "epoch": 0.134708680702746, "grad_norm": 2.0166916847229004, "learning_rate": 0.00017307710423437428, "loss": 2.2481, "step": 5720 }, { "epoch": 0.13494418538928923, "grad_norm": 2.4379286766052246, "learning_rate": 0.00017303000329706563, "loss": 2.2884, "step": 5730 }, { "epoch": 0.1351796900758325, "grad_norm": 1.923784613609314, "learning_rate": 0.00017298290235975695, "loss": 2.13, "step": 5740 }, { "epoch": 0.13541519476237576, "grad_norm": 2.3278648853302, "learning_rate": 0.00017293580142244833, "loss": 2.4975, "step": 5750 }, { "epoch": 0.13565069944891903, "grad_norm": 2.186098098754883, "learning_rate": 0.00017288870048513965, "loss": 2.4386, "step": 5760 }, { "epoch": 0.1358862041354623, "grad_norm": 1.8894046545028687, "learning_rate": 0.000172841599547831, "loss": 2.1392, "step": 5770 }, { "epoch": 0.13612170882200555, "grad_norm": 2.2432827949523926, "learning_rate": 0.00017279449861052235, "loss": 2.2693, "step": 5780 }, { "epoch": 0.13635721350854882, "grad_norm": 
1.860554575920105, "learning_rate": 0.0001727473976732137, "loss": 2.0159, "step": 5790 }, { "epoch": 0.13659271819509208, "grad_norm": 1.6823490858078003, "learning_rate": 0.00017270029673590505, "loss": 2.2196, "step": 5800 }, { "epoch": 0.13682822288163535, "grad_norm": 1.9365490674972534, "learning_rate": 0.0001726531957985964, "loss": 2.2016, "step": 5810 }, { "epoch": 0.1370637275681786, "grad_norm": 2.2856075763702393, "learning_rate": 0.00017260609486128775, "loss": 2.2524, "step": 5820 }, { "epoch": 0.13729923225472188, "grad_norm": 2.667853832244873, "learning_rate": 0.0001725589939239791, "loss": 1.9745, "step": 5830 }, { "epoch": 0.13753473694126514, "grad_norm": 1.9969861507415771, "learning_rate": 0.00017251189298667045, "loss": 2.1179, "step": 5840 }, { "epoch": 0.1377702416278084, "grad_norm": 1.866898536682129, "learning_rate": 0.0001724647920493618, "loss": 2.3985, "step": 5850 }, { "epoch": 0.13800574631435167, "grad_norm": 2.4066002368927, "learning_rate": 0.00017241769111205315, "loss": 2.2191, "step": 5860 }, { "epoch": 0.1382412510008949, "grad_norm": 2.1669511795043945, "learning_rate": 0.00017237059017474447, "loss": 2.07, "step": 5870 }, { "epoch": 0.13847675568743817, "grad_norm": 1.9920525550842285, "learning_rate": 0.00017232348923743585, "loss": 2.1086, "step": 5880 }, { "epoch": 0.13871226037398143, "grad_norm": 2.622621536254883, "learning_rate": 0.00017227638830012717, "loss": 2.1982, "step": 5890 }, { "epoch": 0.1389477650605247, "grad_norm": 2.285757064819336, "learning_rate": 0.00017222928736281853, "loss": 2.2294, "step": 5900 }, { "epoch": 0.13918326974706796, "grad_norm": 2.428218126296997, "learning_rate": 0.00017218218642550988, "loss": 2.3078, "step": 5910 }, { "epoch": 0.13941877443361123, "grad_norm": 1.8226295709609985, "learning_rate": 0.00017213508548820123, "loss": 2.1509, "step": 5920 }, { "epoch": 0.1396542791201545, "grad_norm": 2.660916328430176, "learning_rate": 0.00017208798455089258, "loss": 2.0719, "step": 
5930 }, { "epoch": 0.13988978380669775, "grad_norm": 1.9851837158203125, "learning_rate": 0.00017204088361358393, "loss": 2.0906, "step": 5940 }, { "epoch": 0.14012528849324102, "grad_norm": 2.3256642818450928, "learning_rate": 0.00017199378267627525, "loss": 2.0444, "step": 5950 }, { "epoch": 0.14036079317978428, "grad_norm": 1.7920613288879395, "learning_rate": 0.00017194668173896663, "loss": 2.1493, "step": 5960 }, { "epoch": 0.14059629786632755, "grad_norm": 1.760733723640442, "learning_rate": 0.00017189958080165795, "loss": 2.3474, "step": 5970 }, { "epoch": 0.1408318025528708, "grad_norm": 2.066558599472046, "learning_rate": 0.00017185247986434933, "loss": 2.1801, "step": 5980 }, { "epoch": 0.14106730723941407, "grad_norm": 1.9565317630767822, "learning_rate": 0.00017180537892704065, "loss": 2.3232, "step": 5990 }, { "epoch": 0.14130281192595734, "grad_norm": 2.2794132232666016, "learning_rate": 0.000171758277989732, "loss": 2.178, "step": 6000 }, { "epoch": 0.14153831661250058, "grad_norm": 1.8281110525131226, "learning_rate": 0.00017171117705242335, "loss": 2.1067, "step": 6010 }, { "epoch": 0.14177382129904384, "grad_norm": 1.9072505235671997, "learning_rate": 0.0001716640761151147, "loss": 1.9927, "step": 6020 }, { "epoch": 0.1420093259855871, "grad_norm": 2.288691997528076, "learning_rate": 0.00017161697517780605, "loss": 2.0077, "step": 6030 }, { "epoch": 0.14224483067213037, "grad_norm": 2.0994765758514404, "learning_rate": 0.0001715698742404974, "loss": 2.2116, "step": 6040 }, { "epoch": 0.14248033535867363, "grad_norm": 1.807565689086914, "learning_rate": 0.00017152277330318872, "loss": 2.3081, "step": 6050 }, { "epoch": 0.1427158400452169, "grad_norm": 1.8691102266311646, "learning_rate": 0.0001714756723658801, "loss": 2.1234, "step": 6060 }, { "epoch": 0.14295134473176016, "grad_norm": 2.168907403945923, "learning_rate": 0.00017142857142857143, "loss": 2.1899, "step": 6070 }, { "epoch": 0.14318684941830342, "grad_norm": 2.2471699714660645, 
"learning_rate": 0.00017138147049126278, "loss": 2.3476, "step": 6080 }, { "epoch": 0.1434223541048467, "grad_norm": 1.8426363468170166, "learning_rate": 0.00017133436955395413, "loss": 2.0467, "step": 6090 }, { "epoch": 0.14365785879138995, "grad_norm": 1.6236611604690552, "learning_rate": 0.00017128726861664548, "loss": 2.0523, "step": 6100 }, { "epoch": 0.14389336347793322, "grad_norm": 1.660072922706604, "learning_rate": 0.00017124016767933683, "loss": 2.2243, "step": 6110 }, { "epoch": 0.14412886816447648, "grad_norm": 2.4543228149414062, "learning_rate": 0.00017119306674202818, "loss": 2.1948, "step": 6120 }, { "epoch": 0.14436437285101975, "grad_norm": 1.9436759948730469, "learning_rate": 0.00017114596580471953, "loss": 2.0913, "step": 6130 }, { "epoch": 0.144599877537563, "grad_norm": 2.1361687183380127, "learning_rate": 0.00017109886486741088, "loss": 2.2386, "step": 6140 }, { "epoch": 0.14483538222410625, "grad_norm": 3.258145332336426, "learning_rate": 0.00017105176393010223, "loss": 2.1049, "step": 6150 }, { "epoch": 0.1450708869106495, "grad_norm": 2.3219218254089355, "learning_rate": 0.00017100466299279355, "loss": 2.2702, "step": 6160 }, { "epoch": 0.14530639159719277, "grad_norm": 2.53751277923584, "learning_rate": 0.00017095756205548493, "loss": 2.1055, "step": 6170 }, { "epoch": 0.14554189628373604, "grad_norm": 1.9522231817245483, "learning_rate": 0.00017091046111817625, "loss": 2.1196, "step": 6180 }, { "epoch": 0.1457774009702793, "grad_norm": 1.9058953523635864, "learning_rate": 0.00017086336018086763, "loss": 2.1288, "step": 6190 }, { "epoch": 0.14601290565682257, "grad_norm": 2.3197526931762695, "learning_rate": 0.00017081625924355895, "loss": 2.0571, "step": 6200 }, { "epoch": 0.14624841034336583, "grad_norm": 2.9913275241851807, "learning_rate": 0.0001707691583062503, "loss": 2.042, "step": 6210 }, { "epoch": 0.1464839150299091, "grad_norm": 3.6737120151519775, "learning_rate": 0.00017072205736894165, "loss": 2.09, "step": 6220 }, { 
"epoch": 0.14671941971645236, "grad_norm": 2.2069551944732666, "learning_rate": 0.000170674956431633, "loss": 2.1867, "step": 6230 }, { "epoch": 0.14695492440299562, "grad_norm": 2.2664968967437744, "learning_rate": 0.00017062785549432435, "loss": 2.2181, "step": 6240 }, { "epoch": 0.1471904290895389, "grad_norm": 1.966537356376648, "learning_rate": 0.0001705807545570157, "loss": 2.0683, "step": 6250 }, { "epoch": 0.14742593377608215, "grad_norm": 1.8831692934036255, "learning_rate": 0.00017053365361970703, "loss": 2.0964, "step": 6260 }, { "epoch": 0.14766143846262542, "grad_norm": 2.0618419647216797, "learning_rate": 0.0001704865526823984, "loss": 2.3007, "step": 6270 }, { "epoch": 0.14789694314916868, "grad_norm": 2.163808584213257, "learning_rate": 0.00017043945174508973, "loss": 2.1509, "step": 6280 }, { "epoch": 0.14813244783571194, "grad_norm": 2.259477138519287, "learning_rate": 0.00017039235080778108, "loss": 2.2863, "step": 6290 }, { "epoch": 0.14836795252225518, "grad_norm": 1.4202922582626343, "learning_rate": 0.00017034524987047243, "loss": 1.9795, "step": 6300 }, { "epoch": 0.14860345720879845, "grad_norm": 2.2261738777160645, "learning_rate": 0.00017029814893316378, "loss": 2.0105, "step": 6310 }, { "epoch": 0.1488389618953417, "grad_norm": 2.284642457962036, "learning_rate": 0.00017025104799585513, "loss": 2.1962, "step": 6320 }, { "epoch": 0.14907446658188497, "grad_norm": 1.6194367408752441, "learning_rate": 0.00017020394705854648, "loss": 2.0346, "step": 6330 }, { "epoch": 0.14930997126842824, "grad_norm": 2.0232205390930176, "learning_rate": 0.0001701568461212378, "loss": 2.2708, "step": 6340 }, { "epoch": 0.1495454759549715, "grad_norm": 2.325477361679077, "learning_rate": 0.00017010974518392918, "loss": 2.1813, "step": 6350 }, { "epoch": 0.14978098064151477, "grad_norm": 2.309537172317505, "learning_rate": 0.0001700626442466205, "loss": 2.1677, "step": 6360 }, { "epoch": 0.15001648532805803, "grad_norm": 2.201800584793091, "learning_rate": 
0.00017001554330931185, "loss": 2.1772, "step": 6370 }, { "epoch": 0.1502519900146013, "grad_norm": 1.840621829032898, "learning_rate": 0.0001699684423720032, "loss": 2.1918, "step": 6380 }, { "epoch": 0.15048749470114456, "grad_norm": 2.601182222366333, "learning_rate": 0.00016992134143469455, "loss": 2.1371, "step": 6390 }, { "epoch": 0.15072299938768782, "grad_norm": 1.4714235067367554, "learning_rate": 0.00016987424049738593, "loss": 2.0602, "step": 6400 }, { "epoch": 0.1509585040742311, "grad_norm": 2.3133628368377686, "learning_rate": 0.00016982713956007725, "loss": 2.3063, "step": 6410 }, { "epoch": 0.15119400876077435, "grad_norm": 1.9627376794815063, "learning_rate": 0.0001697800386227686, "loss": 1.9214, "step": 6420 }, { "epoch": 0.15142951344731762, "grad_norm": 2.3527040481567383, "learning_rate": 0.00016973293768545995, "loss": 2.1646, "step": 6430 }, { "epoch": 0.15166501813386085, "grad_norm": 2.2275426387786865, "learning_rate": 0.0001696858367481513, "loss": 2.1793, "step": 6440 }, { "epoch": 0.15190052282040412, "grad_norm": 2.1824982166290283, "learning_rate": 0.00016963873581084265, "loss": 2.2669, "step": 6450 }, { "epoch": 0.15213602750694738, "grad_norm": 2.512240171432495, "learning_rate": 0.000169591634873534, "loss": 2.1601, "step": 6460 }, { "epoch": 0.15237153219349064, "grad_norm": 2.1347885131835938, "learning_rate": 0.00016954453393622533, "loss": 2.1572, "step": 6470 }, { "epoch": 0.1526070368800339, "grad_norm": 1.7296582460403442, "learning_rate": 0.0001694974329989167, "loss": 2.2337, "step": 6480 }, { "epoch": 0.15284254156657717, "grad_norm": 2.2184410095214844, "learning_rate": 0.00016945033206160803, "loss": 2.1873, "step": 6490 }, { "epoch": 0.15307804625312044, "grad_norm": 1.787880778312683, "learning_rate": 0.00016940323112429938, "loss": 2.0755, "step": 6500 }, { "epoch": 0.1533135509396637, "grad_norm": 1.9352377653121948, "learning_rate": 0.00016935613018699073, "loss": 2.2166, "step": 6510 }, { "epoch": 
0.15354905562620697, "grad_norm": 2.27699875831604, "learning_rate": 0.00016930902924968208, "loss": 2.2189, "step": 6520 }, { "epoch": 0.15378456031275023, "grad_norm": 1.4749678373336792, "learning_rate": 0.00016926192831237343, "loss": 1.9943, "step": 6530 }, { "epoch": 0.1540200649992935, "grad_norm": 2.7707648277282715, "learning_rate": 0.00016921482737506478, "loss": 2.3423, "step": 6540 }, { "epoch": 0.15425556968583676, "grad_norm": 2.5478742122650146, "learning_rate": 0.0001691677264377561, "loss": 2.1751, "step": 6550 }, { "epoch": 0.15449107437238002, "grad_norm": 2.3068339824676514, "learning_rate": 0.00016912062550044748, "loss": 2.2444, "step": 6560 }, { "epoch": 0.15472657905892329, "grad_norm": 1.8547791242599487, "learning_rate": 0.0001690735245631388, "loss": 2.436, "step": 6570 }, { "epoch": 0.15496208374546652, "grad_norm": 2.1598117351531982, "learning_rate": 0.00016902642362583018, "loss": 2.0398, "step": 6580 }, { "epoch": 0.1551975884320098, "grad_norm": 2.000115156173706, "learning_rate": 0.0001689793226885215, "loss": 1.9789, "step": 6590 }, { "epoch": 0.15543309311855305, "grad_norm": 2.3992674350738525, "learning_rate": 0.00016893222175121285, "loss": 2.2092, "step": 6600 }, { "epoch": 0.15566859780509631, "grad_norm": 2.2532436847686768, "learning_rate": 0.0001688851208139042, "loss": 2.0844, "step": 6610 }, { "epoch": 0.15590410249163958, "grad_norm": 1.8847017288208008, "learning_rate": 0.00016883801987659555, "loss": 2.0509, "step": 6620 }, { "epoch": 0.15613960717818284, "grad_norm": 2.4338490962982178, "learning_rate": 0.0001687909189392869, "loss": 2.229, "step": 6630 }, { "epoch": 0.1563751118647261, "grad_norm": 1.6963534355163574, "learning_rate": 0.00016874381800197825, "loss": 2.3512, "step": 6640 }, { "epoch": 0.15661061655126937, "grad_norm": 2.159989356994629, "learning_rate": 0.00016869671706466958, "loss": 2.1962, "step": 6650 }, { "epoch": 0.15684612123781264, "grad_norm": 2.163346290588379, "learning_rate": 
0.00016864961612736095, "loss": 2.2327, "step": 6660 }, { "epoch": 0.1570816259243559, "grad_norm": 3.266700029373169, "learning_rate": 0.00016860251519005228, "loss": 2.0636, "step": 6670 }, { "epoch": 0.15731713061089916, "grad_norm": 2.0337531566619873, "learning_rate": 0.00016855541425274363, "loss": 2.2385, "step": 6680 }, { "epoch": 0.15755263529744243, "grad_norm": 2.7667040824890137, "learning_rate": 0.000168508313315435, "loss": 2.1956, "step": 6690 }, { "epoch": 0.1577881399839857, "grad_norm": 2.1070220470428467, "learning_rate": 0.00016846121237812633, "loss": 2.1798, "step": 6700 }, { "epoch": 0.15802364467052896, "grad_norm": 1.6910346746444702, "learning_rate": 0.00016841411144081768, "loss": 2.1594, "step": 6710 }, { "epoch": 0.1582591493570722, "grad_norm": 2.147885799407959, "learning_rate": 0.00016836701050350903, "loss": 2.2545, "step": 6720 }, { "epoch": 0.15849465404361546, "grad_norm": 2.54560923576355, "learning_rate": 0.00016831990956620038, "loss": 2.0461, "step": 6730 }, { "epoch": 0.15873015873015872, "grad_norm": 2.0715444087982178, "learning_rate": 0.00016827280862889173, "loss": 2.3273, "step": 6740 }, { "epoch": 0.15896566341670199, "grad_norm": 2.0344667434692383, "learning_rate": 0.00016822570769158308, "loss": 2.2495, "step": 6750 }, { "epoch": 0.15920116810324525, "grad_norm": 2.010301113128662, "learning_rate": 0.0001681786067542744, "loss": 2.2644, "step": 6760 }, { "epoch": 0.1594366727897885, "grad_norm": 2.423848867416382, "learning_rate": 0.00016813150581696578, "loss": 1.9635, "step": 6770 }, { "epoch": 0.15967217747633178, "grad_norm": 1.817819595336914, "learning_rate": 0.0001680844048796571, "loss": 2.1507, "step": 6780 }, { "epoch": 0.15990768216287504, "grad_norm": 2.4440290927886963, "learning_rate": 0.00016803730394234848, "loss": 2.641, "step": 6790 }, { "epoch": 0.1601431868494183, "grad_norm": 1.7758880853652954, "learning_rate": 0.0001679902030050398, "loss": 2.0812, "step": 6800 }, { "epoch": 
0.16037869153596157, "grad_norm": 2.6482930183410645, "learning_rate": 0.00016794310206773115, "loss": 2.0167, "step": 6810 }, { "epoch": 0.16061419622250483, "grad_norm": 2.0901808738708496, "learning_rate": 0.0001678960011304225, "loss": 1.9824, "step": 6820 }, { "epoch": 0.1608497009090481, "grad_norm": 2.458341121673584, "learning_rate": 0.00016784890019311385, "loss": 2.0419, "step": 6830 }, { "epoch": 0.16108520559559136, "grad_norm": 1.7757121324539185, "learning_rate": 0.0001678017992558052, "loss": 2.1314, "step": 6840 }, { "epoch": 0.16132071028213463, "grad_norm": 2.2329952716827393, "learning_rate": 0.00016775469831849655, "loss": 2.0286, "step": 6850 }, { "epoch": 0.16155621496867786, "grad_norm": 2.1423516273498535, "learning_rate": 0.00016770759738118788, "loss": 2.21, "step": 6860 }, { "epoch": 0.16179171965522113, "grad_norm": 1.869727611541748, "learning_rate": 0.00016766049644387925, "loss": 2.0268, "step": 6870 }, { "epoch": 0.1620272243417644, "grad_norm": 2.514774799346924, "learning_rate": 0.00016761339550657058, "loss": 2.164, "step": 6880 }, { "epoch": 0.16226272902830766, "grad_norm": 1.9911197423934937, "learning_rate": 0.00016756629456926193, "loss": 2.0608, "step": 6890 }, { "epoch": 0.16249823371485092, "grad_norm": 1.9480197429656982, "learning_rate": 0.00016751919363195328, "loss": 2.1584, "step": 6900 }, { "epoch": 0.16273373840139418, "grad_norm": 3.6419789791107178, "learning_rate": 0.00016747209269464463, "loss": 2.0359, "step": 6910 }, { "epoch": 0.16296924308793745, "grad_norm": 1.879197597503662, "learning_rate": 0.00016742499175733598, "loss": 1.9777, "step": 6920 }, { "epoch": 0.1632047477744807, "grad_norm": 2.217308759689331, "learning_rate": 0.00016737789082002733, "loss": 2.3454, "step": 6930 }, { "epoch": 0.16344025246102398, "grad_norm": 2.226469039916992, "learning_rate": 0.00016733078988271865, "loss": 2.1253, "step": 6940 }, { "epoch": 0.16367575714756724, "grad_norm": 2.5227324962615967, "learning_rate": 
0.00016728368894541003, "loss": 2.0713, "step": 6950 }, { "epoch": 0.1639112618341105, "grad_norm": 1.8012442588806152, "learning_rate": 0.00016723658800810138, "loss": 1.9799, "step": 6960 }, { "epoch": 0.16414676652065377, "grad_norm": 3.06243634223938, "learning_rate": 0.0001671894870707927, "loss": 2.3178, "step": 6970 }, { "epoch": 0.16438227120719703, "grad_norm": 1.4060784578323364, "learning_rate": 0.00016714238613348408, "loss": 2.149, "step": 6980 }, { "epoch": 0.1646177758937403, "grad_norm": 1.7966259717941284, "learning_rate": 0.0001670952851961754, "loss": 2.2267, "step": 6990 }, { "epoch": 0.16485328058028353, "grad_norm": 1.7830440998077393, "learning_rate": 0.00016704818425886678, "loss": 2.1592, "step": 7000 }, { "epoch": 0.1650887852668268, "grad_norm": 1.8834739923477173, "learning_rate": 0.0001670010833215581, "loss": 2.0868, "step": 7010 }, { "epoch": 0.16532428995337006, "grad_norm": 2.2466180324554443, "learning_rate": 0.00016695398238424945, "loss": 2.1263, "step": 7020 }, { "epoch": 0.16555979463991333, "grad_norm": 2.24786639213562, "learning_rate": 0.0001669068814469408, "loss": 2.0783, "step": 7030 }, { "epoch": 0.1657952993264566, "grad_norm": 2.1359407901763916, "learning_rate": 0.00016685978050963215, "loss": 2.2205, "step": 7040 }, { "epoch": 0.16603080401299986, "grad_norm": 1.937888741493225, "learning_rate": 0.0001668126795723235, "loss": 2.191, "step": 7050 }, { "epoch": 0.16626630869954312, "grad_norm": 2.25390625, "learning_rate": 0.00016676557863501486, "loss": 2.0718, "step": 7060 }, { "epoch": 0.16650181338608638, "grad_norm": 1.8093719482421875, "learning_rate": 0.00016671847769770618, "loss": 2.133, "step": 7070 }, { "epoch": 0.16673731807262965, "grad_norm": 2.38726806640625, "learning_rate": 0.00016667137676039756, "loss": 2.2872, "step": 7080 }, { "epoch": 0.1669728227591729, "grad_norm": 1.934637188911438, "learning_rate": 0.00016662427582308888, "loss": 2.3178, "step": 7090 }, { "epoch": 0.16720832744571618, 
"grad_norm": 2.438570976257324, "learning_rate": 0.00016657717488578023, "loss": 1.9654, "step": 7100 }, { "epoch": 0.16744383213225944, "grad_norm": 2.067674398422241, "learning_rate": 0.00016653007394847158, "loss": 2.2299, "step": 7110 }, { "epoch": 0.1676793368188027, "grad_norm": 2.227522373199463, "learning_rate": 0.00016648297301116293, "loss": 2.2692, "step": 7120 }, { "epoch": 0.16791484150534597, "grad_norm": 4.050379753112793, "learning_rate": 0.00016643587207385428, "loss": 2.2243, "step": 7130 }, { "epoch": 0.1681503461918892, "grad_norm": 2.220202922821045, "learning_rate": 0.00016638877113654563, "loss": 2.2231, "step": 7140 }, { "epoch": 0.16838585087843247, "grad_norm": 2.7050681114196777, "learning_rate": 0.00016634167019923695, "loss": 2.1957, "step": 7150 }, { "epoch": 0.16862135556497573, "grad_norm": 2.3848469257354736, "learning_rate": 0.00016629456926192833, "loss": 2.0928, "step": 7160 }, { "epoch": 0.168856860251519, "grad_norm": 2.007478713989258, "learning_rate": 0.00016624746832461965, "loss": 2.1006, "step": 7170 }, { "epoch": 0.16909236493806226, "grad_norm": 2.2802834510803223, "learning_rate": 0.00016620036738731103, "loss": 2.2195, "step": 7180 }, { "epoch": 0.16932786962460553, "grad_norm": 2.2520174980163574, "learning_rate": 0.00016615326645000235, "loss": 2.169, "step": 7190 }, { "epoch": 0.1695633743111488, "grad_norm": 2.1334917545318604, "learning_rate": 0.0001661061655126937, "loss": 2.3443, "step": 7200 }, { "epoch": 0.16979887899769205, "grad_norm": 2.5005075931549072, "learning_rate": 0.00016605906457538505, "loss": 2.1891, "step": 7210 }, { "epoch": 0.17003438368423532, "grad_norm": 2.019739866256714, "learning_rate": 0.0001660119636380764, "loss": 2.1287, "step": 7220 }, { "epoch": 0.17026988837077858, "grad_norm": 1.7916017770767212, "learning_rate": 0.00016596486270076776, "loss": 2.1305, "step": 7230 }, { "epoch": 0.17050539305732185, "grad_norm": 1.9927102327346802, "learning_rate": 0.0001659177617634591, "loss": 
2.0284, "step": 7240 }, { "epoch": 0.1707408977438651, "grad_norm": 2.6178619861602783, "learning_rate": 0.00016587066082615046, "loss": 2.2635, "step": 7250 }, { "epoch": 0.17097640243040838, "grad_norm": 2.3916609287261963, "learning_rate": 0.0001658235598888418, "loss": 2.243, "step": 7260 }, { "epoch": 0.17121190711695164, "grad_norm": 2.027822494506836, "learning_rate": 0.00016577645895153316, "loss": 2.1836, "step": 7270 }, { "epoch": 0.17144741180349488, "grad_norm": 2.018761157989502, "learning_rate": 0.00016572935801422448, "loss": 2.1472, "step": 7280 }, { "epoch": 0.17168291649003814, "grad_norm": 1.9133548736572266, "learning_rate": 0.00016568225707691586, "loss": 2.1242, "step": 7290 }, { "epoch": 0.1719184211765814, "grad_norm": 4.398839473724365, "learning_rate": 0.00016563515613960718, "loss": 2.3578, "step": 7300 }, { "epoch": 0.17215392586312467, "grad_norm": 1.720661997795105, "learning_rate": 0.00016558805520229853, "loss": 2.2148, "step": 7310 }, { "epoch": 0.17238943054966793, "grad_norm": 2.4706451892852783, "learning_rate": 0.00016554095426498988, "loss": 2.0974, "step": 7320 }, { "epoch": 0.1726249352362112, "grad_norm": 2.2956461906433105, "learning_rate": 0.00016549385332768123, "loss": 2.1426, "step": 7330 }, { "epoch": 0.17286043992275446, "grad_norm": 2.160515069961548, "learning_rate": 0.00016544675239037258, "loss": 2.1641, "step": 7340 }, { "epoch": 0.17309594460929772, "grad_norm": 2.1589348316192627, "learning_rate": 0.00016539965145306393, "loss": 2.2921, "step": 7350 }, { "epoch": 0.173331449295841, "grad_norm": 2.136258125305176, "learning_rate": 0.00016535255051575525, "loss": 2.2571, "step": 7360 }, { "epoch": 0.17356695398238425, "grad_norm": 3.0886502265930176, "learning_rate": 0.00016530544957844663, "loss": 2.3636, "step": 7370 }, { "epoch": 0.17380245866892752, "grad_norm": 1.8283052444458008, "learning_rate": 0.00016525834864113795, "loss": 2.2302, "step": 7380 }, { "epoch": 0.17403796335547078, "grad_norm": 
2.7342529296875, "learning_rate": 0.00016521124770382933, "loss": 2.32, "step": 7390 }, { "epoch": 0.17427346804201405, "grad_norm": 2.8123838901519775, "learning_rate": 0.00016516414676652066, "loss": 2.1852, "step": 7400 }, { "epoch": 0.1745089727285573, "grad_norm": 1.7874890565872192, "learning_rate": 0.000165117045829212, "loss": 2.1981, "step": 7410 }, { "epoch": 0.17474447741510057, "grad_norm": 2.6356823444366455, "learning_rate": 0.00016506994489190336, "loss": 2.4149, "step": 7420 }, { "epoch": 0.1749799821016438, "grad_norm": 2.738905906677246, "learning_rate": 0.00016502755404832557, "loss": 2.0254, "step": 7430 }, { "epoch": 0.17521548678818707, "grad_norm": 1.7247503995895386, "learning_rate": 0.0001649804531110169, "loss": 2.2169, "step": 7440 }, { "epoch": 0.17545099147473034, "grad_norm": 2.0531630516052246, "learning_rate": 0.00016493335217370827, "loss": 2.2372, "step": 7450 }, { "epoch": 0.1756864961612736, "grad_norm": 2.0215067863464355, "learning_rate": 0.0001648862512363996, "loss": 1.8726, "step": 7460 }, { "epoch": 0.17592200084781687, "grad_norm": 2.5239405632019043, "learning_rate": 0.00016483915029909095, "loss": 1.8519, "step": 7470 }, { "epoch": 0.17615750553436013, "grad_norm": 2.1495165824890137, "learning_rate": 0.0001647920493617823, "loss": 2.1613, "step": 7480 }, { "epoch": 0.1763930102209034, "grad_norm": 1.7801376581192017, "learning_rate": 0.00016474494842447365, "loss": 2.0669, "step": 7490 }, { "epoch": 0.17662851490744666, "grad_norm": 2.3613669872283936, "learning_rate": 0.00016469784748716503, "loss": 2.1535, "step": 7500 }, { "epoch": 0.17686401959398992, "grad_norm": 1.718843936920166, "learning_rate": 0.00016465074654985635, "loss": 2.0689, "step": 7510 }, { "epoch": 0.1770995242805332, "grad_norm": 1.8729385137557983, "learning_rate": 0.0001646036456125477, "loss": 2.0943, "step": 7520 }, { "epoch": 0.17733502896707645, "grad_norm": 2.1967837810516357, "learning_rate": 0.00016455654467523905, "loss": 1.9327, "step": 
7530 }, { "epoch": 0.17757053365361972, "grad_norm": 2.0140233039855957, "learning_rate": 0.0001645094437379304, "loss": 2.1792, "step": 7540 }, { "epoch": 0.17780603834016298, "grad_norm": 1.8183659315109253, "learning_rate": 0.00016446234280062175, "loss": 2.1262, "step": 7550 }, { "epoch": 0.17804154302670624, "grad_norm": 2.026036500930786, "learning_rate": 0.0001644152418633131, "loss": 1.9423, "step": 7560 }, { "epoch": 0.17827704771324948, "grad_norm": 3.2327709197998047, "learning_rate": 0.00016436814092600442, "loss": 2.1003, "step": 7570 }, { "epoch": 0.17851255239979275, "grad_norm": 2.2476227283477783, "learning_rate": 0.0001643210399886958, "loss": 2.1822, "step": 7580 }, { "epoch": 0.178748057086336, "grad_norm": 2.49985933303833, "learning_rate": 0.00016427393905138712, "loss": 2.1069, "step": 7590 }, { "epoch": 0.17898356177287927, "grad_norm": 1.9173133373260498, "learning_rate": 0.00016422683811407847, "loss": 2.2727, "step": 7600 }, { "epoch": 0.17921906645942254, "grad_norm": 2.5073535442352295, "learning_rate": 0.00016417973717676982, "loss": 2.1479, "step": 7610 }, { "epoch": 0.1794545711459658, "grad_norm": 1.6549034118652344, "learning_rate": 0.00016413263623946117, "loss": 2.1405, "step": 7620 }, { "epoch": 0.17969007583250907, "grad_norm": 2.1289401054382324, "learning_rate": 0.00016408553530215252, "loss": 2.1708, "step": 7630 }, { "epoch": 0.17992558051905233, "grad_norm": 2.2387378215789795, "learning_rate": 0.00016403843436484387, "loss": 2.0243, "step": 7640 }, { "epoch": 0.1801610852055956, "grad_norm": 1.905182123184204, "learning_rate": 0.0001639913334275352, "loss": 2.2722, "step": 7650 }, { "epoch": 0.18039658989213886, "grad_norm": 2.26442289352417, "learning_rate": 0.00016394423249022658, "loss": 2.1395, "step": 7660 }, { "epoch": 0.18063209457868212, "grad_norm": 2.4635813236236572, "learning_rate": 0.0001638971315529179, "loss": 2.0492, "step": 7670 }, { "epoch": 0.1808675992652254, "grad_norm": 2.116964101791382, 
"learning_rate": 0.00016385003061560928, "loss": 2.1899, "step": 7680 }, { "epoch": 0.18110310395176865, "grad_norm": 2.431391477584839, "learning_rate": 0.0001638029296783006, "loss": 2.0025, "step": 7690 }, { "epoch": 0.18133860863831192, "grad_norm": 2.341212749481201, "learning_rate": 0.00016375582874099195, "loss": 2.1425, "step": 7700 }, { "epoch": 0.18157411332485515, "grad_norm": 1.8115952014923096, "learning_rate": 0.0001637087278036833, "loss": 2.038, "step": 7710 }, { "epoch": 0.18180961801139842, "grad_norm": 1.9707318544387817, "learning_rate": 0.00016366162686637465, "loss": 2.1576, "step": 7720 }, { "epoch": 0.18204512269794168, "grad_norm": 2.6205427646636963, "learning_rate": 0.000163614525929066, "loss": 2.2071, "step": 7730 }, { "epoch": 0.18228062738448494, "grad_norm": 2.613771677017212, "learning_rate": 0.00016356742499175735, "loss": 2.2306, "step": 7740 }, { "epoch": 0.1825161320710282, "grad_norm": 1.9211950302124023, "learning_rate": 0.00016352032405444867, "loss": 2.1495, "step": 7750 }, { "epoch": 0.18275163675757147, "grad_norm": 2.385100841522217, "learning_rate": 0.00016347322311714005, "loss": 2.1676, "step": 7760 }, { "epoch": 0.18298714144411474, "grad_norm": 2.1002583503723145, "learning_rate": 0.00016342612217983137, "loss": 2.2365, "step": 7770 }, { "epoch": 0.183222646130658, "grad_norm": 2.102348566055298, "learning_rate": 0.00016337902124252272, "loss": 2.1476, "step": 7780 }, { "epoch": 0.18345815081720127, "grad_norm": 2.3299968242645264, "learning_rate": 0.0001633319203052141, "loss": 2.1953, "step": 7790 }, { "epoch": 0.18369365550374453, "grad_norm": 1.9959839582443237, "learning_rate": 0.00016328481936790542, "loss": 2.0821, "step": 7800 }, { "epoch": 0.1839291601902878, "grad_norm": 2.4026405811309814, "learning_rate": 0.00016323771843059677, "loss": 2.1747, "step": 7810 }, { "epoch": 0.18416466487683106, "grad_norm": 2.223411798477173, "learning_rate": 0.00016319061749328812, "loss": 2.0871, "step": 7820 }, { "epoch": 
0.18440016956337432, "grad_norm": 2.1610796451568604, "learning_rate": 0.00016314351655597948, "loss": 2.1532, "step": 7830 }, { "epoch": 0.1846356742499176, "grad_norm": 2.226919174194336, "learning_rate": 0.00016309641561867083, "loss": 2.2085, "step": 7840 }, { "epoch": 0.18487117893646082, "grad_norm": 2.2085070610046387, "learning_rate": 0.00016304931468136218, "loss": 2.2106, "step": 7850 }, { "epoch": 0.1851066836230041, "grad_norm": 2.343294858932495, "learning_rate": 0.0001630022137440535, "loss": 2.128, "step": 7860 }, { "epoch": 0.18534218830954735, "grad_norm": 1.8624409437179565, "learning_rate": 0.00016295511280674488, "loss": 2.1655, "step": 7870 }, { "epoch": 0.18557769299609062, "grad_norm": 2.0223777294158936, "learning_rate": 0.0001629080118694362, "loss": 2.1523, "step": 7880 }, { "epoch": 0.18581319768263388, "grad_norm": 2.1489696502685547, "learning_rate": 0.00016286091093212758, "loss": 2.1188, "step": 7890 }, { "epoch": 0.18604870236917714, "grad_norm": 2.2076354026794434, "learning_rate": 0.0001628138099948189, "loss": 1.9737, "step": 7900 }, { "epoch": 0.1862842070557204, "grad_norm": 2.566239595413208, "learning_rate": 0.00016276670905751025, "loss": 2.2254, "step": 7910 }, { "epoch": 0.18651971174226367, "grad_norm": 2.0568361282348633, "learning_rate": 0.0001627196081202016, "loss": 2.1747, "step": 7920 }, { "epoch": 0.18675521642880694, "grad_norm": 2.4507033824920654, "learning_rate": 0.00016267250718289295, "loss": 2.3182, "step": 7930 }, { "epoch": 0.1869907211153502, "grad_norm": 2.1586856842041016, "learning_rate": 0.0001626254062455843, "loss": 2.1965, "step": 7940 }, { "epoch": 0.18722622580189346, "grad_norm": 1.9273881912231445, "learning_rate": 0.00016257830530827565, "loss": 2.2813, "step": 7950 }, { "epoch": 0.18746173048843673, "grad_norm": 1.7580167055130005, "learning_rate": 0.00016253120437096697, "loss": 2.1578, "step": 7960 }, { "epoch": 0.18769723517498, "grad_norm": 2.0554792881011963, "learning_rate": 
0.00016248410343365835, "loss": 2.0389, "step": 7970 }, { "epoch": 0.18793273986152326, "grad_norm": 1.872406005859375, "learning_rate": 0.00016243700249634967, "loss": 2.1407, "step": 7980 }, { "epoch": 0.1881682445480665, "grad_norm": 2.1764724254608154, "learning_rate": 0.00016238990155904102, "loss": 2.135, "step": 7990 }, { "epoch": 0.18840374923460976, "grad_norm": 1.8471111059188843, "learning_rate": 0.00016234280062173237, "loss": 2.1204, "step": 8000 }, { "epoch": 0.18863925392115302, "grad_norm": 2.7460036277770996, "learning_rate": 0.00016229569968442373, "loss": 2.1958, "step": 8010 }, { "epoch": 0.18887475860769629, "grad_norm": 1.8986886739730835, "learning_rate": 0.00016224859874711508, "loss": 2.2771, "step": 8020 }, { "epoch": 0.18911026329423955, "grad_norm": 2.3261477947235107, "learning_rate": 0.00016220149780980643, "loss": 2.2911, "step": 8030 }, { "epoch": 0.18934576798078281, "grad_norm": 3.4282009601593018, "learning_rate": 0.00016215439687249775, "loss": 2.1043, "step": 8040 }, { "epoch": 0.18958127266732608, "grad_norm": 1.8405457735061646, "learning_rate": 0.00016210729593518913, "loss": 2.211, "step": 8050 }, { "epoch": 0.18981677735386934, "grad_norm": 2.0625762939453125, "learning_rate": 0.00016206019499788045, "loss": 2.0193, "step": 8060 }, { "epoch": 0.1900522820404126, "grad_norm": 1.9741464853286743, "learning_rate": 0.0001620130940605718, "loss": 2.1448, "step": 8070 }, { "epoch": 0.19028778672695587, "grad_norm": 2.270921468734741, "learning_rate": 0.00016196599312326318, "loss": 2.3037, "step": 8080 }, { "epoch": 0.19052329141349914, "grad_norm": 1.8715896606445312, "learning_rate": 0.0001619188921859545, "loss": 2.027, "step": 8090 }, { "epoch": 0.1907587961000424, "grad_norm": 2.515016794204712, "learning_rate": 0.00016187179124864588, "loss": 1.9652, "step": 8100 }, { "epoch": 0.19099430078658566, "grad_norm": 1.9395673274993896, "learning_rate": 0.0001618246903113372, "loss": 2.2969, "step": 8110 }, { "epoch": 
0.19122980547312893, "grad_norm": 2.1764769554138184, "learning_rate": 0.00016177758937402855, "loss": 2.1232, "step": 8120 }, { "epoch": 0.19146531015967216, "grad_norm": 2.594571113586426, "learning_rate": 0.0001617304884367199, "loss": 2.1088, "step": 8130 }, { "epoch": 0.19170081484621543, "grad_norm": 2.394315242767334, "learning_rate": 0.00016168338749941125, "loss": 2.0626, "step": 8140 }, { "epoch": 0.1919363195327587, "grad_norm": 1.8806692361831665, "learning_rate": 0.0001616362865621026, "loss": 2.0899, "step": 8150 }, { "epoch": 0.19217182421930196, "grad_norm": 2.2161996364593506, "learning_rate": 0.00016158918562479395, "loss": 2.2571, "step": 8160 }, { "epoch": 0.19240732890584522, "grad_norm": 3.4953277111053467, "learning_rate": 0.00016154208468748527, "loss": 2.0105, "step": 8170 }, { "epoch": 0.19264283359238848, "grad_norm": 2.8051047325134277, "learning_rate": 0.00016149498375017665, "loss": 2.1364, "step": 8180 }, { "epoch": 0.19287833827893175, "grad_norm": 2.5728232860565186, "learning_rate": 0.00016144788281286798, "loss": 2.0509, "step": 8190 }, { "epoch": 0.193113842965475, "grad_norm": 2.346428394317627, "learning_rate": 0.00016140078187555933, "loss": 2.2152, "step": 8200 }, { "epoch": 0.19334934765201828, "grad_norm": 1.9895967245101929, "learning_rate": 0.00016135368093825068, "loss": 2.1382, "step": 8210 }, { "epoch": 0.19358485233856154, "grad_norm": 2.0366828441619873, "learning_rate": 0.00016130658000094203, "loss": 2.1697, "step": 8220 }, { "epoch": 0.1938203570251048, "grad_norm": 2.5013720989227295, "learning_rate": 0.00016125947906363338, "loss": 1.9348, "step": 8230 }, { "epoch": 0.19405586171164807, "grad_norm": 2.338641881942749, "learning_rate": 0.00016121237812632473, "loss": 2.3867, "step": 8240 }, { "epoch": 0.19429136639819133, "grad_norm": 2.367546319961548, "learning_rate": 0.00016116527718901605, "loss": 2.1588, "step": 8250 }, { "epoch": 0.1945268710847346, "grad_norm": 2.366485118865967, "learning_rate": 
0.00016111817625170743, "loss": 2.3467, "step": 8260 }, { "epoch": 0.19476237577127783, "grad_norm": 1.8073631525039673, "learning_rate": 0.00016107107531439875, "loss": 2.0071, "step": 8270 }, { "epoch": 0.1949978804578211, "grad_norm": 1.8550753593444824, "learning_rate": 0.00016102397437709013, "loss": 2.1192, "step": 8280 }, { "epoch": 0.19523338514436436, "grad_norm": 2.834643602371216, "learning_rate": 0.00016097687343978145, "loss": 2.1614, "step": 8290 }, { "epoch": 0.19546888983090763, "grad_norm": 1.961897850036621, "learning_rate": 0.0001609297725024728, "loss": 2.1747, "step": 8300 }, { "epoch": 0.1957043945174509, "grad_norm": 2.972184658050537, "learning_rate": 0.00016088267156516415, "loss": 2.1692, "step": 8310 }, { "epoch": 0.19593989920399416, "grad_norm": 2.2215375900268555, "learning_rate": 0.0001608355706278555, "loss": 2.1288, "step": 8320 }, { "epoch": 0.19617540389053742, "grad_norm": 1.6455684900283813, "learning_rate": 0.00016078846969054685, "loss": 2.0821, "step": 8330 }, { "epoch": 0.19641090857708068, "grad_norm": 3.122081995010376, "learning_rate": 0.0001607413687532382, "loss": 2.2687, "step": 8340 }, { "epoch": 0.19664641326362395, "grad_norm": 2.8040692806243896, "learning_rate": 0.00016069426781592955, "loss": 2.3317, "step": 8350 }, { "epoch": 0.1968819179501672, "grad_norm": 2.200100898742676, "learning_rate": 0.0001606471668786209, "loss": 2.1458, "step": 8360 }, { "epoch": 0.19711742263671048, "grad_norm": 1.9488954544067383, "learning_rate": 0.00016060006594131225, "loss": 1.9935, "step": 8370 }, { "epoch": 0.19735292732325374, "grad_norm": 2.2211225032806396, "learning_rate": 0.00016055296500400358, "loss": 2.3222, "step": 8380 }, { "epoch": 0.197588432009797, "grad_norm": 2.1487669944763184, "learning_rate": 0.00016050586406669495, "loss": 2.1131, "step": 8390 }, { "epoch": 0.19782393669634027, "grad_norm": 2.1569080352783203, "learning_rate": 0.00016045876312938628, "loss": 2.184, "step": 8400 }, { "epoch": 
0.19805944138288353, "grad_norm": 1.7691841125488281, "learning_rate": 0.00016041166219207763, "loss": 2.2211, "step": 8410 }, { "epoch": 0.19829494606942677, "grad_norm": 2.2615177631378174, "learning_rate": 0.00016036456125476898, "loss": 2.171, "step": 8420 }, { "epoch": 0.19853045075597003, "grad_norm": 1.9963940382003784, "learning_rate": 0.00016031746031746033, "loss": 2.0384, "step": 8430 }, { "epoch": 0.1987659554425133, "grad_norm": 2.1423399448394775, "learning_rate": 0.00016027035938015168, "loss": 2.1735, "step": 8440 }, { "epoch": 0.19900146012905656, "grad_norm": 2.056474447250366, "learning_rate": 0.00016022325844284303, "loss": 2.2922, "step": 8450 }, { "epoch": 0.19923696481559983, "grad_norm": 1.6666935682296753, "learning_rate": 0.00016017615750553435, "loss": 2.1775, "step": 8460 }, { "epoch": 0.1994724695021431, "grad_norm": 2.2236125469207764, "learning_rate": 0.00016012905656822573, "loss": 1.9681, "step": 8470 }, { "epoch": 0.19970797418868635, "grad_norm": 2.01289701461792, "learning_rate": 0.00016008195563091705, "loss": 1.9393, "step": 8480 }, { "epoch": 0.19994347887522962, "grad_norm": 2.205111503601074, "learning_rate": 0.00016003485469360843, "loss": 1.9858, "step": 8490 }, { "epoch": 0.20017898356177288, "grad_norm": 3.152418375015259, "learning_rate": 0.00015998775375629975, "loss": 2.241, "step": 8500 }, { "epoch": 0.20041448824831615, "grad_norm": 2.2211573123931885, "learning_rate": 0.0001599406528189911, "loss": 2.0439, "step": 8510 }, { "epoch": 0.2006499929348594, "grad_norm": 1.9449644088745117, "learning_rate": 0.00015989355188168245, "loss": 2.0677, "step": 8520 }, { "epoch": 0.20088549762140268, "grad_norm": 3.1177256107330322, "learning_rate": 0.0001598464509443738, "loss": 2.1057, "step": 8530 }, { "epoch": 0.20112100230794594, "grad_norm": 3.570312023162842, "learning_rate": 0.00015979935000706515, "loss": 2.0658, "step": 8540 }, { "epoch": 0.2013565069944892, "grad_norm": 1.7642455101013184, "learning_rate": 
0.0001597522490697565, "loss": 2.2294, "step": 8550 }, { "epoch": 0.20159201168103244, "grad_norm": 1.7832077741622925, "learning_rate": 0.00015970514813244783, "loss": 2.2795, "step": 8560 }, { "epoch": 0.2018275163675757, "grad_norm": 2.377683639526367, "learning_rate": 0.0001596580471951392, "loss": 2.0018, "step": 8570 }, { "epoch": 0.20206302105411897, "grad_norm": 2.7530715465545654, "learning_rate": 0.00015961094625783053, "loss": 2.2075, "step": 8580 }, { "epoch": 0.20229852574066223, "grad_norm": 2.924802303314209, "learning_rate": 0.00015956384532052188, "loss": 2.2061, "step": 8590 }, { "epoch": 0.2025340304272055, "grad_norm": 2.2290501594543457, "learning_rate": 0.00015951674438321323, "loss": 1.9976, "step": 8600 }, { "epoch": 0.20276953511374876, "grad_norm": 2.3307456970214844, "learning_rate": 0.00015946964344590458, "loss": 2.1666, "step": 8610 }, { "epoch": 0.20300503980029203, "grad_norm": 2.272770643234253, "learning_rate": 0.00015942254250859593, "loss": 2.1245, "step": 8620 }, { "epoch": 0.2032405444868353, "grad_norm": 2.5355868339538574, "learning_rate": 0.00015937544157128728, "loss": 2.2273, "step": 8630 }, { "epoch": 0.20347604917337855, "grad_norm": 1.9374130964279175, "learning_rate": 0.00015932834063397863, "loss": 1.9849, "step": 8640 }, { "epoch": 0.20371155385992182, "grad_norm": 2.0715346336364746, "learning_rate": 0.00015928123969666998, "loss": 1.9397, "step": 8650 }, { "epoch": 0.20394705854646508, "grad_norm": 2.3634729385375977, "learning_rate": 0.00015923413875936133, "loss": 2.1056, "step": 8660 }, { "epoch": 0.20418256323300835, "grad_norm": 2.003783702850342, "learning_rate": 0.00015918703782205265, "loss": 1.8101, "step": 8670 }, { "epoch": 0.2044180679195516, "grad_norm": 1.9855163097381592, "learning_rate": 0.00015913993688474403, "loss": 1.8513, "step": 8680 }, { "epoch": 0.20465357260609487, "grad_norm": 2.3215885162353516, "learning_rate": 0.00015909283594743535, "loss": 2.2075, "step": 8690 }, { "epoch": 
0.2048890772926381, "grad_norm": 2.110724687576294, "learning_rate": 0.00015904573501012673, "loss": 2.0604, "step": 8700 }, { "epoch": 0.20512458197918138, "grad_norm": 2.33134388923645, "learning_rate": 0.00015899863407281805, "loss": 2.1537, "step": 8710 }, { "epoch": 0.20536008666572464, "grad_norm": 2.0391716957092285, "learning_rate": 0.0001589515331355094, "loss": 1.9752, "step": 8720 }, { "epoch": 0.2055955913522679, "grad_norm": 2.1670148372650146, "learning_rate": 0.00015890443219820075, "loss": 2.1016, "step": 8730 }, { "epoch": 0.20583109603881117, "grad_norm": 2.2209887504577637, "learning_rate": 0.0001588573312608921, "loss": 2.1446, "step": 8740 }, { "epoch": 0.20606660072535443, "grad_norm": 2.245386838912964, "learning_rate": 0.00015881023032358345, "loss": 1.9711, "step": 8750 }, { "epoch": 0.2063021054118977, "grad_norm": 2.2087981700897217, "learning_rate": 0.0001587631293862748, "loss": 2.157, "step": 8760 }, { "epoch": 0.20653761009844096, "grad_norm": 1.9615005254745483, "learning_rate": 0.00015871602844896613, "loss": 2.0809, "step": 8770 }, { "epoch": 0.20677311478498422, "grad_norm": 2.1941707134246826, "learning_rate": 0.0001586689275116575, "loss": 2.0107, "step": 8780 }, { "epoch": 0.2070086194715275, "grad_norm": 1.8866039514541626, "learning_rate": 0.00015862182657434883, "loss": 2.0821, "step": 8790 }, { "epoch": 0.20724412415807075, "grad_norm": 2.4562177658081055, "learning_rate": 0.00015857472563704018, "loss": 2.2769, "step": 8800 }, { "epoch": 0.20747962884461402, "grad_norm": 1.8872411251068115, "learning_rate": 0.00015852762469973153, "loss": 2.307, "step": 8810 }, { "epoch": 0.20771513353115728, "grad_norm": 1.9673904180526733, "learning_rate": 0.00015848052376242288, "loss": 2.242, "step": 8820 }, { "epoch": 0.20795063821770055, "grad_norm": 2.0321238040924072, "learning_rate": 0.00015843342282511423, "loss": 2.0609, "step": 8830 }, { "epoch": 0.20818614290424378, "grad_norm": 2.2742855548858643, "learning_rate": 
0.00015838632188780558, "loss": 2.3143, "step": 8840 }, { "epoch": 0.20842164759078705, "grad_norm": 2.290383815765381, "learning_rate": 0.0001583392209504969, "loss": 2.2528, "step": 8850 }, { "epoch": 0.2086571522773303, "grad_norm": 2.4600648880004883, "learning_rate": 0.00015829212001318828, "loss": 2.3136, "step": 8860 }, { "epoch": 0.20889265696387357, "grad_norm": 2.9521021842956543, "learning_rate": 0.0001582450190758796, "loss": 2.3804, "step": 8870 }, { "epoch": 0.20912816165041684, "grad_norm": 2.5371804237365723, "learning_rate": 0.00015819791813857098, "loss": 2.008, "step": 8880 }, { "epoch": 0.2093636663369601, "grad_norm": 1.9548083543777466, "learning_rate": 0.0001581508172012623, "loss": 2.2387, "step": 8890 }, { "epoch": 0.20959917102350337, "grad_norm": 2.33406925201416, "learning_rate": 0.00015810371626395365, "loss": 1.9671, "step": 8900 }, { "epoch": 0.20983467571004663, "grad_norm": 2.2755494117736816, "learning_rate": 0.000158056615326645, "loss": 2.1114, "step": 8910 }, { "epoch": 0.2100701803965899, "grad_norm": 2.0145130157470703, "learning_rate": 0.00015800951438933635, "loss": 2.1826, "step": 8920 }, { "epoch": 0.21030568508313316, "grad_norm": 2.4278318881988525, "learning_rate": 0.0001579624134520277, "loss": 2.2274, "step": 8930 }, { "epoch": 0.21054118976967642, "grad_norm": 2.743621826171875, "learning_rate": 0.00015791531251471905, "loss": 2.03, "step": 8940 }, { "epoch": 0.2107766944562197, "grad_norm": 2.0403552055358887, "learning_rate": 0.0001578682115774104, "loss": 1.9389, "step": 8950 }, { "epoch": 0.21101219914276295, "grad_norm": 1.9980547428131104, "learning_rate": 0.00015782111064010175, "loss": 2.207, "step": 8960 }, { "epoch": 0.21124770382930622, "grad_norm": 2.2395660877227783, "learning_rate": 0.0001577740097027931, "loss": 2.0992, "step": 8970 }, { "epoch": 0.21148320851584945, "grad_norm": 2.130779266357422, "learning_rate": 0.00015772690876548443, "loss": 2.1155, "step": 8980 }, { "epoch": 0.21171871320239272, 
"grad_norm": 2.452732801437378, "learning_rate": 0.0001576798078281758, "loss": 2.1982, "step": 8990 }, { "epoch": 0.21195421788893598, "grad_norm": 1.9832158088684082, "learning_rate": 0.00015763270689086713, "loss": 2.0777, "step": 9000 }, { "epoch": 0.21218972257547924, "grad_norm": 2.609037160873413, "learning_rate": 0.00015758560595355848, "loss": 2.2989, "step": 9010 }, { "epoch": 0.2124252272620225, "grad_norm": 2.5825107097625732, "learning_rate": 0.00015753850501624983, "loss": 2.1168, "step": 9020 }, { "epoch": 0.21266073194856577, "grad_norm": 2.1648106575012207, "learning_rate": 0.00015749140407894118, "loss": 2.1504, "step": 9030 }, { "epoch": 0.21289623663510904, "grad_norm": 2.132033586502075, "learning_rate": 0.00015744430314163253, "loss": 2.2418, "step": 9040 }, { "epoch": 0.2131317413216523, "grad_norm": 2.439497470855713, "learning_rate": 0.00015739720220432388, "loss": 2.2041, "step": 9050 }, { "epoch": 0.21336724600819557, "grad_norm": 2.2189526557922363, "learning_rate": 0.0001573501012670152, "loss": 2.1985, "step": 9060 }, { "epoch": 0.21360275069473883, "grad_norm": 2.7584781646728516, "learning_rate": 0.00015730300032970658, "loss": 2.2038, "step": 9070 }, { "epoch": 0.2138382553812821, "grad_norm": 2.2705740928649902, "learning_rate": 0.0001572558993923979, "loss": 2.1067, "step": 9080 }, { "epoch": 0.21407376006782536, "grad_norm": 3.012639045715332, "learning_rate": 0.00015720879845508928, "loss": 2.3559, "step": 9090 }, { "epoch": 0.21430926475436862, "grad_norm": 3.75484299659729, "learning_rate": 0.0001571616975177806, "loss": 2.1761, "step": 9100 }, { "epoch": 0.2145447694409119, "grad_norm": 2.1686441898345947, "learning_rate": 0.00015711459658047195, "loss": 2.1525, "step": 9110 }, { "epoch": 0.21478027412745512, "grad_norm": 2.0703907012939453, "learning_rate": 0.0001570674956431633, "loss": 2.3426, "step": 9120 }, { "epoch": 0.2150157788139984, "grad_norm": 2.633394956588745, "learning_rate": 0.00015702039470585465, "loss": 
2.101, "step": 9130 }, { "epoch": 0.21525128350054165, "grad_norm": 1.6758605241775513, "learning_rate": 0.000156973293768546, "loss": 2.422, "step": 9140 }, { "epoch": 0.21548678818708492, "grad_norm": 2.618814468383789, "learning_rate": 0.00015692619283123735, "loss": 2.2528, "step": 9150 }, { "epoch": 0.21572229287362818, "grad_norm": 1.8236844539642334, "learning_rate": 0.00015687909189392868, "loss": 2.075, "step": 9160 }, { "epoch": 0.21595779756017144, "grad_norm": 2.2664949893951416, "learning_rate": 0.00015683199095662006, "loss": 1.9622, "step": 9170 }, { "epoch": 0.2161933022467147, "grad_norm": 2.9308884143829346, "learning_rate": 0.00015678489001931138, "loss": 2.0478, "step": 9180 }, { "epoch": 0.21642880693325797, "grad_norm": 2.6162636280059814, "learning_rate": 0.00015673778908200273, "loss": 2.2457, "step": 9190 }, { "epoch": 0.21666431161980124, "grad_norm": 2.512709617614746, "learning_rate": 0.0001566906881446941, "loss": 2.1442, "step": 9200 }, { "epoch": 0.2168998163063445, "grad_norm": 1.8639780282974243, "learning_rate": 0.00015664358720738543, "loss": 2.0677, "step": 9210 }, { "epoch": 0.21713532099288776, "grad_norm": 2.1955885887145996, "learning_rate": 0.00015659648627007678, "loss": 2.3215, "step": 9220 }, { "epoch": 0.21737082567943103, "grad_norm": 2.1745858192443848, "learning_rate": 0.00015654938533276813, "loss": 2.2667, "step": 9230 }, { "epoch": 0.2176063303659743, "grad_norm": 2.6070613861083984, "learning_rate": 0.00015650228439545948, "loss": 2.2632, "step": 9240 }, { "epoch": 0.21784183505251756, "grad_norm": 3.0261740684509277, "learning_rate": 0.00015645518345815083, "loss": 2.0795, "step": 9250 }, { "epoch": 0.2180773397390608, "grad_norm": 2.801222324371338, "learning_rate": 0.00015640808252084218, "loss": 2.2413, "step": 9260 }, { "epoch": 0.21831284442560406, "grad_norm": 3.1285176277160645, "learning_rate": 0.0001563609815835335, "loss": 2.2592, "step": 9270 }, { "epoch": 0.21854834911214732, "grad_norm": 
2.106144666671753, "learning_rate": 0.00015631388064622488, "loss": 2.1891, "step": 9280 }, { "epoch": 0.2187838537986906, "grad_norm": 1.9326168298721313, "learning_rate": 0.0001562667797089162, "loss": 2.0664, "step": 9290 }, { "epoch": 0.21901935848523385, "grad_norm": 1.709635853767395, "learning_rate": 0.00015621967877160758, "loss": 2.1744, "step": 9300 }, { "epoch": 0.21925486317177711, "grad_norm": 2.1247639656066895, "learning_rate": 0.0001561725778342989, "loss": 2.1967, "step": 9310 }, { "epoch": 0.21949036785832038, "grad_norm": 2.582542657852173, "learning_rate": 0.00015612547689699025, "loss": 2.2411, "step": 9320 }, { "epoch": 0.21972587254486364, "grad_norm": 2.743621349334717, "learning_rate": 0.0001560783759596816, "loss": 2.3035, "step": 9330 }, { "epoch": 0.2199613772314069, "grad_norm": 2.67402720451355, "learning_rate": 0.00015603127502237296, "loss": 2.2365, "step": 9340 }, { "epoch": 0.22019688191795017, "grad_norm": 2.4903061389923096, "learning_rate": 0.0001559841740850643, "loss": 2.0761, "step": 9350 }, { "epoch": 0.22043238660449344, "grad_norm": 2.3421287536621094, "learning_rate": 0.00015593707314775566, "loss": 2.1829, "step": 9360 }, { "epoch": 0.2206678912910367, "grad_norm": 2.735867977142334, "learning_rate": 0.00015588997221044698, "loss": 2.2231, "step": 9370 }, { "epoch": 0.22090339597757996, "grad_norm": 1.992004156112671, "learning_rate": 0.00015584287127313836, "loss": 2.2848, "step": 9380 }, { "epoch": 0.22113890066412323, "grad_norm": 2.300267457962036, "learning_rate": 0.00015579577033582968, "loss": 2.1919, "step": 9390 }, { "epoch": 0.22137440535066646, "grad_norm": 1.9108951091766357, "learning_rate": 0.00015574866939852103, "loss": 2.0605, "step": 9400 }, { "epoch": 0.22160991003720973, "grad_norm": 2.468258857727051, "learning_rate": 0.00015570156846121238, "loss": 2.2323, "step": 9410 }, { "epoch": 0.221845414723753, "grad_norm": 2.3677616119384766, "learning_rate": 0.00015565446752390373, "loss": 2.0305, "step": 
9420 }, { "epoch": 0.22208091941029626, "grad_norm": 2.333322286605835, "learning_rate": 0.00015560736658659508, "loss": 2.3494, "step": 9430 }, { "epoch": 0.22231642409683952, "grad_norm": 2.4163379669189453, "learning_rate": 0.00015556026564928643, "loss": 2.2143, "step": 9440 }, { "epoch": 0.22255192878338279, "grad_norm": 2.2694649696350098, "learning_rate": 0.00015551316471197775, "loss": 2.1862, "step": 9450 }, { "epoch": 0.22278743346992605, "grad_norm": 1.8725945949554443, "learning_rate": 0.00015546606377466913, "loss": 2.1275, "step": 9460 }, { "epoch": 0.2230229381564693, "grad_norm": 2.3583409786224365, "learning_rate": 0.00015541896283736045, "loss": 2.2181, "step": 9470 }, { "epoch": 0.22325844284301258, "grad_norm": 1.9677271842956543, "learning_rate": 0.00015537186190005183, "loss": 2.1267, "step": 9480 }, { "epoch": 0.22349394752955584, "grad_norm": 1.9935779571533203, "learning_rate": 0.00015532476096274318, "loss": 2.3531, "step": 9490 }, { "epoch": 0.2237294522160991, "grad_norm": 3.433013439178467, "learning_rate": 0.0001552776600254345, "loss": 2.1142, "step": 9500 }, { "epoch": 0.22396495690264237, "grad_norm": 2.3903839588165283, "learning_rate": 0.00015523055908812588, "loss": 2.0263, "step": 9510 }, { "epoch": 0.22420046158918563, "grad_norm": 2.319390296936035, "learning_rate": 0.0001551834581508172, "loss": 2.2263, "step": 9520 }, { "epoch": 0.2244359662757289, "grad_norm": 1.99445641040802, "learning_rate": 0.00015513635721350856, "loss": 1.9634, "step": 9530 }, { "epoch": 0.22467147096227216, "grad_norm": 2.1183362007141113, "learning_rate": 0.0001550892562761999, "loss": 2.1869, "step": 9540 }, { "epoch": 0.2249069756488154, "grad_norm": 2.5018179416656494, "learning_rate": 0.00015504215533889126, "loss": 2.0954, "step": 9550 }, { "epoch": 0.22514248033535866, "grad_norm": 2.222480297088623, "learning_rate": 0.0001549950544015826, "loss": 2.0068, "step": 9560 }, { "epoch": 0.22537798502190193, "grad_norm": 2.6220204830169678, 
"learning_rate": 0.00015494795346427396, "loss": 2.19, "step": 9570 }, { "epoch": 0.2256134897084452, "grad_norm": 2.385401964187622, "learning_rate": 0.00015490085252696528, "loss": 2.1428, "step": 9580 }, { "epoch": 0.22584899439498846, "grad_norm": 2.0146987438201904, "learning_rate": 0.00015485375158965666, "loss": 2.1736, "step": 9590 }, { "epoch": 0.22608449908153172, "grad_norm": 2.207779884338379, "learning_rate": 0.00015480665065234798, "loss": 2.0437, "step": 9600 }, { "epoch": 0.22632000376807498, "grad_norm": 2.220184803009033, "learning_rate": 0.00015475954971503933, "loss": 2.0954, "step": 9610 }, { "epoch": 0.22655550845461825, "grad_norm": 1.9735349416732788, "learning_rate": 0.00015471244877773068, "loss": 2.0907, "step": 9620 }, { "epoch": 0.2267910131411615, "grad_norm": 2.214447259902954, "learning_rate": 0.00015466534784042203, "loss": 2.0783, "step": 9630 }, { "epoch": 0.22702651782770478, "grad_norm": 2.104156255722046, "learning_rate": 0.00015461824690311338, "loss": 2.3976, "step": 9640 }, { "epoch": 0.22726202251424804, "grad_norm": 2.283184289932251, "learning_rate": 0.00015457114596580473, "loss": 2.1239, "step": 9650 }, { "epoch": 0.2274975272007913, "grad_norm": 1.9349390268325806, "learning_rate": 0.00015452404502849605, "loss": 2.2178, "step": 9660 }, { "epoch": 0.22773303188733457, "grad_norm": 2.752654552459717, "learning_rate": 0.00015447694409118743, "loss": 2.2175, "step": 9670 }, { "epoch": 0.22796853657387783, "grad_norm": 1.937559723854065, "learning_rate": 0.00015442984315387876, "loss": 2.112, "step": 9680 }, { "epoch": 0.22820404126042107, "grad_norm": 2.8238909244537354, "learning_rate": 0.00015438274221657013, "loss": 2.0023, "step": 9690 }, { "epoch": 0.22843954594696433, "grad_norm": 2.258000373840332, "learning_rate": 0.00015433564127926146, "loss": 1.946, "step": 9700 }, { "epoch": 0.2286750506335076, "grad_norm": 3.562777519226074, "learning_rate": 0.0001542885403419528, "loss": 2.0746, "step": 9710 }, { "epoch": 
0.22891055532005086, "grad_norm": 2.3150315284729004, "learning_rate": 0.00015424143940464416, "loss": 2.1203, "step": 9720 }, { "epoch": 0.22914606000659413, "grad_norm": 3.4082276821136475, "learning_rate": 0.0001541943384673355, "loss": 2.3191, "step": 9730 }, { "epoch": 0.2293815646931374, "grad_norm": 2.0063388347625732, "learning_rate": 0.00015414723753002686, "loss": 2.1138, "step": 9740 }, { "epoch": 0.22961706937968066, "grad_norm": 1.8203511238098145, "learning_rate": 0.0001541001365927182, "loss": 2.0288, "step": 9750 }, { "epoch": 0.22985257406622392, "grad_norm": 2.3845832347869873, "learning_rate": 0.00015405303565540956, "loss": 2.1712, "step": 9760 }, { "epoch": 0.23008807875276718, "grad_norm": 1.6893136501312256, "learning_rate": 0.0001540059347181009, "loss": 2.215, "step": 9770 }, { "epoch": 0.23032358343931045, "grad_norm": 2.3593993186950684, "learning_rate": 0.00015395883378079226, "loss": 2.081, "step": 9780 }, { "epoch": 0.2305590881258537, "grad_norm": 2.105539321899414, "learning_rate": 0.00015391173284348358, "loss": 2.1697, "step": 9790 }, { "epoch": 0.23079459281239698, "grad_norm": 1.891050100326538, "learning_rate": 0.00015386463190617496, "loss": 1.9764, "step": 9800 }, { "epoch": 0.23103009749894024, "grad_norm": 4.2726263999938965, "learning_rate": 0.00015381753096886628, "loss": 2.3325, "step": 9810 }, { "epoch": 0.2312656021854835, "grad_norm": 1.6610772609710693, "learning_rate": 0.00015377043003155763, "loss": 2.3659, "step": 9820 }, { "epoch": 0.23150110687202674, "grad_norm": 2.202704906463623, "learning_rate": 0.00015372332909424898, "loss": 2.1231, "step": 9830 }, { "epoch": 0.23173661155857, "grad_norm": 1.6619096994400024, "learning_rate": 0.00015367622815694033, "loss": 2.0609, "step": 9840 }, { "epoch": 0.23197211624511327, "grad_norm": 2.1274075508117676, "learning_rate": 0.00015362912721963168, "loss": 2.1627, "step": 9850 }, { "epoch": 0.23220762093165653, "grad_norm": 2.303713321685791, "learning_rate": 
0.00015358202628232303, "loss": 2.168, "step": 9860 }, { "epoch": 0.2324431256181998, "grad_norm": 1.953169822692871, "learning_rate": 0.00015353492534501436, "loss": 2.0576, "step": 9870 }, { "epoch": 0.23267863030474306, "grad_norm": 2.4101409912109375, "learning_rate": 0.00015348782440770573, "loss": 2.094, "step": 9880 }, { "epoch": 0.23291413499128633, "grad_norm": 2.0362141132354736, "learning_rate": 0.00015344072347039706, "loss": 2.162, "step": 9890 }, { "epoch": 0.2331496396778296, "grad_norm": 1.7066439390182495, "learning_rate": 0.00015339362253308843, "loss": 2.0753, "step": 9900 }, { "epoch": 0.23338514436437285, "grad_norm": 2.0691280364990234, "learning_rate": 0.00015334652159577976, "loss": 2.0622, "step": 9910 }, { "epoch": 0.23362064905091612, "grad_norm": 2.5985186100006104, "learning_rate": 0.0001532994206584711, "loss": 2.1825, "step": 9920 }, { "epoch": 0.23385615373745938, "grad_norm": 1.8621069192886353, "learning_rate": 0.00015325231972116246, "loss": 2.3242, "step": 9930 }, { "epoch": 0.23409165842400265, "grad_norm": 1.9276299476623535, "learning_rate": 0.0001532052187838538, "loss": 1.991, "step": 9940 }, { "epoch": 0.2343271631105459, "grad_norm": 2.965895414352417, "learning_rate": 0.00015315811784654516, "loss": 2.0144, "step": 9950 }, { "epoch": 0.23456266779708917, "grad_norm": 2.115572214126587, "learning_rate": 0.0001531110169092365, "loss": 2.3183, "step": 9960 }, { "epoch": 0.2347981724836324, "grad_norm": 2.177835702896118, "learning_rate": 0.00015306391597192783, "loss": 1.9841, "step": 9970 }, { "epoch": 0.23503367717017568, "grad_norm": 1.9404809474945068, "learning_rate": 0.0001530168150346192, "loss": 2.1265, "step": 9980 }, { "epoch": 0.23526918185671894, "grad_norm": 2.3881819248199463, "learning_rate": 0.00015296971409731053, "loss": 2.2287, "step": 9990 }, { "epoch": 0.2355046865432622, "grad_norm": 2.5311830043792725, "learning_rate": 0.00015292261316000188, "loss": 2.1156, "step": 10000 }, { "epoch": 
0.23574019122980547, "grad_norm": 1.957363247871399, "learning_rate": 0.00015287551222269323, "loss": 2.0976, "step": 10010 }, { "epoch": 0.23597569591634873, "grad_norm": 2.139197587966919, "learning_rate": 0.00015282841128538458, "loss": 2.1844, "step": 10020 }, { "epoch": 0.236211200602892, "grad_norm": 2.341768741607666, "learning_rate": 0.00015278131034807593, "loss": 2.3208, "step": 10030 }, { "epoch": 0.23644670528943526, "grad_norm": 2.2145462036132812, "learning_rate": 0.00015273420941076728, "loss": 2.225, "step": 10040 }, { "epoch": 0.23668220997597852, "grad_norm": 2.0473461151123047, "learning_rate": 0.00015268710847345863, "loss": 2.3011, "step": 10050 }, { "epoch": 0.2369177146625218, "grad_norm": 2.0674173831939697, "learning_rate": 0.00015264000753614998, "loss": 2.0881, "step": 10060 }, { "epoch": 0.23715321934906505, "grad_norm": 2.2629573345184326, "learning_rate": 0.00015259290659884133, "loss": 2.2819, "step": 10070 }, { "epoch": 0.23738872403560832, "grad_norm": 2.272273063659668, "learning_rate": 0.00015254580566153268, "loss": 2.0528, "step": 10080 }, { "epoch": 0.23762422872215158, "grad_norm": 2.418661117553711, "learning_rate": 0.00015249870472422403, "loss": 2.1313, "step": 10090 }, { "epoch": 0.23785973340869485, "grad_norm": 2.5303127765655518, "learning_rate": 0.00015245160378691536, "loss": 2.2907, "step": 10100 }, { "epoch": 0.23809523809523808, "grad_norm": 2.464144229888916, "learning_rate": 0.00015240450284960673, "loss": 1.9191, "step": 10110 }, { "epoch": 0.23833074278178135, "grad_norm": 1.8093903064727783, "learning_rate": 0.00015235740191229806, "loss": 2.0152, "step": 10120 }, { "epoch": 0.2385662474683246, "grad_norm": 1.9383940696716309, "learning_rate": 0.0001523103009749894, "loss": 2.1197, "step": 10130 }, { "epoch": 0.23880175215486787, "grad_norm": 2.671933650970459, "learning_rate": 0.00015226320003768076, "loss": 1.9837, "step": 10140 }, { "epoch": 0.23903725684141114, "grad_norm": 2.083766222000122, 
"learning_rate": 0.0001522160991003721, "loss": 2.0238, "step": 10150 }, { "epoch": 0.2392727615279544, "grad_norm": 2.2363474369049072, "learning_rate": 0.00015216899816306346, "loss": 2.1516, "step": 10160 }, { "epoch": 0.23950826621449767, "grad_norm": 2.276669979095459, "learning_rate": 0.0001521218972257548, "loss": 1.9849, "step": 10170 }, { "epoch": 0.23974377090104093, "grad_norm": 2.018583059310913, "learning_rate": 0.00015207479628844613, "loss": 2.2545, "step": 10180 }, { "epoch": 0.2399792755875842, "grad_norm": 2.3326144218444824, "learning_rate": 0.0001520276953511375, "loss": 2.2914, "step": 10190 }, { "epoch": 0.24021478027412746, "grad_norm": 2.2414534091949463, "learning_rate": 0.00015198059441382883, "loss": 2.0861, "step": 10200 }, { "epoch": 0.24045028496067072, "grad_norm": 2.206983804702759, "learning_rate": 0.00015193349347652018, "loss": 2.1644, "step": 10210 }, { "epoch": 0.240685789647214, "grad_norm": 1.9949047565460205, "learning_rate": 0.00015188639253921153, "loss": 2.2007, "step": 10220 }, { "epoch": 0.24092129433375725, "grad_norm": 1.8927348852157593, "learning_rate": 0.00015183929160190288, "loss": 2.0746, "step": 10230 }, { "epoch": 0.24115679902030052, "grad_norm": 1.8133490085601807, "learning_rate": 0.00015179219066459423, "loss": 1.9463, "step": 10240 }, { "epoch": 0.24139230370684375, "grad_norm": 2.7200329303741455, "learning_rate": 0.00015174508972728558, "loss": 2.0079, "step": 10250 }, { "epoch": 0.24162780839338702, "grad_norm": 2.4350547790527344, "learning_rate": 0.0001516979887899769, "loss": 2.2918, "step": 10260 }, { "epoch": 0.24186331307993028, "grad_norm": 1.9246656894683838, "learning_rate": 0.00015165088785266828, "loss": 2.117, "step": 10270 }, { "epoch": 0.24209881776647355, "grad_norm": 2.2206227779388428, "learning_rate": 0.0001516037869153596, "loss": 2.225, "step": 10280 }, { "epoch": 0.2423343224530168, "grad_norm": 2.1294167041778564, "learning_rate": 0.00015155668597805098, "loss": 2.0306, "step": 
10290 }, { "epoch": 0.24256982713956007, "grad_norm": 2.4349210262298584, "learning_rate": 0.0001515095850407423, "loss": 2.1363, "step": 10300 }, { "epoch": 0.24280533182610334, "grad_norm": 1.8255261182785034, "learning_rate": 0.00015146248410343366, "loss": 2.2975, "step": 10310 }, { "epoch": 0.2430408365126466, "grad_norm": 2.2579727172851562, "learning_rate": 0.000151415383166125, "loss": 2.0691, "step": 10320 }, { "epoch": 0.24327634119918987, "grad_norm": 2.279855966567993, "learning_rate": 0.00015136828222881636, "loss": 2.1189, "step": 10330 }, { "epoch": 0.24351184588573313, "grad_norm": 2.1615757942199707, "learning_rate": 0.0001513211812915077, "loss": 2.2606, "step": 10340 }, { "epoch": 0.2437473505722764, "grad_norm": 2.1494510173797607, "learning_rate": 0.00015127408035419906, "loss": 2.0028, "step": 10350 }, { "epoch": 0.24398285525881966, "grad_norm": 1.856366753578186, "learning_rate": 0.0001512269794168904, "loss": 2.015, "step": 10360 }, { "epoch": 0.24421835994536292, "grad_norm": 2.0949535369873047, "learning_rate": 0.00015117987847958176, "loss": 1.9954, "step": 10370 }, { "epoch": 0.2444538646319062, "grad_norm": 2.0382297039031982, "learning_rate": 0.0001511327775422731, "loss": 2.0136, "step": 10380 }, { "epoch": 0.24468936931844942, "grad_norm": 3.209582567214966, "learning_rate": 0.00015108567660496443, "loss": 2.2051, "step": 10390 }, { "epoch": 0.2449248740049927, "grad_norm": 2.9670584201812744, "learning_rate": 0.0001510385756676558, "loss": 2.0755, "step": 10400 }, { "epoch": 0.24516037869153595, "grad_norm": 2.0415966510772705, "learning_rate": 0.00015099147473034713, "loss": 2.0836, "step": 10410 }, { "epoch": 0.24539588337807922, "grad_norm": 2.374818801879883, "learning_rate": 0.00015094437379303848, "loss": 2.1602, "step": 10420 }, { "epoch": 0.24563138806462248, "grad_norm": 2.551715850830078, "learning_rate": 0.00015089727285572983, "loss": 2.2528, "step": 10430 }, { "epoch": 0.24586689275116574, "grad_norm": 
1.8446077108383179, "learning_rate": 0.00015085017191842118, "loss": 2.2517, "step": 10440 }, { "epoch": 0.246102397437709, "grad_norm": 2.6373045444488525, "learning_rate": 0.00015080307098111253, "loss": 2.0872, "step": 10450 }, { "epoch": 0.24633790212425227, "grad_norm": 2.356964588165283, "learning_rate": 0.00015075597004380388, "loss": 2.0906, "step": 10460 }, { "epoch": 0.24657340681079554, "grad_norm": 2.3910388946533203, "learning_rate": 0.0001507088691064952, "loss": 2.2638, "step": 10470 }, { "epoch": 0.2468089114973388, "grad_norm": 2.311750650405884, "learning_rate": 0.00015066176816918659, "loss": 1.9749, "step": 10480 }, { "epoch": 0.24704441618388207, "grad_norm": 2.235476016998291, "learning_rate": 0.0001506146672318779, "loss": 1.8174, "step": 10490 }, { "epoch": 0.24727992087042533, "grad_norm": 2.281172275543213, "learning_rate": 0.00015056756629456929, "loss": 1.8255, "step": 10500 }, { "epoch": 0.2475154255569686, "grad_norm": 2.279736042022705, "learning_rate": 0.0001505204653572606, "loss": 2.1736, "step": 10510 }, { "epoch": 0.24775093024351186, "grad_norm": 2.279602289199829, "learning_rate": 0.00015047336441995196, "loss": 2.0634, "step": 10520 }, { "epoch": 0.24798643493005512, "grad_norm": 2.272345542907715, "learning_rate": 0.0001504262634826433, "loss": 2.2241, "step": 10530 }, { "epoch": 0.24822193961659836, "grad_norm": 1.9404467344284058, "learning_rate": 0.00015037916254533466, "loss": 2.2235, "step": 10540 }, { "epoch": 0.24845744430314162, "grad_norm": 2.3488924503326416, "learning_rate": 0.000150332061608026, "loss": 2.2408, "step": 10550 }, { "epoch": 0.2486929489896849, "grad_norm": 1.9650462865829468, "learning_rate": 0.00015028496067071736, "loss": 1.9652, "step": 10560 }, { "epoch": 0.24892845367622815, "grad_norm": 2.162095785140991, "learning_rate": 0.00015023785973340868, "loss": 2.2118, "step": 10570 }, { "epoch": 0.24916395836277141, "grad_norm": 2.4109175205230713, "learning_rate": 0.00015019075879610006, "loss": 
2.1926, "step": 10580 }, { "epoch": 0.24939946304931468, "grad_norm": 2.6191952228546143, "learning_rate": 0.00015014365785879138, "loss": 2.1851, "step": 10590 }, { "epoch": 0.24963496773585794, "grad_norm": 2.78340220451355, "learning_rate": 0.00015009655692148273, "loss": 2.1431, "step": 10600 }, { "epoch": 0.2498704724224012, "grad_norm": 1.8790323734283447, "learning_rate": 0.0001500494559841741, "loss": 2.088, "step": 10610 }, { "epoch": 0.25010597710894444, "grad_norm": 2.0652871131896973, "learning_rate": 0.00015000235504686543, "loss": 2.1255, "step": 10620 }, { "epoch": 0.2503414817954877, "grad_norm": 1.9595967531204224, "learning_rate": 0.00014995525410955678, "loss": 2.026, "step": 10630 }, { "epoch": 0.250576986482031, "grad_norm": 2.293111562728882, "learning_rate": 0.00014990815317224813, "loss": 2.2182, "step": 10640 }, { "epoch": 0.25081249116857424, "grad_norm": 1.931410789489746, "learning_rate": 0.00014986105223493948, "loss": 2.2731, "step": 10650 }, { "epoch": 0.2510479958551175, "grad_norm": 2.6181063652038574, "learning_rate": 0.00014981395129763084, "loss": 2.2646, "step": 10660 }, { "epoch": 0.25128350054166076, "grad_norm": 1.9321035146713257, "learning_rate": 0.00014976685036032219, "loss": 2.1447, "step": 10670 }, { "epoch": 0.25151900522820403, "grad_norm": 3.149609327316284, "learning_rate": 0.00014971974942301354, "loss": 2.1219, "step": 10680 }, { "epoch": 0.2517545099147473, "grad_norm": 1.9660497903823853, "learning_rate": 0.00014967264848570489, "loss": 2.2534, "step": 10690 }, { "epoch": 0.25199001460129056, "grad_norm": 2.55802845954895, "learning_rate": 0.0001496255475483962, "loss": 2.2268, "step": 10700 }, { "epoch": 0.2522255192878338, "grad_norm": 1.8749346733093262, "learning_rate": 0.0001495784466110876, "loss": 2.0165, "step": 10710 }, { "epoch": 0.2524610239743771, "grad_norm": 1.9412798881530762, "learning_rate": 0.0001495313456737789, "loss": 2.2144, "step": 10720 }, { "epoch": 0.25269652866092035, "grad_norm": 
2.201239585876465, "learning_rate": 0.00014948424473647026, "loss": 2.1653, "step": 10730 }, { "epoch": 0.2529320333474636, "grad_norm": 1.9565176963806152, "learning_rate": 0.0001494371437991616, "loss": 2.1649, "step": 10740 }, { "epoch": 0.2531675380340069, "grad_norm": 2.220141649246216, "learning_rate": 0.00014939004286185296, "loss": 1.8715, "step": 10750 }, { "epoch": 0.25340304272055014, "grad_norm": 2.311554193496704, "learning_rate": 0.0001493429419245443, "loss": 2.2611, "step": 10760 }, { "epoch": 0.2536385474070934, "grad_norm": 2.3438363075256348, "learning_rate": 0.00014929584098723566, "loss": 2.1026, "step": 10770 }, { "epoch": 0.25387405209363667, "grad_norm": 1.9707456827163696, "learning_rate": 0.00014924874004992698, "loss": 2.0283, "step": 10780 }, { "epoch": 0.25410955678017993, "grad_norm": 3.091794729232788, "learning_rate": 0.00014920163911261836, "loss": 2.3363, "step": 10790 }, { "epoch": 0.2543450614667232, "grad_norm": 2.3337857723236084, "learning_rate": 0.00014915453817530968, "loss": 2.1223, "step": 10800 }, { "epoch": 0.25458056615326646, "grad_norm": 1.6483640670776367, "learning_rate": 0.00014910743723800103, "loss": 2.2096, "step": 10810 }, { "epoch": 0.2548160708398097, "grad_norm": 2.229172945022583, "learning_rate": 0.00014906033630069238, "loss": 2.1877, "step": 10820 }, { "epoch": 0.255051575526353, "grad_norm": 1.7530523538589478, "learning_rate": 0.00014901323536338374, "loss": 1.9589, "step": 10830 }, { "epoch": 0.25528708021289626, "grad_norm": 1.962538242340088, "learning_rate": 0.00014896613442607509, "loss": 2.2333, "step": 10840 }, { "epoch": 0.2555225848994395, "grad_norm": 3.094045877456665, "learning_rate": 0.00014891903348876644, "loss": 2.1465, "step": 10850 }, { "epoch": 0.2557580895859828, "grad_norm": 1.9345427751541138, "learning_rate": 0.00014887193255145776, "loss": 2.1342, "step": 10860 }, { "epoch": 0.25599359427252605, "grad_norm": 1.932407259941101, "learning_rate": 0.00014882483161414914, "loss": 
2.0455, "step": 10870 }, { "epoch": 0.2562290989590693, "grad_norm": 2.0643908977508545, "learning_rate": 0.00014877773067684046, "loss": 2.0413, "step": 10880 }, { "epoch": 0.2564646036456125, "grad_norm": 2.0406455993652344, "learning_rate": 0.00014873062973953184, "loss": 2.2408, "step": 10890 }, { "epoch": 0.2567001083321558, "grad_norm": 2.9993858337402344, "learning_rate": 0.0001486835288022232, "loss": 2.1404, "step": 10900 }, { "epoch": 0.25693561301869905, "grad_norm": 2.122169017791748, "learning_rate": 0.0001486364278649145, "loss": 2.3794, "step": 10910 }, { "epoch": 0.2571711177052423, "grad_norm": 2.0623533725738525, "learning_rate": 0.0001485893269276059, "loss": 2.089, "step": 10920 }, { "epoch": 0.2574066223917856, "grad_norm": 2.2135274410247803, "learning_rate": 0.0001485422259902972, "loss": 2.0781, "step": 10930 }, { "epoch": 0.25764212707832884, "grad_norm": 3.7605085372924805, "learning_rate": 0.00014849512505298856, "loss": 2.3425, "step": 10940 }, { "epoch": 0.2578776317648721, "grad_norm": 2.248478412628174, "learning_rate": 0.0001484480241156799, "loss": 2.0831, "step": 10950 }, { "epoch": 0.25811313645141537, "grad_norm": 2.040879249572754, "learning_rate": 0.00014840092317837126, "loss": 2.1721, "step": 10960 }, { "epoch": 0.25834864113795863, "grad_norm": 2.4752190113067627, "learning_rate": 0.0001483538222410626, "loss": 1.8606, "step": 10970 }, { "epoch": 0.2585841458245019, "grad_norm": 2.079008102416992, "learning_rate": 0.00014830672130375396, "loss": 2.1368, "step": 10980 }, { "epoch": 0.25881965051104516, "grad_norm": 2.152157783508301, "learning_rate": 0.00014825962036644528, "loss": 2.2405, "step": 10990 }, { "epoch": 0.2590551551975884, "grad_norm": 1.8461806774139404, "learning_rate": 0.00014821251942913666, "loss": 2.2071, "step": 11000 }, { "epoch": 0.2592906598841317, "grad_norm": 2.190112590789795, "learning_rate": 0.00014816541849182799, "loss": 2.1474, "step": 11010 }, { "epoch": 0.25952616457067496, "grad_norm": 
1.828783631324768, "learning_rate": 0.00014811831755451934, "loss": 2.0584, "step": 11020 }, { "epoch": 0.2597616692572182, "grad_norm": 2.171999454498291, "learning_rate": 0.00014807121661721069, "loss": 2.2661, "step": 11030 }, { "epoch": 0.2599971739437615, "grad_norm": 2.530381679534912, "learning_rate": 0.00014802411567990204, "loss": 2.3015, "step": 11040 }, { "epoch": 0.26023267863030475, "grad_norm": 2.840162515640259, "learning_rate": 0.00014797701474259339, "loss": 2.0476, "step": 11050 }, { "epoch": 0.260468183316848, "grad_norm": 2.931011438369751, "learning_rate": 0.00014792991380528474, "loss": 2.1905, "step": 11060 }, { "epoch": 0.2607036880033913, "grad_norm": 2.2790048122406006, "learning_rate": 0.00014788281286797606, "loss": 1.9775, "step": 11070 }, { "epoch": 0.26093919268993454, "grad_norm": 2.563983678817749, "learning_rate": 0.00014783571193066744, "loss": 2.0517, "step": 11080 }, { "epoch": 0.2611746973764778, "grad_norm": 1.9197920560836792, "learning_rate": 0.00014779332108708963, "loss": 2.0083, "step": 11090 }, { "epoch": 0.26141020206302107, "grad_norm": 2.238065242767334, "learning_rate": 0.00014774622014978098, "loss": 2.1118, "step": 11100 }, { "epoch": 0.26164570674956433, "grad_norm": 2.00567626953125, "learning_rate": 0.00014769911921247233, "loss": 2.0841, "step": 11110 }, { "epoch": 0.2618812114361076, "grad_norm": 1.9711300134658813, "learning_rate": 0.00014765201827516368, "loss": 2.1229, "step": 11120 }, { "epoch": 0.26211671612265086, "grad_norm": 2.36851167678833, "learning_rate": 0.00014760491733785503, "loss": 1.9788, "step": 11130 }, { "epoch": 0.2623522208091941, "grad_norm": 2.2312638759613037, "learning_rate": 0.00014755781640054638, "loss": 2.1221, "step": 11140 }, { "epoch": 0.2625877254957374, "grad_norm": 1.9578697681427002, "learning_rate": 0.00014751071546323773, "loss": 1.9844, "step": 11150 }, { "epoch": 0.26282323018228065, "grad_norm": 2.0937769412994385, "learning_rate": 0.00014746361452592908, "loss": 
2.1305, "step": 11160 }, { "epoch": 0.26305873486882386, "grad_norm": 1.9958505630493164, "learning_rate": 0.00014741651358862043, "loss": 2.2201, "step": 11170 }, { "epoch": 0.2632942395553671, "grad_norm": 2.8454930782318115, "learning_rate": 0.00014736941265131178, "loss": 2.1756, "step": 11180 }, { "epoch": 0.2635297442419104, "grad_norm": 1.7872885465621948, "learning_rate": 0.00014732231171400313, "loss": 1.788, "step": 11190 }, { "epoch": 0.26376524892845365, "grad_norm": 2.458237886428833, "learning_rate": 0.00014727521077669445, "loss": 2.1815, "step": 11200 }, { "epoch": 0.2640007536149969, "grad_norm": 2.0657920837402344, "learning_rate": 0.00014722810983938583, "loss": 2.0694, "step": 11210 }, { "epoch": 0.2642362583015402, "grad_norm": 2.7617380619049072, "learning_rate": 0.00014718100890207715, "loss": 2.196, "step": 11220 }, { "epoch": 0.26447176298808345, "grad_norm": 2.043877601623535, "learning_rate": 0.0001471339079647685, "loss": 2.2654, "step": 11230 }, { "epoch": 0.2647072676746267, "grad_norm": 2.868586540222168, "learning_rate": 0.00014708680702745985, "loss": 2.1117, "step": 11240 }, { "epoch": 0.26494277236117, "grad_norm": 2.223196268081665, "learning_rate": 0.0001470397060901512, "loss": 2.3975, "step": 11250 }, { "epoch": 0.26517827704771324, "grad_norm": 2.0349698066711426, "learning_rate": 0.00014699260515284255, "loss": 2.047, "step": 11260 }, { "epoch": 0.2654137817342565, "grad_norm": 1.827580451965332, "learning_rate": 0.0001469455042155339, "loss": 2.0944, "step": 11270 }, { "epoch": 0.26564928642079977, "grad_norm": 2.8184313774108887, "learning_rate": 0.00014689840327822523, "loss": 2.0817, "step": 11280 }, { "epoch": 0.26588479110734303, "grad_norm": 2.1076481342315674, "learning_rate": 0.0001468513023409166, "loss": 2.153, "step": 11290 }, { "epoch": 0.2661202957938863, "grad_norm": 1.8298046588897705, "learning_rate": 0.00014680420140360793, "loss": 2.2125, "step": 11300 }, { "epoch": 0.26635580048042956, "grad_norm": 
1.8453890085220337, "learning_rate": 0.00014675710046629928, "loss": 2.1606, "step": 11310 }, { "epoch": 0.2665913051669728, "grad_norm": 2.0864975452423096, "learning_rate": 0.00014670999952899063, "loss": 2.0076, "step": 11320 }, { "epoch": 0.2668268098535161, "grad_norm": 2.20062255859375, "learning_rate": 0.00014666289859168198, "loss": 1.9654, "step": 11330 }, { "epoch": 0.26706231454005935, "grad_norm": 1.914099097251892, "learning_rate": 0.00014661579765437333, "loss": 2.0639, "step": 11340 }, { "epoch": 0.2672978192266026, "grad_norm": 1.8117254972457886, "learning_rate": 0.00014656869671706468, "loss": 2.0437, "step": 11350 }, { "epoch": 0.2675333239131459, "grad_norm": 2.076619863510132, "learning_rate": 0.000146521595779756, "loss": 2.0552, "step": 11360 }, { "epoch": 0.26776882859968915, "grad_norm": 1.752306580543518, "learning_rate": 0.00014647449484244738, "loss": 1.9671, "step": 11370 }, { "epoch": 0.2680043332862324, "grad_norm": 2.3570029735565186, "learning_rate": 0.0001464273939051387, "loss": 1.8996, "step": 11380 }, { "epoch": 0.2682398379727757, "grad_norm": 1.9335840940475464, "learning_rate": 0.00014638029296783008, "loss": 2.2257, "step": 11390 }, { "epoch": 0.26847534265931894, "grad_norm": 2.8043274879455566, "learning_rate": 0.0001463331920305214, "loss": 2.3472, "step": 11400 }, { "epoch": 0.2687108473458622, "grad_norm": 1.8869709968566895, "learning_rate": 0.00014628609109321275, "loss": 2.3359, "step": 11410 }, { "epoch": 0.26894635203240547, "grad_norm": 3.413790225982666, "learning_rate": 0.0001462389901559041, "loss": 2.2504, "step": 11420 }, { "epoch": 0.26918185671894873, "grad_norm": 2.251716136932373, "learning_rate": 0.00014619188921859545, "loss": 2.1951, "step": 11430 }, { "epoch": 0.269417361405492, "grad_norm": 2.0624914169311523, "learning_rate": 0.0001461447882812868, "loss": 2.0907, "step": 11440 }, { "epoch": 0.2696528660920352, "grad_norm": 2.2086572647094727, "learning_rate": 0.00014609768734397816, "loss": 2.0458, 
"step": 11450 }, { "epoch": 0.26988837077857847, "grad_norm": 2.272899627685547, "learning_rate": 0.0001460505864066695, "loss": 2.0175, "step": 11460 }, { "epoch": 0.27012387546512173, "grad_norm": 1.962472677230835, "learning_rate": 0.00014600348546936086, "loss": 2.2018, "step": 11470 }, { "epoch": 0.270359380151665, "grad_norm": 1.8349707126617432, "learning_rate": 0.0001459563845320522, "loss": 2.1957, "step": 11480 }, { "epoch": 0.27059488483820826, "grad_norm": 2.317610740661621, "learning_rate": 0.00014590928359474353, "loss": 2.1421, "step": 11490 }, { "epoch": 0.2708303895247515, "grad_norm": 3.183609962463379, "learning_rate": 0.0001458621826574349, "loss": 2.0522, "step": 11500 }, { "epoch": 0.2710658942112948, "grad_norm": 2.0786678791046143, "learning_rate": 0.00014581508172012623, "loss": 2.0592, "step": 11510 }, { "epoch": 0.27130139889783805, "grad_norm": 1.7079672813415527, "learning_rate": 0.00014576798078281758, "loss": 1.9237, "step": 11520 }, { "epoch": 0.2715369035843813, "grad_norm": 1.9276167154312134, "learning_rate": 0.00014572087984550893, "loss": 2.4222, "step": 11530 }, { "epoch": 0.2717724082709246, "grad_norm": 2.461214303970337, "learning_rate": 0.00014567377890820028, "loss": 2.2114, "step": 11540 }, { "epoch": 0.27200791295746785, "grad_norm": 1.96316659450531, "learning_rate": 0.00014562667797089163, "loss": 2.2535, "step": 11550 }, { "epoch": 0.2722434176440111, "grad_norm": 2.1543939113616943, "learning_rate": 0.00014557957703358298, "loss": 2.2106, "step": 11560 }, { "epoch": 0.2724789223305544, "grad_norm": 1.8701822757720947, "learning_rate": 0.0001455324760962743, "loss": 1.9426, "step": 11570 }, { "epoch": 0.27271442701709764, "grad_norm": 2.2132885456085205, "learning_rate": 0.00014548537515896568, "loss": 2.1604, "step": 11580 }, { "epoch": 0.2729499317036409, "grad_norm": 2.094003200531006, "learning_rate": 0.000145438274221657, "loss": 2.1061, "step": 11590 }, { "epoch": 0.27318543639018417, "grad_norm": 
2.395878314971924, "learning_rate": 0.00014539117328434838, "loss": 2.0564, "step": 11600 }, { "epoch": 0.27342094107672743, "grad_norm": 2.653024911880493, "learning_rate": 0.0001453440723470397, "loss": 2.3041, "step": 11610 }, { "epoch": 0.2736564457632707, "grad_norm": 2.932143449783325, "learning_rate": 0.00014529697140973106, "loss": 1.9723, "step": 11620 }, { "epoch": 0.27389195044981396, "grad_norm": 2.500082492828369, "learning_rate": 0.0001452498704724224, "loss": 1.9417, "step": 11630 }, { "epoch": 0.2741274551363572, "grad_norm": 3.0111424922943115, "learning_rate": 0.00014520276953511376, "loss": 2.1019, "step": 11640 }, { "epoch": 0.2743629598229005, "grad_norm": 2.4497432708740234, "learning_rate": 0.0001451556685978051, "loss": 2.0508, "step": 11650 }, { "epoch": 0.27459846450944375, "grad_norm": 2.2490170001983643, "learning_rate": 0.00014510856766049646, "loss": 2.1141, "step": 11660 }, { "epoch": 0.274833969195987, "grad_norm": 2.196950674057007, "learning_rate": 0.00014506146672318778, "loss": 2.0565, "step": 11670 }, { "epoch": 0.2750694738825303, "grad_norm": 3.2146339416503906, "learning_rate": 0.00014501436578587916, "loss": 2.0995, "step": 11680 }, { "epoch": 0.27530497856907354, "grad_norm": 2.50426983833313, "learning_rate": 0.00014496726484857048, "loss": 2.166, "step": 11690 }, { "epoch": 0.2755404832556168, "grad_norm": 2.0356388092041016, "learning_rate": 0.00014492016391126183, "loss": 2.1584, "step": 11700 }, { "epoch": 0.2757759879421601, "grad_norm": 2.600349187850952, "learning_rate": 0.0001448730629739532, "loss": 2.1718, "step": 11710 }, { "epoch": 0.27601149262870334, "grad_norm": 3.562185764312744, "learning_rate": 0.00014482596203664453, "loss": 2.2295, "step": 11720 }, { "epoch": 0.2762469973152466, "grad_norm": 1.928037405014038, "learning_rate": 0.00014477886109933588, "loss": 1.9995, "step": 11730 }, { "epoch": 0.2764825020017898, "grad_norm": 1.4152915477752686, "learning_rate": 0.00014473176016202723, "loss": 2.1623, 
"step": 11740 }, { "epoch": 0.2767180066883331, "grad_norm": 1.9397757053375244, "learning_rate": 0.00014468465922471858, "loss": 2.0217, "step": 11750 }, { "epoch": 0.27695351137487634, "grad_norm": 1.7832798957824707, "learning_rate": 0.00014463755828740993, "loss": 2.0114, "step": 11760 }, { "epoch": 0.2771890160614196, "grad_norm": 2.282412528991699, "learning_rate": 0.00014459045735010128, "loss": 2.1579, "step": 11770 }, { "epoch": 0.27742452074796287, "grad_norm": 2.4182028770446777, "learning_rate": 0.00014454335641279263, "loss": 2.0862, "step": 11780 }, { "epoch": 0.27766002543450613, "grad_norm": 2.954577922821045, "learning_rate": 0.00014449625547548398, "loss": 2.184, "step": 11790 }, { "epoch": 0.2778955301210494, "grad_norm": 1.8422027826309204, "learning_rate": 0.0001444491545381753, "loss": 2.1267, "step": 11800 }, { "epoch": 0.27813103480759266, "grad_norm": 1.9264764785766602, "learning_rate": 0.00014440205360086668, "loss": 2.2146, "step": 11810 }, { "epoch": 0.2783665394941359, "grad_norm": 1.719955563545227, "learning_rate": 0.000144354952663558, "loss": 1.9979, "step": 11820 }, { "epoch": 0.2786020441806792, "grad_norm": 2.0132710933685303, "learning_rate": 0.00014430785172624936, "loss": 2.1951, "step": 11830 }, { "epoch": 0.27883754886722245, "grad_norm": 2.1974024772644043, "learning_rate": 0.0001442607507889407, "loss": 2.3713, "step": 11840 }, { "epoch": 0.2790730535537657, "grad_norm": 2.18841814994812, "learning_rate": 0.00014421364985163206, "loss": 2.1615, "step": 11850 }, { "epoch": 0.279308558240309, "grad_norm": 2.1697840690612793, "learning_rate": 0.0001441665489143234, "loss": 2.1027, "step": 11860 }, { "epoch": 0.27954406292685224, "grad_norm": 2.2310283184051514, "learning_rate": 0.00014411944797701476, "loss": 2.3225, "step": 11870 }, { "epoch": 0.2797795676133955, "grad_norm": 2.3003742694854736, "learning_rate": 0.00014407234703970608, "loss": 2.1095, "step": 11880 }, { "epoch": 0.28001507229993877, "grad_norm": 
1.808599591255188, "learning_rate": 0.00014402524610239746, "loss": 2.0941, "step": 11890 }, { "epoch": 0.28025057698648204, "grad_norm": 2.0506701469421387, "learning_rate": 0.00014397814516508878, "loss": 2.3607, "step": 11900 }, { "epoch": 0.2804860816730253, "grad_norm": 1.990747332572937, "learning_rate": 0.00014393104422778013, "loss": 2.0992, "step": 11910 }, { "epoch": 0.28072158635956856, "grad_norm": 2.9683446884155273, "learning_rate": 0.00014388394329047148, "loss": 1.9899, "step": 11920 }, { "epoch": 0.28095709104611183, "grad_norm": 2.2459323406219482, "learning_rate": 0.00014383684235316283, "loss": 1.9907, "step": 11930 }, { "epoch": 0.2811925957326551, "grad_norm": 2.313927173614502, "learning_rate": 0.00014378974141585418, "loss": 2.1981, "step": 11940 }, { "epoch": 0.28142810041919836, "grad_norm": 2.565319299697876, "learning_rate": 0.00014374264047854553, "loss": 2.2282, "step": 11950 }, { "epoch": 0.2816636051057416, "grad_norm": 2.329613208770752, "learning_rate": 0.00014369553954123686, "loss": 2.0866, "step": 11960 }, { "epoch": 0.2818991097922849, "grad_norm": 1.7780147790908813, "learning_rate": 0.00014364843860392823, "loss": 2.1109, "step": 11970 }, { "epoch": 0.28213461447882815, "grad_norm": 2.3526809215545654, "learning_rate": 0.00014360133766661956, "loss": 2.0509, "step": 11980 }, { "epoch": 0.2823701191653714, "grad_norm": 2.9026174545288086, "learning_rate": 0.00014355423672931093, "loss": 2.1536, "step": 11990 }, { "epoch": 0.2826056238519147, "grad_norm": 2.3151416778564453, "learning_rate": 0.00014350713579200228, "loss": 2.2772, "step": 12000 }, { "epoch": 0.28284112853845794, "grad_norm": 2.0522875785827637, "learning_rate": 0.0001434600348546936, "loss": 1.9419, "step": 12010 }, { "epoch": 0.28307663322500115, "grad_norm": 1.7886687517166138, "learning_rate": 0.00014341293391738498, "loss": 2.1908, "step": 12020 }, { "epoch": 0.2833121379115444, "grad_norm": 2.5073275566101074, "learning_rate": 0.0001433658329800763, 
"loss": 2.2582, "step": 12030 }, { "epoch": 0.2835476425980877, "grad_norm": 1.9218273162841797, "learning_rate": 0.00014331873204276766, "loss": 2.1327, "step": 12040 }, { "epoch": 0.28378314728463094, "grad_norm": 1.9071509838104248, "learning_rate": 0.000143271631105459, "loss": 2.1697, "step": 12050 }, { "epoch": 0.2840186519711742, "grad_norm": 1.9115699529647827, "learning_rate": 0.00014322453016815036, "loss": 2.1689, "step": 12060 }, { "epoch": 0.28425415665771747, "grad_norm": 3.1587018966674805, "learning_rate": 0.0001431774292308417, "loss": 2.2849, "step": 12070 }, { "epoch": 0.28448966134426074, "grad_norm": 2.1622958183288574, "learning_rate": 0.00014313032829353306, "loss": 1.9378, "step": 12080 }, { "epoch": 0.284725166030804, "grad_norm": 2.525998830795288, "learning_rate": 0.00014308322735622438, "loss": 2.277, "step": 12090 }, { "epoch": 0.28496067071734726, "grad_norm": 1.5249601602554321, "learning_rate": 0.00014303612641891576, "loss": 1.9813, "step": 12100 }, { "epoch": 0.28519617540389053, "grad_norm": 2.085453748703003, "learning_rate": 0.00014298902548160708, "loss": 2.3223, "step": 12110 }, { "epoch": 0.2854316800904338, "grad_norm": 1.8140358924865723, "learning_rate": 0.00014294192454429843, "loss": 2.1393, "step": 12120 }, { "epoch": 0.28566718477697706, "grad_norm": 2.5443224906921387, "learning_rate": 0.00014289482360698978, "loss": 2.2873, "step": 12130 }, { "epoch": 0.2859026894635203, "grad_norm": 2.006328821182251, "learning_rate": 0.00014284772266968113, "loss": 2.1113, "step": 12140 }, { "epoch": 0.2861381941500636, "grad_norm": 1.8038783073425293, "learning_rate": 0.00014280062173237248, "loss": 2.3057, "step": 12150 }, { "epoch": 0.28637369883660685, "grad_norm": 2.357002019882202, "learning_rate": 0.00014275352079506383, "loss": 2.0994, "step": 12160 }, { "epoch": 0.2866092035231501, "grad_norm": 2.0927555561065674, "learning_rate": 0.00014270641985775516, "loss": 1.9388, "step": 12170 }, { "epoch": 0.2868447082096934, 
"grad_norm": 3.126340866088867, "learning_rate": 0.00014265931892044653, "loss": 2.2587, "step": 12180 }, { "epoch": 0.28708021289623664, "grad_norm": 2.3898532390594482, "learning_rate": 0.00014261221798313786, "loss": 2.0545, "step": 12190 }, { "epoch": 0.2873157175827799, "grad_norm": 2.6160011291503906, "learning_rate": 0.00014256511704582923, "loss": 1.9734, "step": 12200 }, { "epoch": 0.28755122226932317, "grad_norm": 2.259218692779541, "learning_rate": 0.00014251801610852056, "loss": 2.0344, "step": 12210 }, { "epoch": 0.28778672695586643, "grad_norm": 2.0822460651397705, "learning_rate": 0.0001424709151712119, "loss": 2.2014, "step": 12220 }, { "epoch": 0.2880222316424097, "grad_norm": 2.0669522285461426, "learning_rate": 0.00014242381423390326, "loss": 2.0617, "step": 12230 }, { "epoch": 0.28825773632895296, "grad_norm": 1.9552747011184692, "learning_rate": 0.0001423767132965946, "loss": 2.1155, "step": 12240 }, { "epoch": 0.2884932410154962, "grad_norm": 2.3942112922668457, "learning_rate": 0.00014232961235928596, "loss": 2.0574, "step": 12250 }, { "epoch": 0.2887287457020395, "grad_norm": 2.205756187438965, "learning_rate": 0.0001422825114219773, "loss": 2.1403, "step": 12260 }, { "epoch": 0.28896425038858276, "grad_norm": 2.649390697479248, "learning_rate": 0.00014223541048466863, "loss": 2.1289, "step": 12270 }, { "epoch": 0.289199755075126, "grad_norm": 2.3708388805389404, "learning_rate": 0.00014218830954736, "loss": 2.06, "step": 12280 }, { "epoch": 0.2894352597616693, "grad_norm": 2.051314115524292, "learning_rate": 0.00014214120861005136, "loss": 2.2359, "step": 12290 }, { "epoch": 0.2896707644482125, "grad_norm": 1.698848009109497, "learning_rate": 0.00014209410767274268, "loss": 2.0647, "step": 12300 }, { "epoch": 0.28990626913475576, "grad_norm": 1.9865273237228394, "learning_rate": 0.00014204700673543406, "loss": 1.9384, "step": 12310 }, { "epoch": 0.290141773821299, "grad_norm": 2.2270126342773438, "learning_rate": 0.00014199990579812538, 
"loss": 2.1169, "step": 12320 }, { "epoch": 0.2903772785078423, "grad_norm": 1.6230186223983765, "learning_rate": 0.00014195280486081673, "loss": 1.9432, "step": 12330 }, { "epoch": 0.29061278319438555, "grad_norm": 1.7483242750167847, "learning_rate": 0.00014190570392350808, "loss": 2.1609, "step": 12340 }, { "epoch": 0.2908482878809288, "grad_norm": 1.7598001956939697, "learning_rate": 0.00014185860298619943, "loss": 2.396, "step": 12350 }, { "epoch": 0.2910837925674721, "grad_norm": 2.14711856842041, "learning_rate": 0.00014181150204889078, "loss": 2.092, "step": 12360 }, { "epoch": 0.29131929725401534, "grad_norm": 1.9929486513137817, "learning_rate": 0.00014176440111158213, "loss": 2.0726, "step": 12370 }, { "epoch": 0.2915548019405586, "grad_norm": 1.9438811540603638, "learning_rate": 0.00014171730017427348, "loss": 1.9861, "step": 12380 }, { "epoch": 0.29179030662710187, "grad_norm": 1.812244176864624, "learning_rate": 0.00014167019923696483, "loss": 2.2, "step": 12390 }, { "epoch": 0.29202581131364513, "grad_norm": 1.8583556413650513, "learning_rate": 0.00014162309829965616, "loss": 2.0777, "step": 12400 }, { "epoch": 0.2922613160001884, "grad_norm": 2.8509726524353027, "learning_rate": 0.00014157599736234753, "loss": 2.112, "step": 12410 }, { "epoch": 0.29249682068673166, "grad_norm": 2.370173931121826, "learning_rate": 0.00014152889642503886, "loss": 2.1104, "step": 12420 }, { "epoch": 0.2927323253732749, "grad_norm": 2.105886459350586, "learning_rate": 0.0001414817954877302, "loss": 2.1593, "step": 12430 }, { "epoch": 0.2929678300598182, "grad_norm": 2.6540844440460205, "learning_rate": 0.00014143469455042156, "loss": 2.034, "step": 12440 }, { "epoch": 0.29320333474636145, "grad_norm": 1.7813196182250977, "learning_rate": 0.0001413875936131129, "loss": 1.899, "step": 12450 }, { "epoch": 0.2934388394329047, "grad_norm": 2.747343063354492, "learning_rate": 0.00014134049267580426, "loss": 2.1251, "step": 12460 }, { "epoch": 0.293674344119448, "grad_norm": 
2.120857000350952, "learning_rate": 0.0001412933917384956, "loss": 1.9557, "step": 12470 }, { "epoch": 0.29390984880599125, "grad_norm": 2.8175089359283447, "learning_rate": 0.00014124629080118693, "loss": 2.0522, "step": 12480 }, { "epoch": 0.2941453534925345, "grad_norm": 3.126023292541504, "learning_rate": 0.0001411991898638783, "loss": 2.06, "step": 12490 }, { "epoch": 0.2943808581790778, "grad_norm": 2.139693260192871, "learning_rate": 0.00014115208892656963, "loss": 2.2009, "step": 12500 }, { "epoch": 0.29461636286562104, "grad_norm": 2.007183790206909, "learning_rate": 0.00014110498798926098, "loss": 2.1605, "step": 12510 }, { "epoch": 0.2948518675521643, "grad_norm": 1.9379314184188843, "learning_rate": 0.00014105788705195233, "loss": 2.1381, "step": 12520 }, { "epoch": 0.29508737223870757, "grad_norm": 2.096290349960327, "learning_rate": 0.00014101078611464368, "loss": 2.2027, "step": 12530 }, { "epoch": 0.29532287692525083, "grad_norm": 2.452683925628662, "learning_rate": 0.00014096368517733503, "loss": 2.1766, "step": 12540 }, { "epoch": 0.2955583816117941, "grad_norm": 2.5896694660186768, "learning_rate": 0.00014091658424002638, "loss": 2.1109, "step": 12550 }, { "epoch": 0.29579388629833736, "grad_norm": 2.125363349914551, "learning_rate": 0.00014086948330271773, "loss": 2.3477, "step": 12560 }, { "epoch": 0.2960293909848806, "grad_norm": 1.9147471189498901, "learning_rate": 0.00014082238236540908, "loss": 2.4074, "step": 12570 }, { "epoch": 0.2962648956714239, "grad_norm": 2.466128349304199, "learning_rate": 0.00014077528142810043, "loss": 2.161, "step": 12580 }, { "epoch": 0.2965004003579671, "grad_norm": 2.0209436416625977, "learning_rate": 0.00014072818049079179, "loss": 2.0503, "step": 12590 }, { "epoch": 0.29673590504451036, "grad_norm": 2.1198041439056396, "learning_rate": 0.00014068107955348314, "loss": 2.166, "step": 12600 }, { "epoch": 0.2969714097310536, "grad_norm": 1.7354202270507812, "learning_rate": 0.00014063397861617446, "loss": 
2.0945, "step": 12610 }, { "epoch": 0.2972069144175969, "grad_norm": 1.8669902086257935, "learning_rate": 0.00014058687767886584, "loss": 1.9682, "step": 12620 }, { "epoch": 0.29744241910414015, "grad_norm": 2.1740376949310303, "learning_rate": 0.00014053977674155716, "loss": 2.1025, "step": 12630 }, { "epoch": 0.2976779237906834, "grad_norm": 2.275982141494751, "learning_rate": 0.0001404926758042485, "loss": 2.2597, "step": 12640 }, { "epoch": 0.2979134284772267, "grad_norm": 2.172459125518799, "learning_rate": 0.00014044557486693986, "loss": 2.1002, "step": 12650 }, { "epoch": 0.29814893316376995, "grad_norm": 2.4488515853881836, "learning_rate": 0.0001403984739296312, "loss": 1.9405, "step": 12660 }, { "epoch": 0.2983844378503132, "grad_norm": 2.1682851314544678, "learning_rate": 0.00014035137299232256, "loss": 2.1472, "step": 12670 }, { "epoch": 0.2986199425368565, "grad_norm": 1.9250439405441284, "learning_rate": 0.0001403042720550139, "loss": 2.1314, "step": 12680 }, { "epoch": 0.29885544722339974, "grad_norm": 3.761707305908203, "learning_rate": 0.00014025717111770523, "loss": 2.1523, "step": 12690 }, { "epoch": 0.299090951909943, "grad_norm": 2.87754487991333, "learning_rate": 0.0001402100701803966, "loss": 1.9867, "step": 12700 }, { "epoch": 0.29932645659648627, "grad_norm": 2.3228402137756348, "learning_rate": 0.00014016296924308793, "loss": 2.015, "step": 12710 }, { "epoch": 0.29956196128302953, "grad_norm": 1.869566798210144, "learning_rate": 0.00014011586830577928, "loss": 2.2132, "step": 12720 }, { "epoch": 0.2997974659695728, "grad_norm": 2.2412784099578857, "learning_rate": 0.00014006876736847063, "loss": 2.3368, "step": 12730 }, { "epoch": 0.30003297065611606, "grad_norm": 1.911073923110962, "learning_rate": 0.00014002166643116198, "loss": 2.0563, "step": 12740 }, { "epoch": 0.3002684753426593, "grad_norm": 2.371375799179077, "learning_rate": 0.00013997456549385333, "loss": 2.1771, "step": 12750 }, { "epoch": 0.3005039800292026, "grad_norm": 
2.312708854675293, "learning_rate": 0.00013992746455654469, "loss": 2.158, "step": 12760 }, { "epoch": 0.30073948471574585, "grad_norm": 2.721620559692383, "learning_rate": 0.000139880363619236, "loss": 2.2242, "step": 12770 }, { "epoch": 0.3009749894022891, "grad_norm": 2.6075215339660645, "learning_rate": 0.00013983326268192739, "loss": 2.029, "step": 12780 }, { "epoch": 0.3012104940888324, "grad_norm": 2.001995086669922, "learning_rate": 0.0001397861617446187, "loss": 2.1364, "step": 12790 }, { "epoch": 0.30144599877537565, "grad_norm": 1.792315125465393, "learning_rate": 0.00013973906080731009, "loss": 2.0791, "step": 12800 }, { "epoch": 0.3016815034619189, "grad_norm": 2.6156537532806396, "learning_rate": 0.0001396919598700014, "loss": 2.131, "step": 12810 }, { "epoch": 0.3019170081484622, "grad_norm": 2.277663230895996, "learning_rate": 0.00013964485893269276, "loss": 2.03, "step": 12820 }, { "epoch": 0.30215251283500544, "grad_norm": 2.09621000289917, "learning_rate": 0.0001395977579953841, "loss": 2.2744, "step": 12830 }, { "epoch": 0.3023880175215487, "grad_norm": 2.4095897674560547, "learning_rate": 0.00013955065705807546, "loss": 1.8946, "step": 12840 }, { "epoch": 0.30262352220809197, "grad_norm": 2.2622735500335693, "learning_rate": 0.0001395035561207668, "loss": 2.1998, "step": 12850 }, { "epoch": 0.30285902689463523, "grad_norm": 2.8324317932128906, "learning_rate": 0.00013945645518345816, "loss": 2.4248, "step": 12860 }, { "epoch": 0.30309453158117844, "grad_norm": 2.2162587642669678, "learning_rate": 0.0001394093542461495, "loss": 2.0623, "step": 12870 }, { "epoch": 0.3033300362677217, "grad_norm": 2.325887441635132, "learning_rate": 0.00013936225330884086, "loss": 2.2047, "step": 12880 }, { "epoch": 0.30356554095426497, "grad_norm": 2.152468681335449, "learning_rate": 0.0001393151523715322, "loss": 2.1984, "step": 12890 }, { "epoch": 0.30380104564080823, "grad_norm": 2.0017037391662598, "learning_rate": 0.00013926805143422353, "loss": 2.1574, 
"step": 12900 }, { "epoch": 0.3040365503273515, "grad_norm": 2.003835439682007, "learning_rate": 0.0001392209504969149, "loss": 2.2094, "step": 12910 }, { "epoch": 0.30427205501389476, "grad_norm": 2.4444828033447266, "learning_rate": 0.00013917384955960623, "loss": 2.0709, "step": 12920 }, { "epoch": 0.304507559700438, "grad_norm": 2.4966955184936523, "learning_rate": 0.00013912674862229758, "loss": 2.0336, "step": 12930 }, { "epoch": 0.3047430643869813, "grad_norm": 2.560464859008789, "learning_rate": 0.00013907964768498894, "loss": 2.0573, "step": 12940 }, { "epoch": 0.30497856907352455, "grad_norm": 2.135472059249878, "learning_rate": 0.00013903254674768029, "loss": 2.0721, "step": 12950 }, { "epoch": 0.3052140737600678, "grad_norm": 1.9246833324432373, "learning_rate": 0.00013898544581037164, "loss": 2.1015, "step": 12960 }, { "epoch": 0.3054495784466111, "grad_norm": 2.2237212657928467, "learning_rate": 0.00013893834487306299, "loss": 1.9604, "step": 12970 }, { "epoch": 0.30568508313315434, "grad_norm": 2.493617057800293, "learning_rate": 0.00013889124393575434, "loss": 2.278, "step": 12980 }, { "epoch": 0.3059205878196976, "grad_norm": 3.1759326457977295, "learning_rate": 0.00013884414299844569, "loss": 2.1974, "step": 12990 }, { "epoch": 0.3061560925062409, "grad_norm": 2.2904551029205322, "learning_rate": 0.000138797042061137, "loss": 2.0009, "step": 13000 }, { "epoch": 0.30639159719278414, "grad_norm": 2.1130900382995605, "learning_rate": 0.0001387499411238284, "loss": 2.1561, "step": 13010 }, { "epoch": 0.3066271018793274, "grad_norm": 2.156583786010742, "learning_rate": 0.0001387028401865197, "loss": 2.1903, "step": 13020 }, { "epoch": 0.30686260656587067, "grad_norm": 2.4466586112976074, "learning_rate": 0.00013865573924921106, "loss": 2.1301, "step": 13030 }, { "epoch": 0.30709811125241393, "grad_norm": 2.5151896476745605, "learning_rate": 0.0001386086383119024, "loss": 2.2717, "step": 13040 }, { "epoch": 0.3073336159389572, "grad_norm": 
2.063638210296631, "learning_rate": 0.00013856153737459376, "loss": 2.0034, "step": 13050 }, { "epoch": 0.30756912062550046, "grad_norm": 2.0933945178985596, "learning_rate": 0.0001385144364372851, "loss": 2.024, "step": 13060 }, { "epoch": 0.3078046253120437, "grad_norm": 2.3742902278900146, "learning_rate": 0.00013846733549997646, "loss": 1.9053, "step": 13070 }, { "epoch": 0.308040129998587, "grad_norm": 2.2515957355499268, "learning_rate": 0.00013842023456266778, "loss": 2.139, "step": 13080 }, { "epoch": 0.30827563468513025, "grad_norm": 2.533169984817505, "learning_rate": 0.00013837313362535916, "loss": 2.2177, "step": 13090 }, { "epoch": 0.3085111393716735, "grad_norm": 2.0766286849975586, "learning_rate": 0.00013832603268805048, "loss": 2.0354, "step": 13100 }, { "epoch": 0.3087466440582168, "grad_norm": 2.1932218074798584, "learning_rate": 0.00013827893175074184, "loss": 2.0149, "step": 13110 }, { "epoch": 0.30898214874476004, "grad_norm": 2.7338597774505615, "learning_rate": 0.00013823183081343319, "loss": 2.0193, "step": 13120 }, { "epoch": 0.3092176534313033, "grad_norm": 2.09232497215271, "learning_rate": 0.00013818472987612454, "loss": 2.1508, "step": 13130 }, { "epoch": 0.30945315811784657, "grad_norm": 2.1449930667877197, "learning_rate": 0.0001381376289388159, "loss": 1.9638, "step": 13140 }, { "epoch": 0.3096886628043898, "grad_norm": 2.363706350326538, "learning_rate": 0.00013809052800150724, "loss": 2.3092, "step": 13150 }, { "epoch": 0.30992416749093304, "grad_norm": 2.566270589828491, "learning_rate": 0.00013804342706419859, "loss": 2.0075, "step": 13160 }, { "epoch": 0.3101596721774763, "grad_norm": 2.4858806133270264, "learning_rate": 0.00013799632612688994, "loss": 2.1862, "step": 13170 }, { "epoch": 0.3103951768640196, "grad_norm": 2.256286144256592, "learning_rate": 0.0001379492251895813, "loss": 1.9553, "step": 13180 }, { "epoch": 0.31063068155056284, "grad_norm": 2.1771228313446045, "learning_rate": 0.00013790212425227264, "loss": 
2.0341, "step": 13190 }, { "epoch": 0.3108661862371061, "grad_norm": 2.82309889793396, "learning_rate": 0.000137855023314964, "loss": 2.3205, "step": 13200 }, { "epoch": 0.31110169092364937, "grad_norm": 2.4577043056488037, "learning_rate": 0.0001378079223776553, "loss": 2.0357, "step": 13210 }, { "epoch": 0.31133719561019263, "grad_norm": 2.040903091430664, "learning_rate": 0.0001377608214403467, "loss": 2.0322, "step": 13220 }, { "epoch": 0.3115727002967359, "grad_norm": 2.3443174362182617, "learning_rate": 0.000137713720503038, "loss": 2.0852, "step": 13230 }, { "epoch": 0.31180820498327916, "grad_norm": 2.266204357147217, "learning_rate": 0.00013766661956572936, "loss": 2.3327, "step": 13240 }, { "epoch": 0.3120437096698224, "grad_norm": 2.3548195362091064, "learning_rate": 0.0001376195186284207, "loss": 2.1512, "step": 13250 }, { "epoch": 0.3122792143563657, "grad_norm": 1.9592384099960327, "learning_rate": 0.00013757241769111206, "loss": 2.0161, "step": 13260 }, { "epoch": 0.31251471904290895, "grad_norm": 2.306349754333496, "learning_rate": 0.0001375253167538034, "loss": 2.1769, "step": 13270 }, { "epoch": 0.3127502237294522, "grad_norm": 2.76021146774292, "learning_rate": 0.00013747821581649476, "loss": 2.3517, "step": 13280 }, { "epoch": 0.3129857284159955, "grad_norm": 3.3570475578308105, "learning_rate": 0.00013743111487918609, "loss": 2.1244, "step": 13290 }, { "epoch": 0.31322123310253874, "grad_norm": 2.5103135108947754, "learning_rate": 0.00013738401394187746, "loss": 2.057, "step": 13300 }, { "epoch": 0.313456737789082, "grad_norm": 2.2112882137298584, "learning_rate": 0.00013733691300456879, "loss": 1.9779, "step": 13310 }, { "epoch": 0.31369224247562527, "grad_norm": 2.6416406631469727, "learning_rate": 0.00013728981206726014, "loss": 2.1394, "step": 13320 }, { "epoch": 0.31392774716216854, "grad_norm": 1.99900484085083, "learning_rate": 0.00013724271112995149, "loss": 2.3333, "step": 13330 }, { "epoch": 0.3141632518487118, "grad_norm": 
2.0924324989318848, "learning_rate": 0.00013719561019264284, "loss": 2.0693, "step": 13340 }, { "epoch": 0.31439875653525506, "grad_norm": 2.4140758514404297, "learning_rate": 0.0001371485092553342, "loss": 2.2516, "step": 13350 }, { "epoch": 0.31463426122179833, "grad_norm": 2.1066081523895264, "learning_rate": 0.00013710140831802554, "loss": 2.1283, "step": 13360 }, { "epoch": 0.3148697659083416, "grad_norm": 1.8612606525421143, "learning_rate": 0.00013705430738071686, "loss": 2.0633, "step": 13370 }, { "epoch": 0.31510527059488486, "grad_norm": 2.5588574409484863, "learning_rate": 0.00013700720644340824, "loss": 2.1888, "step": 13380 }, { "epoch": 0.3153407752814281, "grad_norm": 1.7056317329406738, "learning_rate": 0.00013696010550609956, "loss": 1.9666, "step": 13390 }, { "epoch": 0.3155762799679714, "grad_norm": 1.8611016273498535, "learning_rate": 0.00013691300456879094, "loss": 2.182, "step": 13400 }, { "epoch": 0.31581178465451465, "grad_norm": 2.0268967151641846, "learning_rate": 0.0001368659036314823, "loss": 1.8033, "step": 13410 }, { "epoch": 0.3160472893410579, "grad_norm": 1.7649638652801514, "learning_rate": 0.0001368188026941736, "loss": 1.9693, "step": 13420 }, { "epoch": 0.3162827940276011, "grad_norm": 3.1262307167053223, "learning_rate": 0.000136771701756865, "loss": 2.1137, "step": 13430 }, { "epoch": 0.3165182987141444, "grad_norm": 2.176802158355713, "learning_rate": 0.0001367246008195563, "loss": 2.0533, "step": 13440 }, { "epoch": 0.31675380340068765, "grad_norm": 2.7924866676330566, "learning_rate": 0.00013667749988224766, "loss": 1.9473, "step": 13450 }, { "epoch": 0.3169893080872309, "grad_norm": 2.8177592754364014, "learning_rate": 0.000136630398944939, "loss": 2.0061, "step": 13460 }, { "epoch": 0.3172248127737742, "grad_norm": 3.690781354904175, "learning_rate": 0.00013658329800763036, "loss": 2.2083, "step": 13470 }, { "epoch": 0.31746031746031744, "grad_norm": 2.0106966495513916, "learning_rate": 0.0001365361970703217, "loss": 
1.9977, "step": 13480 }, { "epoch": 0.3176958221468607, "grad_norm": 2.4227206707000732, "learning_rate": 0.00013648909613301306, "loss": 2.0599, "step": 13490 }, { "epoch": 0.31793132683340397, "grad_norm": 2.597426176071167, "learning_rate": 0.00013644199519570439, "loss": 2.0478, "step": 13500 }, { "epoch": 0.31816683151994724, "grad_norm": 2.364302158355713, "learning_rate": 0.00013639489425839576, "loss": 2.1535, "step": 13510 }, { "epoch": 0.3184023362064905, "grad_norm": 1.8173555135726929, "learning_rate": 0.0001363477933210871, "loss": 2.0284, "step": 13520 }, { "epoch": 0.31863784089303376, "grad_norm": 2.0568530559539795, "learning_rate": 0.00013630069238377844, "loss": 1.9601, "step": 13530 }, { "epoch": 0.318873345579577, "grad_norm": 2.300703525543213, "learning_rate": 0.0001362535914464698, "loss": 2.1593, "step": 13540 }, { "epoch": 0.3191088502661203, "grad_norm": 2.1626505851745605, "learning_rate": 0.00013620649050916114, "loss": 2.0686, "step": 13550 }, { "epoch": 0.31934435495266356, "grad_norm": 2.2925024032592773, "learning_rate": 0.0001361593895718525, "loss": 2.0937, "step": 13560 }, { "epoch": 0.3195798596392068, "grad_norm": 2.871372938156128, "learning_rate": 0.00013611228863454384, "loss": 2.2753, "step": 13570 }, { "epoch": 0.3198153643257501, "grad_norm": 2.3106420040130615, "learning_rate": 0.0001360651876972352, "loss": 2.2251, "step": 13580 }, { "epoch": 0.32005086901229335, "grad_norm": 2.192394256591797, "learning_rate": 0.00013601808675992654, "loss": 2.1868, "step": 13590 }, { "epoch": 0.3202863736988366, "grad_norm": 2.862781286239624, "learning_rate": 0.00013597098582261786, "loss": 2.1841, "step": 13600 }, { "epoch": 0.3205218783853799, "grad_norm": 2.3184902667999268, "learning_rate": 0.00013592388488530924, "loss": 2.3279, "step": 13610 }, { "epoch": 0.32075738307192314, "grad_norm": 2.3771584033966064, "learning_rate": 0.00013587678394800056, "loss": 2.2126, "step": 13620 }, { "epoch": 0.3209928877584664, "grad_norm": 
1.6889021396636963, "learning_rate": 0.0001358296830106919, "loss": 2.0039, "step": 13630 }, { "epoch": 0.32122839244500967, "grad_norm": 2.208984613418579, "learning_rate": 0.00013578258207338326, "loss": 1.9929, "step": 13640 }, { "epoch": 0.32146389713155293, "grad_norm": 2.5552730560302734, "learning_rate": 0.0001357354811360746, "loss": 2.0772, "step": 13650 }, { "epoch": 0.3216994018180962, "grad_norm": 2.576536178588867, "learning_rate": 0.00013568838019876596, "loss": 2.0566, "step": 13660 }, { "epoch": 0.32193490650463946, "grad_norm": 3.376354694366455, "learning_rate": 0.0001356412792614573, "loss": 2.0092, "step": 13670 }, { "epoch": 0.3221704111911827, "grad_norm": 2.312920331954956, "learning_rate": 0.00013559417832414864, "loss": 2.1, "step": 13680 }, { "epoch": 0.322405915877726, "grad_norm": 2.1818175315856934, "learning_rate": 0.00013554707738684001, "loss": 1.9808, "step": 13690 }, { "epoch": 0.32264142056426925, "grad_norm": 2.547168493270874, "learning_rate": 0.00013549997644953136, "loss": 2.1154, "step": 13700 }, { "epoch": 0.3228769252508125, "grad_norm": 1.7971171140670776, "learning_rate": 0.0001354528755122227, "loss": 2.126, "step": 13710 }, { "epoch": 0.3231124299373557, "grad_norm": 2.0792088508605957, "learning_rate": 0.00013540577457491406, "loss": 1.9622, "step": 13720 }, { "epoch": 0.323347934623899, "grad_norm": 1.9572780132293701, "learning_rate": 0.0001353586736376054, "loss": 1.9715, "step": 13730 }, { "epoch": 0.32358343931044226, "grad_norm": 2.749486207962036, "learning_rate": 0.00013531157270029676, "loss": 2.2712, "step": 13740 }, { "epoch": 0.3238189439969855, "grad_norm": 2.157297372817993, "learning_rate": 0.0001352644717629881, "loss": 2.2583, "step": 13750 }, { "epoch": 0.3240544486835288, "grad_norm": 2.2807443141937256, "learning_rate": 0.00013521737082567944, "loss": 2.1891, "step": 13760 }, { "epoch": 0.32428995337007205, "grad_norm": 1.8038594722747803, "learning_rate": 0.0001351702698883708, "loss": 1.9942, 
"step": 13770 }, { "epoch": 0.3245254580566153, "grad_norm": 2.015502691268921, "learning_rate": 0.00013512316895106214, "loss": 1.9229, "step": 13780 }, { "epoch": 0.3247609627431586, "grad_norm": 2.0219409465789795, "learning_rate": 0.0001350760680137535, "loss": 2.0995, "step": 13790 }, { "epoch": 0.32499646742970184, "grad_norm": 2.005467176437378, "learning_rate": 0.00013502896707644484, "loss": 2.196, "step": 13800 }, { "epoch": 0.3252319721162451, "grad_norm": 2.505066394805908, "learning_rate": 0.00013498186613913616, "loss": 2.0781, "step": 13810 }, { "epoch": 0.32546747680278837, "grad_norm": 2.1414785385131836, "learning_rate": 0.00013493476520182754, "loss": 2.0771, "step": 13820 }, { "epoch": 0.32570298148933163, "grad_norm": 2.4025304317474365, "learning_rate": 0.00013488766426451886, "loss": 2.2001, "step": 13830 }, { "epoch": 0.3259384861758749, "grad_norm": 2.1574246883392334, "learning_rate": 0.0001348405633272102, "loss": 2.0484, "step": 13840 }, { "epoch": 0.32617399086241816, "grad_norm": 2.238400936126709, "learning_rate": 0.00013479346238990156, "loss": 2.0084, "step": 13850 }, { "epoch": 0.3264094955489614, "grad_norm": 1.7974083423614502, "learning_rate": 0.00013474636145259291, "loss": 2.1113, "step": 13860 }, { "epoch": 0.3266450002355047, "grad_norm": 2.17846941947937, "learning_rate": 0.00013469926051528426, "loss": 2.3447, "step": 13870 }, { "epoch": 0.32688050492204795, "grad_norm": 2.0620272159576416, "learning_rate": 0.00013465215957797561, "loss": 2.1059, "step": 13880 }, { "epoch": 0.3271160096085912, "grad_norm": 2.4582271575927734, "learning_rate": 0.00013460505864066694, "loss": 1.8575, "step": 13890 }, { "epoch": 0.3273515142951345, "grad_norm": 2.7777035236358643, "learning_rate": 0.00013455795770335831, "loss": 2.1932, "step": 13900 }, { "epoch": 0.32758701898167775, "grad_norm": 1.6070411205291748, "learning_rate": 0.00013451085676604964, "loss": 1.9579, "step": 13910 }, { "epoch": 0.327822523668221, "grad_norm": 
2.466210126876831, "learning_rate": 0.000134463755828741, "loss": 1.9896, "step": 13920 }, { "epoch": 0.3280580283547643, "grad_norm": 1.9184893369674683, "learning_rate": 0.00013441665489143234, "loss": 2.2178, "step": 13930 }, { "epoch": 0.32829353304130754, "grad_norm": 2.1110756397247314, "learning_rate": 0.0001343695539541237, "loss": 2.1961, "step": 13940 }, { "epoch": 0.3285290377278508, "grad_norm": 2.830181121826172, "learning_rate": 0.00013432245301681504, "loss": 2.1987, "step": 13950 }, { "epoch": 0.32876454241439407, "grad_norm": 2.1129209995269775, "learning_rate": 0.0001342753520795064, "loss": 2.0927, "step": 13960 }, { "epoch": 0.32900004710093733, "grad_norm": 2.1881232261657715, "learning_rate": 0.0001342282511421977, "loss": 1.8427, "step": 13970 }, { "epoch": 0.3292355517874806, "grad_norm": 2.1353397369384766, "learning_rate": 0.0001341811502048891, "loss": 1.9289, "step": 13980 }, { "epoch": 0.32947105647402386, "grad_norm": 3.527761459350586, "learning_rate": 0.00013413404926758044, "loss": 2.2088, "step": 13990 }, { "epoch": 0.32970656116056707, "grad_norm": 2.0877022743225098, "learning_rate": 0.0001340869483302718, "loss": 2.1233, "step": 14000 }, { "epoch": 0.32994206584711033, "grad_norm": 1.560817837715149, "learning_rate": 0.00013403984739296314, "loss": 2.0715, "step": 14010 }, { "epoch": 0.3301775705336536, "grad_norm": 2.216294765472412, "learning_rate": 0.00013399274645565446, "loss": 2.0866, "step": 14020 }, { "epoch": 0.33041307522019686, "grad_norm": 2.3411684036254883, "learning_rate": 0.00013394564551834584, "loss": 2.1824, "step": 14030 }, { "epoch": 0.3306485799067401, "grad_norm": 2.613354206085205, "learning_rate": 0.00013389854458103716, "loss": 2.3151, "step": 14040 }, { "epoch": 0.3308840845932834, "grad_norm": 2.107166290283203, "learning_rate": 0.00013385144364372851, "loss": 2.091, "step": 14050 }, { "epoch": 0.33111958927982665, "grad_norm": 2.1655850410461426, "learning_rate": 0.00013380434270641986, "loss": 
2.2143, "step": 14060 }, { "epoch": 0.3313550939663699, "grad_norm": 1.9823925495147705, "learning_rate": 0.00013375724176911121, "loss": 2.1066, "step": 14070 }, { "epoch": 0.3315905986529132, "grad_norm": 2.26995849609375, "learning_rate": 0.00013371014083180256, "loss": 1.9626, "step": 14080 }, { "epoch": 0.33182610333945645, "grad_norm": 1.9416958093643188, "learning_rate": 0.00013366303989449392, "loss": 2.2461, "step": 14090 }, { "epoch": 0.3320616080259997, "grad_norm": 2.685157060623169, "learning_rate": 0.00013361593895718524, "loss": 1.9703, "step": 14100 }, { "epoch": 0.332297112712543, "grad_norm": 2.3456876277923584, "learning_rate": 0.00013356883801987662, "loss": 2.2025, "step": 14110 }, { "epoch": 0.33253261739908624, "grad_norm": 2.766294002532959, "learning_rate": 0.00013352173708256794, "loss": 2.2302, "step": 14120 }, { "epoch": 0.3327681220856295, "grad_norm": 2.4374420642852783, "learning_rate": 0.0001334746361452593, "loss": 2.1317, "step": 14130 }, { "epoch": 0.33300362677217277, "grad_norm": 1.8526066541671753, "learning_rate": 0.00013342753520795064, "loss": 2.0103, "step": 14140 }, { "epoch": 0.33323913145871603, "grad_norm": 2.461101770401001, "learning_rate": 0.000133380434270642, "loss": 2.0952, "step": 14150 }, { "epoch": 0.3334746361452593, "grad_norm": 1.950975775718689, "learning_rate": 0.00013333333333333334, "loss": 2.0181, "step": 14160 }, { "epoch": 0.33371014083180256, "grad_norm": 2.2419090270996094, "learning_rate": 0.0001332862323960247, "loss": 2.226, "step": 14170 }, { "epoch": 0.3339456455183458, "grad_norm": 2.077028751373291, "learning_rate": 0.000133239131458716, "loss": 2.0689, "step": 14180 }, { "epoch": 0.3341811502048891, "grad_norm": 1.9552679061889648, "learning_rate": 0.0001331920305214074, "loss": 2.219, "step": 14190 }, { "epoch": 0.33441665489143235, "grad_norm": 1.9980392456054688, "learning_rate": 0.00013314492958409871, "loss": 2.1455, "step": 14200 }, { "epoch": 0.3346521595779756, "grad_norm": 
2.1066527366638184, "learning_rate": 0.0001330978286467901, "loss": 2.413, "step": 14210 }, { "epoch": 0.3348876642645189, "grad_norm": 2.242450475692749, "learning_rate": 0.00013305072770948141, "loss": 2.0, "step": 14220 }, { "epoch": 0.33512316895106214, "grad_norm": 2.308070659637451, "learning_rate": 0.00013300362677217276, "loss": 1.9766, "step": 14230 }, { "epoch": 0.3353586736376054, "grad_norm": 3.0701427459716797, "learning_rate": 0.00013295652583486411, "loss": 2.0551, "step": 14240 }, { "epoch": 0.3355941783241487, "grad_norm": 2.204287052154541, "learning_rate": 0.00013290942489755546, "loss": 2.1141, "step": 14250 }, { "epoch": 0.33582968301069194, "grad_norm": 2.2963192462921143, "learning_rate": 0.00013286232396024682, "loss": 1.8639, "step": 14260 }, { "epoch": 0.3360651876972352, "grad_norm": 1.9334232807159424, "learning_rate": 0.00013281522302293817, "loss": 1.9872, "step": 14270 }, { "epoch": 0.3363006923837784, "grad_norm": 2.3119375705718994, "learning_rate": 0.00013276812208562952, "loss": 2.2071, "step": 14280 }, { "epoch": 0.3365361970703217, "grad_norm": 1.685998797416687, "learning_rate": 0.00013272102114832087, "loss": 2.2305, "step": 14290 }, { "epoch": 0.33677170175686494, "grad_norm": 1.8673522472381592, "learning_rate": 0.00013267392021101222, "loss": 2.0198, "step": 14300 }, { "epoch": 0.3370072064434082, "grad_norm": 2.057691812515259, "learning_rate": 0.00013262681927370354, "loss": 2.1267, "step": 14310 }, { "epoch": 0.33724271112995147, "grad_norm": 2.4289650917053223, "learning_rate": 0.00013257971833639492, "loss": 2.1753, "step": 14320 }, { "epoch": 0.33747821581649473, "grad_norm": 2.0615077018737793, "learning_rate": 0.00013253261739908624, "loss": 2.1745, "step": 14330 }, { "epoch": 0.337713720503038, "grad_norm": 1.9922257661819458, "learning_rate": 0.0001324855164617776, "loss": 2.041, "step": 14340 }, { "epoch": 0.33794922518958126, "grad_norm": 2.150548219680786, "learning_rate": 0.00013243841552446894, "loss": 2.054, 
"step": 14350 }, { "epoch": 0.3381847298761245, "grad_norm": 1.9815877676010132, "learning_rate": 0.0001323913145871603, "loss": 2.2345, "step": 14360 }, { "epoch": 0.3384202345626678, "grad_norm": 3.1625378131866455, "learning_rate": 0.00013234421364985164, "loss": 2.0449, "step": 14370 }, { "epoch": 0.33865573924921105, "grad_norm": 2.5849571228027344, "learning_rate": 0.000132297112712543, "loss": 2.1497, "step": 14380 }, { "epoch": 0.3388912439357543, "grad_norm": 3.0751826763153076, "learning_rate": 0.00013225001177523434, "loss": 2.0963, "step": 14390 }, { "epoch": 0.3391267486222976, "grad_norm": 1.572991967201233, "learning_rate": 0.0001322029108379257, "loss": 1.9016, "step": 14400 }, { "epoch": 0.33936225330884084, "grad_norm": 2.5276403427124023, "learning_rate": 0.00013215580990061701, "loss": 2.3332, "step": 14410 }, { "epoch": 0.3395977579953841, "grad_norm": 1.7656186819076538, "learning_rate": 0.0001321087089633084, "loss": 2.0629, "step": 14420 }, { "epoch": 0.3398332626819274, "grad_norm": 1.8778975009918213, "learning_rate": 0.00013206160802599972, "loss": 2.2139, "step": 14430 }, { "epoch": 0.34006876736847064, "grad_norm": 2.696181535720825, "learning_rate": 0.00013201450708869107, "loss": 1.9954, "step": 14440 }, { "epoch": 0.3403042720550139, "grad_norm": 1.6417945623397827, "learning_rate": 0.00013196740615138242, "loss": 1.9359, "step": 14450 }, { "epoch": 0.34053977674155717, "grad_norm": 2.1908366680145264, "learning_rate": 0.00013192030521407377, "loss": 1.9072, "step": 14460 }, { "epoch": 0.34077528142810043, "grad_norm": 2.7889277935028076, "learning_rate": 0.00013187320427676512, "loss": 2.0709, "step": 14470 }, { "epoch": 0.3410107861146437, "grad_norm": 2.474809408187866, "learning_rate": 0.00013182610333945647, "loss": 2.155, "step": 14480 }, { "epoch": 0.34124629080118696, "grad_norm": 2.2163426876068115, "learning_rate": 0.0001317790024021478, "loss": 2.1198, "step": 14490 }, { "epoch": 0.3414817954877302, "grad_norm": 
2.4669189453125, "learning_rate": 0.00013173190146483917, "loss": 2.1101, "step": 14500 }, { "epoch": 0.3417173001742735, "grad_norm": 2.4319045543670654, "learning_rate": 0.0001316848005275305, "loss": 2.208, "step": 14510 }, { "epoch": 0.34195280486081675, "grad_norm": 2.244265556335449, "learning_rate": 0.00013163769959022184, "loss": 2.0715, "step": 14520 }, { "epoch": 0.34218830954736, "grad_norm": 2.1139960289001465, "learning_rate": 0.0001315905986529132, "loss": 1.9087, "step": 14530 }, { "epoch": 0.3424238142339033, "grad_norm": 2.030888080596924, "learning_rate": 0.00013154349771560454, "loss": 2.0678, "step": 14540 }, { "epoch": 0.34265931892044654, "grad_norm": 3.3537919521331787, "learning_rate": 0.00013149639677829592, "loss": 2.1527, "step": 14550 }, { "epoch": 0.34289482360698975, "grad_norm": 2.137396812438965, "learning_rate": 0.00013144929584098724, "loss": 2.2981, "step": 14560 }, { "epoch": 0.343130328293533, "grad_norm": 3.0466394424438477, "learning_rate": 0.0001314021949036786, "loss": 2.1918, "step": 14570 }, { "epoch": 0.3433658329800763, "grad_norm": 2.6642491817474365, "learning_rate": 0.00013135509396636994, "loss": 2.2627, "step": 14580 }, { "epoch": 0.34360133766661954, "grad_norm": 1.9378482103347778, "learning_rate": 0.0001313079930290613, "loss": 2.0302, "step": 14590 }, { "epoch": 0.3438368423531628, "grad_norm": 2.130795955657959, "learning_rate": 0.00013126089209175264, "loss": 2.0724, "step": 14600 }, { "epoch": 0.3440723470397061, "grad_norm": 3.243426561355591, "learning_rate": 0.000131213791154444, "loss": 2.1017, "step": 14610 }, { "epoch": 0.34430785172624934, "grad_norm": 2.094165086746216, "learning_rate": 0.00013116669021713532, "loss": 2.2293, "step": 14620 }, { "epoch": 0.3445433564127926, "grad_norm": 2.4611902236938477, "learning_rate": 0.0001311195892798267, "loss": 2.0473, "step": 14630 }, { "epoch": 0.34477886109933586, "grad_norm": 2.694459915161133, "learning_rate": 0.00013107248834251802, "loss": 2.0456, 
"step": 14640 }, { "epoch": 0.34501436578587913, "grad_norm": 1.9303042888641357, "learning_rate": 0.00013102538740520937, "loss": 2.0716, "step": 14650 }, { "epoch": 0.3452498704724224, "grad_norm": 2.9043257236480713, "learning_rate": 0.00013097828646790072, "loss": 2.1234, "step": 14660 }, { "epoch": 0.34548537515896566, "grad_norm": 2.1518394947052, "learning_rate": 0.00013093118553059207, "loss": 2.1043, "step": 14670 }, { "epoch": 0.3457208798455089, "grad_norm": 2.1524922847747803, "learning_rate": 0.00013088408459328342, "loss": 2.2017, "step": 14680 }, { "epoch": 0.3459563845320522, "grad_norm": 3.270179510116577, "learning_rate": 0.00013083698365597477, "loss": 2.0904, "step": 14690 }, { "epoch": 0.34619188921859545, "grad_norm": 2.349952220916748, "learning_rate": 0.0001307898827186661, "loss": 1.9607, "step": 14700 }, { "epoch": 0.3464273939051387, "grad_norm": 2.087453842163086, "learning_rate": 0.00013074278178135747, "loss": 2.0567, "step": 14710 }, { "epoch": 0.346662898591682, "grad_norm": 1.739876389503479, "learning_rate": 0.0001306956808440488, "loss": 1.9899, "step": 14720 }, { "epoch": 0.34689840327822524, "grad_norm": 2.5640616416931152, "learning_rate": 0.00013064857990674014, "loss": 2.126, "step": 14730 }, { "epoch": 0.3471339079647685, "grad_norm": 2.1961350440979004, "learning_rate": 0.0001306014789694315, "loss": 2.2033, "step": 14740 }, { "epoch": 0.34736941265131177, "grad_norm": 2.4550042152404785, "learning_rate": 0.00013055437803212284, "loss": 2.2094, "step": 14750 }, { "epoch": 0.34760491733785503, "grad_norm": 2.9043076038360596, "learning_rate": 0.0001305072770948142, "loss": 2.1208, "step": 14760 }, { "epoch": 0.3478404220243983, "grad_norm": 2.2261176109313965, "learning_rate": 0.00013046017615750554, "loss": 2.3374, "step": 14770 }, { "epoch": 0.34807592671094156, "grad_norm": 3.6236088275909424, "learning_rate": 0.00013041307522019687, "loss": 2.1553, "step": 14780 }, { "epoch": 0.3483114313974848, "grad_norm": 
1.7490025758743286, "learning_rate": 0.00013036597428288824, "loss": 2.0613, "step": 14790 }, { "epoch": 0.3485469360840281, "grad_norm": 1.8174374103546143, "learning_rate": 0.00013031887334557957, "loss": 1.9779, "step": 14800 }, { "epoch": 0.34878244077057136, "grad_norm": 2.0383830070495605, "learning_rate": 0.00013027177240827094, "loss": 2.0438, "step": 14810 }, { "epoch": 0.3490179454571146, "grad_norm": 2.6745285987854004, "learning_rate": 0.0001302246714709623, "loss": 2.0689, "step": 14820 }, { "epoch": 0.3492534501436579, "grad_norm": 2.264727830886841, "learning_rate": 0.00013017757053365362, "loss": 2.3443, "step": 14830 }, { "epoch": 0.34948895483020115, "grad_norm": 1.7950100898742676, "learning_rate": 0.000130130469596345, "loss": 2.101, "step": 14840 }, { "epoch": 0.34972445951674436, "grad_norm": 2.4111218452453613, "learning_rate": 0.00013008336865903632, "loss": 1.9223, "step": 14850 }, { "epoch": 0.3499599642032876, "grad_norm": 2.078031539916992, "learning_rate": 0.00013003626772172767, "loss": 2.1652, "step": 14860 }, { "epoch": 0.3501954688898309, "grad_norm": 2.400324583053589, "learning_rate": 0.00012998916678441902, "loss": 1.9705, "step": 14870 }, { "epoch": 0.35043097357637415, "grad_norm": 1.9067310094833374, "learning_rate": 0.00012994206584711037, "loss": 2.0825, "step": 14880 }, { "epoch": 0.3506664782629174, "grad_norm": 2.236149549484253, "learning_rate": 0.00012989496490980172, "loss": 2.2749, "step": 14890 }, { "epoch": 0.3509019829494607, "grad_norm": 1.9904561042785645, "learning_rate": 0.00012984786397249307, "loss": 2.0971, "step": 14900 }, { "epoch": 0.35113748763600394, "grad_norm": 2.2129764556884766, "learning_rate": 0.0001298007630351844, "loss": 2.2559, "step": 14910 }, { "epoch": 0.3513729923225472, "grad_norm": 2.020630121231079, "learning_rate": 0.00012975366209787577, "loss": 1.968, "step": 14920 }, { "epoch": 0.35160849700909047, "grad_norm": 3.8411476612091064, "learning_rate": 0.0001297065611605671, "loss": 
2.0724, "step": 14930 }, { "epoch": 0.35184400169563373, "grad_norm": 4.267243385314941, "learning_rate": 0.00012965946022325844, "loss": 1.9732, "step": 14940 }, { "epoch": 0.352079506382177, "grad_norm": 2.2827847003936768, "learning_rate": 0.0001296123592859498, "loss": 2.3295, "step": 14950 }, { "epoch": 0.35231501106872026, "grad_norm": 2.276663064956665, "learning_rate": 0.00012956525834864114, "loss": 2.2312, "step": 14960 }, { "epoch": 0.3525505157552635, "grad_norm": 2.2448136806488037, "learning_rate": 0.0001295181574113325, "loss": 2.0823, "step": 14970 }, { "epoch": 0.3527860204418068, "grad_norm": 2.307004690170288, "learning_rate": 0.00012947105647402384, "loss": 1.9903, "step": 14980 }, { "epoch": 0.35302152512835006, "grad_norm": 2.2261812686920166, "learning_rate": 0.0001294239555367152, "loss": 2.0342, "step": 14990 }, { "epoch": 0.3532570298148933, "grad_norm": 2.9633865356445312, "learning_rate": 0.00012937685459940654, "loss": 1.9893, "step": 15000 }, { "epoch": 0.3534925345014366, "grad_norm": 2.297091007232666, "learning_rate": 0.00012932975366209787, "loss": 2.258, "step": 15010 }, { "epoch": 0.35372803918797985, "grad_norm": 3.055051803588867, "learning_rate": 0.00012928265272478924, "loss": 2.0553, "step": 15020 }, { "epoch": 0.3539635438745231, "grad_norm": 1.8271070718765259, "learning_rate": 0.00012923555178748057, "loss": 2.1569, "step": 15030 }, { "epoch": 0.3541990485610664, "grad_norm": 2.4504764080047607, "learning_rate": 0.00012918845085017192, "loss": 2.1445, "step": 15040 }, { "epoch": 0.35443455324760964, "grad_norm": 3.338831901550293, "learning_rate": 0.00012914134991286327, "loss": 2.151, "step": 15050 }, { "epoch": 0.3546700579341529, "grad_norm": 1.9933847188949585, "learning_rate": 0.00012909424897555462, "loss": 1.9229, "step": 15060 }, { "epoch": 0.35490556262069617, "grad_norm": 1.8507821559906006, "learning_rate": 0.00012904714803824597, "loss": 1.8161, "step": 15070 }, { "epoch": 0.35514106730723943, "grad_norm": 
2.110309362411499, "learning_rate": 0.00012900004710093732, "loss": 2.107, "step": 15080 }, { "epoch": 0.3553765719937827, "grad_norm": 2.447514057159424, "learning_rate": 0.00012895294616362864, "loss": 2.0256, "step": 15090 }, { "epoch": 0.35561207668032596, "grad_norm": 2.630833864212036, "learning_rate": 0.00012890584522632002, "loss": 2.2369, "step": 15100 }, { "epoch": 0.3558475813668692, "grad_norm": 2.210677146911621, "learning_rate": 0.00012885874428901137, "loss": 2.3155, "step": 15110 }, { "epoch": 0.3560830860534125, "grad_norm": 2.5040535926818848, "learning_rate": 0.0001288116433517027, "loss": 2.1744, "step": 15120 }, { "epoch": 0.3563185907399557, "grad_norm": 2.41166615486145, "learning_rate": 0.00012876454241439407, "loss": 1.9641, "step": 15130 }, { "epoch": 0.35655409542649896, "grad_norm": 2.1737725734710693, "learning_rate": 0.0001287174414770854, "loss": 2.0927, "step": 15140 }, { "epoch": 0.3567896001130422, "grad_norm": 2.3262150287628174, "learning_rate": 0.00012867034053977677, "loss": 1.8485, "step": 15150 }, { "epoch": 0.3570251047995855, "grad_norm": 2.214301586151123, "learning_rate": 0.0001286232396024681, "loss": 2.0541, "step": 15160 }, { "epoch": 0.35726060948612876, "grad_norm": 3.0727179050445557, "learning_rate": 0.0001285808487588903, "loss": 2.2335, "step": 15170 }, { "epoch": 0.357496114172672, "grad_norm": 1.55754816532135, "learning_rate": 0.00012853374782158166, "loss": 1.7324, "step": 15180 }, { "epoch": 0.3577316188592153, "grad_norm": 2.219801425933838, "learning_rate": 0.000128486646884273, "loss": 2.1554, "step": 15190 }, { "epoch": 0.35796712354575855, "grad_norm": 2.0826430320739746, "learning_rate": 0.00012843954594696433, "loss": 2.0151, "step": 15200 }, { "epoch": 0.3582026282323018, "grad_norm": 2.083486557006836, "learning_rate": 0.0001283924450096557, "loss": 2.1411, "step": 15210 }, { "epoch": 0.3584381329188451, "grad_norm": 2.3072783946990967, "learning_rate": 0.00012834534407234704, "loss": 2.006, "step": 
15220 }, { "epoch": 0.35867363760538834, "grad_norm": 2.440856456756592, "learning_rate": 0.00012829824313503839, "loss": 2.0854, "step": 15230 }, { "epoch": 0.3589091422919316, "grad_norm": 2.845301866531372, "learning_rate": 0.00012825114219772974, "loss": 2.3434, "step": 15240 }, { "epoch": 0.35914464697847487, "grad_norm": 2.953895330429077, "learning_rate": 0.00012820404126042109, "loss": 2.0988, "step": 15250 }, { "epoch": 0.35938015166501813, "grad_norm": 2.4946932792663574, "learning_rate": 0.00012815694032311244, "loss": 2.0624, "step": 15260 }, { "epoch": 0.3596156563515614, "grad_norm": 2.2791571617126465, "learning_rate": 0.00012810983938580379, "loss": 2.0612, "step": 15270 }, { "epoch": 0.35985116103810466, "grad_norm": 2.0594568252563477, "learning_rate": 0.00012806273844849514, "loss": 2.2905, "step": 15280 }, { "epoch": 0.3600866657246479, "grad_norm": 1.937827467918396, "learning_rate": 0.0001280156375111865, "loss": 2.2477, "step": 15290 }, { "epoch": 0.3603221704111912, "grad_norm": 2.1612651348114014, "learning_rate": 0.0001279685365738778, "loss": 2.064, "step": 15300 }, { "epoch": 0.36055767509773445, "grad_norm": 2.1727564334869385, "learning_rate": 0.0001279214356365692, "loss": 2.1897, "step": 15310 }, { "epoch": 0.3607931797842777, "grad_norm": 1.9623788595199585, "learning_rate": 0.0001278743346992605, "loss": 2.0795, "step": 15320 }, { "epoch": 0.361028684470821, "grad_norm": 2.5158348083496094, "learning_rate": 0.00012782723376195186, "loss": 2.3262, "step": 15330 }, { "epoch": 0.36126418915736425, "grad_norm": 1.673125982284546, "learning_rate": 0.0001277801328246432, "loss": 2.0423, "step": 15340 }, { "epoch": 0.3614996938439075, "grad_norm": 2.2631309032440186, "learning_rate": 0.00012773303188733456, "loss": 1.9951, "step": 15350 }, { "epoch": 0.3617351985304508, "grad_norm": 2.103239059448242, "learning_rate": 0.0001276859309500259, "loss": 2.1707, "step": 15360 }, { "epoch": 0.36197070321699404, "grad_norm": 2.2068543434143066, 
"learning_rate": 0.00012763883001271726, "loss": 2.0808, "step": 15370 }, { "epoch": 0.3622062079035373, "grad_norm": 2.3455021381378174, "learning_rate": 0.0001275917290754086, "loss": 2.1042, "step": 15380 }, { "epoch": 0.36244171259008057, "grad_norm": 2.655992031097412, "learning_rate": 0.00012754462813809996, "loss": 2.165, "step": 15390 }, { "epoch": 0.36267721727662383, "grad_norm": 2.5498814582824707, "learning_rate": 0.0001274975272007913, "loss": 2.251, "step": 15400 }, { "epoch": 0.36291272196316704, "grad_norm": 2.270501136779785, "learning_rate": 0.00012745042626348264, "loss": 1.9743, "step": 15410 }, { "epoch": 0.3631482266497103, "grad_norm": 1.8444944620132446, "learning_rate": 0.000127403325326174, "loss": 2.2257, "step": 15420 }, { "epoch": 0.36338373133625357, "grad_norm": 3.1018927097320557, "learning_rate": 0.00012735622438886534, "loss": 2.0163, "step": 15430 }, { "epoch": 0.36361923602279683, "grad_norm": 3.2892568111419678, "learning_rate": 0.0001273091234515567, "loss": 2.0607, "step": 15440 }, { "epoch": 0.3638547407093401, "grad_norm": 2.4903275966644287, "learning_rate": 0.00012726202251424804, "loss": 2.0741, "step": 15450 }, { "epoch": 0.36409024539588336, "grad_norm": 2.295133590698242, "learning_rate": 0.0001272149215769394, "loss": 2.3047, "step": 15460 }, { "epoch": 0.3643257500824266, "grad_norm": 1.8592169284820557, "learning_rate": 0.00012716782063963074, "loss": 2.1098, "step": 15470 }, { "epoch": 0.3645612547689699, "grad_norm": 1.8355640172958374, "learning_rate": 0.0001271207197023221, "loss": 2.0824, "step": 15480 }, { "epoch": 0.36479675945551315, "grad_norm": 2.295816421508789, "learning_rate": 0.00012707361876501344, "loss": 2.1756, "step": 15490 }, { "epoch": 0.3650322641420564, "grad_norm": 2.0585267543792725, "learning_rate": 0.0001270265178277048, "loss": 2.3163, "step": 15500 }, { "epoch": 0.3652677688285997, "grad_norm": 1.753099799156189, "learning_rate": 0.0001269794168903961, "loss": 2.1565, "step": 15510 }, { 
"epoch": 0.36550327351514295, "grad_norm": 2.6697375774383545, "learning_rate": 0.0001269323159530875, "loss": 2.2484, "step": 15520 }, { "epoch": 0.3657387782016862, "grad_norm": 2.1869733333587646, "learning_rate": 0.0001268852150157788, "loss": 2.1681, "step": 15530 }, { "epoch": 0.3659742828882295, "grad_norm": 2.7051522731781006, "learning_rate": 0.00012683811407847016, "loss": 2.0296, "step": 15540 }, { "epoch": 0.36620978757477274, "grad_norm": 2.1270742416381836, "learning_rate": 0.0001267910131411615, "loss": 1.9708, "step": 15550 }, { "epoch": 0.366445292261316, "grad_norm": 2.645512342453003, "learning_rate": 0.00012674391220385286, "loss": 2.1499, "step": 15560 }, { "epoch": 0.36668079694785927, "grad_norm": 2.9818031787872314, "learning_rate": 0.0001266968112665442, "loss": 2.0822, "step": 15570 }, { "epoch": 0.36691630163440253, "grad_norm": 3.0397183895111084, "learning_rate": 0.00012664971032923556, "loss": 2.1616, "step": 15580 }, { "epoch": 0.3671518063209458, "grad_norm": 2.2161061763763428, "learning_rate": 0.00012660260939192689, "loss": 1.9094, "step": 15590 }, { "epoch": 0.36738731100748906, "grad_norm": 1.962997555732727, "learning_rate": 0.00012655550845461826, "loss": 2.0234, "step": 15600 }, { "epoch": 0.3676228156940323, "grad_norm": 2.1313388347625732, "learning_rate": 0.00012650840751730959, "loss": 2.04, "step": 15610 }, { "epoch": 0.3678583203805756, "grad_norm": 2.517944574356079, "learning_rate": 0.00012646130658000094, "loss": 2.0798, "step": 15620 }, { "epoch": 0.36809382506711885, "grad_norm": 2.792917251586914, "learning_rate": 0.0001264142056426923, "loss": 1.9457, "step": 15630 }, { "epoch": 0.3683293297536621, "grad_norm": 2.193131685256958, "learning_rate": 0.00012636710470538364, "loss": 2.1932, "step": 15640 }, { "epoch": 0.3685648344402054, "grad_norm": 2.234732151031494, "learning_rate": 0.00012632000376807501, "loss": 2.1695, "step": 15650 }, { "epoch": 0.36880033912674864, "grad_norm": 2.553999900817871, 
"learning_rate": 0.00012627290283076634, "loss": 2.1587, "step": 15660 }, { "epoch": 0.3690358438132919, "grad_norm": 2.0366036891937256, "learning_rate": 0.0001262258018934577, "loss": 2.1545, "step": 15670 }, { "epoch": 0.3692713484998352, "grad_norm": 1.8725734949111938, "learning_rate": 0.00012617870095614904, "loss": 2.1266, "step": 15680 }, { "epoch": 0.36950685318637844, "grad_norm": 2.4001288414001465, "learning_rate": 0.0001261316000188404, "loss": 2.1415, "step": 15690 }, { "epoch": 0.36974235787292165, "grad_norm": 2.269136428833008, "learning_rate": 0.00012608449908153174, "loss": 1.9927, "step": 15700 }, { "epoch": 0.3699778625594649, "grad_norm": 2.2865076065063477, "learning_rate": 0.0001260373981442231, "loss": 2.3061, "step": 15710 }, { "epoch": 0.3702133672460082, "grad_norm": 2.2969698905944824, "learning_rate": 0.0001259902972069144, "loss": 2.1753, "step": 15720 }, { "epoch": 0.37044887193255144, "grad_norm": 2.6389291286468506, "learning_rate": 0.0001259431962696058, "loss": 1.9136, "step": 15730 }, { "epoch": 0.3706843766190947, "grad_norm": 3.111884355545044, "learning_rate": 0.0001258960953322971, "loss": 1.9592, "step": 15740 }, { "epoch": 0.37091988130563797, "grad_norm": 1.8911104202270508, "learning_rate": 0.00012584899439498846, "loss": 1.9782, "step": 15750 }, { "epoch": 0.37115538599218123, "grad_norm": 2.410327672958374, "learning_rate": 0.0001258018934576798, "loss": 2.1944, "step": 15760 }, { "epoch": 0.3713908906787245, "grad_norm": 2.6267807483673096, "learning_rate": 0.00012575479252037116, "loss": 2.1023, "step": 15770 }, { "epoch": 0.37162639536526776, "grad_norm": 2.455199718475342, "learning_rate": 0.0001257076915830625, "loss": 2.2238, "step": 15780 }, { "epoch": 0.371861900051811, "grad_norm": 2.131810188293457, "learning_rate": 0.00012566059064575386, "loss": 2.1704, "step": 15790 }, { "epoch": 0.3720974047383543, "grad_norm": 2.0262787342071533, "learning_rate": 0.0001256134897084452, "loss": 2.0336, "step": 15800 }, { 
"epoch": 0.37233290942489755, "grad_norm": 2.7119791507720947, "learning_rate": 0.00012556638877113656, "loss": 2.0177, "step": 15810 }, { "epoch": 0.3725684141114408, "grad_norm": 2.81471586227417, "learning_rate": 0.0001255192878338279, "loss": 1.9555, "step": 15820 }, { "epoch": 0.3728039187979841, "grad_norm": 1.8543721437454224, "learning_rate": 0.00012547218689651924, "loss": 2.1615, "step": 15830 }, { "epoch": 0.37303942348452734, "grad_norm": 2.129915952682495, "learning_rate": 0.0001254250859592106, "loss": 2.0439, "step": 15840 }, { "epoch": 0.3732749281710706, "grad_norm": 1.9343938827514648, "learning_rate": 0.00012537798502190194, "loss": 1.944, "step": 15850 }, { "epoch": 0.37351043285761387, "grad_norm": 2.403035879135132, "learning_rate": 0.0001253308840845933, "loss": 2.0897, "step": 15860 }, { "epoch": 0.37374593754415714, "grad_norm": 1.8205158710479736, "learning_rate": 0.00012528378314728464, "loss": 2.1931, "step": 15870 }, { "epoch": 0.3739814422307004, "grad_norm": 2.119952917098999, "learning_rate": 0.000125236682209976, "loss": 2.254, "step": 15880 }, { "epoch": 0.37421694691724366, "grad_norm": 2.8081493377685547, "learning_rate": 0.00012518958127266734, "loss": 2.0929, "step": 15890 }, { "epoch": 0.37445245160378693, "grad_norm": 1.801512598991394, "learning_rate": 0.00012514248033535866, "loss": 2.1442, "step": 15900 }, { "epoch": 0.3746879562903302, "grad_norm": 2.782501697540283, "learning_rate": 0.00012509537939805004, "loss": 2.1213, "step": 15910 }, { "epoch": 0.37492346097687346, "grad_norm": 2.08080792427063, "learning_rate": 0.00012504827846074136, "loss": 2.0139, "step": 15920 }, { "epoch": 0.3751589656634167, "grad_norm": 2.6191723346710205, "learning_rate": 0.0001250011775234327, "loss": 2.1154, "step": 15930 }, { "epoch": 0.37539447034996, "grad_norm": 2.5812697410583496, "learning_rate": 0.0001249540765861241, "loss": 1.9264, "step": 15940 }, { "epoch": 0.37562997503650325, "grad_norm": 1.9726451635360718, "learning_rate": 
0.0001249069756488154, "loss": 2.1646, "step": 15950 }, { "epoch": 0.3758654797230465, "grad_norm": 1.8344637155532837, "learning_rate": 0.00012485987471150676, "loss": 2.1906, "step": 15960 }, { "epoch": 0.3761009844095898, "grad_norm": 2.092193365097046, "learning_rate": 0.00012481277377419811, "loss": 2.2604, "step": 15970 }, { "epoch": 0.376336489096133, "grad_norm": 1.8692036867141724, "learning_rate": 0.00012476567283688946, "loss": 1.9886, "step": 15980 }, { "epoch": 0.37657199378267625, "grad_norm": 1.7763489484786987, "learning_rate": 0.00012471857189958081, "loss": 2.0102, "step": 15990 }, { "epoch": 0.3768074984692195, "grad_norm": 2.593400478363037, "learning_rate": 0.00012467147096227216, "loss": 2.1123, "step": 16000 }, { "epoch": 0.3770430031557628, "grad_norm": 2.239337205886841, "learning_rate": 0.0001246243700249635, "loss": 2.2664, "step": 16010 }, { "epoch": 0.37727850784230604, "grad_norm": 2.6145036220550537, "learning_rate": 0.00012457726908765486, "loss": 2.1313, "step": 16020 }, { "epoch": 0.3775140125288493, "grad_norm": 2.3163740634918213, "learning_rate": 0.0001245301681503462, "loss": 2.1796, "step": 16030 }, { "epoch": 0.37774951721539257, "grad_norm": 1.7631512880325317, "learning_rate": 0.00012448306721303757, "loss": 2.0957, "step": 16040 }, { "epoch": 0.37798502190193584, "grad_norm": 1.9558098316192627, "learning_rate": 0.0001244359662757289, "loss": 2.0811, "step": 16050 }, { "epoch": 0.3782205265884791, "grad_norm": 1.9874584674835205, "learning_rate": 0.00012438886533842024, "loss": 1.7742, "step": 16060 }, { "epoch": 0.37845603127502236, "grad_norm": 2.0579781532287598, "learning_rate": 0.0001243417644011116, "loss": 1.9229, "step": 16070 }, { "epoch": 0.37869153596156563, "grad_norm": 2.639272928237915, "learning_rate": 0.00012429466346380294, "loss": 2.294, "step": 16080 }, { "epoch": 0.3789270406481089, "grad_norm": 2.7305397987365723, "learning_rate": 0.0001242475625264943, "loss": 2.1147, "step": 16090 }, { "epoch": 
0.37916254533465216, "grad_norm": 3.5461792945861816, "learning_rate": 0.00012420046158918564, "loss": 2.1957, "step": 16100 }, { "epoch": 0.3793980500211954, "grad_norm": 2.2670328617095947, "learning_rate": 0.00012415336065187696, "loss": 2.0242, "step": 16110 }, { "epoch": 0.3796335547077387, "grad_norm": 2.6801137924194336, "learning_rate": 0.00012410625971456834, "loss": 2.0179, "step": 16120 }, { "epoch": 0.37986905939428195, "grad_norm": 2.0788025856018066, "learning_rate": 0.00012405915877725966, "loss": 1.8448, "step": 16130 }, { "epoch": 0.3801045640808252, "grad_norm": 1.919297695159912, "learning_rate": 0.00012401205783995101, "loss": 1.9961, "step": 16140 }, { "epoch": 0.3803400687673685, "grad_norm": 1.8758867979049683, "learning_rate": 0.00012396495690264236, "loss": 2.3458, "step": 16150 }, { "epoch": 0.38057557345391174, "grad_norm": 2.559508800506592, "learning_rate": 0.00012391785596533371, "loss": 2.1396, "step": 16160 }, { "epoch": 0.380811078140455, "grad_norm": 1.8591848611831665, "learning_rate": 0.00012387075502802506, "loss": 2.0795, "step": 16170 }, { "epoch": 0.38104658282699827, "grad_norm": 1.8270467519760132, "learning_rate": 0.00012382365409071641, "loss": 1.9482, "step": 16180 }, { "epoch": 0.38128208751354153, "grad_norm": 1.938462257385254, "learning_rate": 0.00012377655315340774, "loss": 2.0556, "step": 16190 }, { "epoch": 0.3815175922000848, "grad_norm": 2.203063726425171, "learning_rate": 0.00012372945221609912, "loss": 2.1188, "step": 16200 }, { "epoch": 0.38175309688662806, "grad_norm": 2.3647751808166504, "learning_rate": 0.00012368235127879047, "loss": 1.9052, "step": 16210 }, { "epoch": 0.3819886015731713, "grad_norm": 2.129561185836792, "learning_rate": 0.0001236352503414818, "loss": 2.1535, "step": 16220 }, { "epoch": 0.3822241062597146, "grad_norm": 2.4006335735321045, "learning_rate": 0.00012358814940417317, "loss": 2.0127, "step": 16230 }, { "epoch": 0.38245961094625786, "grad_norm": 2.2458090782165527, 
"learning_rate": 0.0001235410484668645, "loss": 2.1334, "step": 16240 }, { "epoch": 0.3826951156328011, "grad_norm": 2.183962106704712, "learning_rate": 0.00012349394752955587, "loss": 2.0084, "step": 16250 }, { "epoch": 0.38293062031934433, "grad_norm": 2.067183017730713, "learning_rate": 0.0001234468465922472, "loss": 1.9644, "step": 16260 }, { "epoch": 0.3831661250058876, "grad_norm": 1.8971290588378906, "learning_rate": 0.00012339974565493854, "loss": 1.9136, "step": 16270 }, { "epoch": 0.38340162969243086, "grad_norm": 2.4257447719573975, "learning_rate": 0.0001233526447176299, "loss": 2.3739, "step": 16280 }, { "epoch": 0.3836371343789741, "grad_norm": 2.2010610103607178, "learning_rate": 0.00012330554378032124, "loss": 2.1275, "step": 16290 }, { "epoch": 0.3838726390655174, "grad_norm": 2.2034873962402344, "learning_rate": 0.0001232584428430126, "loss": 2.007, "step": 16300 }, { "epoch": 0.38410814375206065, "grad_norm": 2.489978551864624, "learning_rate": 0.00012321134190570394, "loss": 2.0106, "step": 16310 }, { "epoch": 0.3843436484386039, "grad_norm": 2.219968795776367, "learning_rate": 0.00012316424096839526, "loss": 1.946, "step": 16320 }, { "epoch": 0.3845791531251472, "grad_norm": 3.080683946609497, "learning_rate": 0.00012311714003108664, "loss": 2.1341, "step": 16330 }, { "epoch": 0.38481465781169044, "grad_norm": 2.717668294906616, "learning_rate": 0.00012307003909377796, "loss": 2.0214, "step": 16340 }, { "epoch": 0.3850501624982337, "grad_norm": 2.038290023803711, "learning_rate": 0.00012302293815646931, "loss": 1.8628, "step": 16350 }, { "epoch": 0.38528566718477697, "grad_norm": 2.27325177192688, "learning_rate": 0.00012297583721916066, "loss": 2.0714, "step": 16360 }, { "epoch": 0.38552117187132023, "grad_norm": 2.770235776901245, "learning_rate": 0.00012292873628185202, "loss": 2.1704, "step": 16370 }, { "epoch": 0.3857566765578635, "grad_norm": 2.967625379562378, "learning_rate": 0.00012288163534454337, "loss": 2.0807, "step": 16380 }, { 
"epoch": 0.38599218124440676, "grad_norm": 2.048448085784912, "learning_rate": 0.00012283453440723472, "loss": 2.1044, "step": 16390 }, { "epoch": 0.38622768593095, "grad_norm": 2.9608845710754395, "learning_rate": 0.00012278743346992604, "loss": 2.1001, "step": 16400 }, { "epoch": 0.3864631906174933, "grad_norm": 1.9301419258117676, "learning_rate": 0.00012274033253261742, "loss": 2.0542, "step": 16410 }, { "epoch": 0.38669869530403655, "grad_norm": 1.865768313407898, "learning_rate": 0.00012269323159530874, "loss": 2.001, "step": 16420 }, { "epoch": 0.3869341999905798, "grad_norm": 2.2581074237823486, "learning_rate": 0.0001226461306580001, "loss": 2.0847, "step": 16430 }, { "epoch": 0.3871697046771231, "grad_norm": 1.6205297708511353, "learning_rate": 0.00012259902972069144, "loss": 2.1412, "step": 16440 }, { "epoch": 0.38740520936366635, "grad_norm": 2.4812774658203125, "learning_rate": 0.0001225519287833828, "loss": 2.1315, "step": 16450 }, { "epoch": 0.3876407140502096, "grad_norm": 2.2032692432403564, "learning_rate": 0.00012250482784607414, "loss": 2.108, "step": 16460 }, { "epoch": 0.3878762187367529, "grad_norm": 2.2076354026794434, "learning_rate": 0.0001224577269087655, "loss": 2.1367, "step": 16470 }, { "epoch": 0.38811172342329614, "grad_norm": 2.061929225921631, "learning_rate": 0.0001224106259714568, "loss": 1.8931, "step": 16480 }, { "epoch": 0.3883472281098394, "grad_norm": 1.9706366062164307, "learning_rate": 0.0001223635250341482, "loss": 2.1087, "step": 16490 }, { "epoch": 0.38858273279638267, "grad_norm": 2.2798683643341064, "learning_rate": 0.00012231642409683954, "loss": 2.152, "step": 16500 }, { "epoch": 0.38881823748292593, "grad_norm": 2.740971326828003, "learning_rate": 0.0001222693231595309, "loss": 1.9273, "step": 16510 }, { "epoch": 0.3890537421694692, "grad_norm": 2.453092575073242, "learning_rate": 0.00012222222222222224, "loss": 1.8969, "step": 16520 }, { "epoch": 0.38928924685601246, "grad_norm": 2.553710460662842, 
"learning_rate": 0.00012217512128491356, "loss": 2.0968, "step": 16530 }, { "epoch": 0.38952475154255567, "grad_norm": 2.732374429702759, "learning_rate": 0.00012212802034760494, "loss": 2.2074, "step": 16540 }, { "epoch": 0.38976025622909893, "grad_norm": 2.342780113220215, "learning_rate": 0.00012208091941029627, "loss": 2.0617, "step": 16550 }, { "epoch": 0.3899957609156422, "grad_norm": 1.9254868030548096, "learning_rate": 0.00012203381847298763, "loss": 1.8196, "step": 16560 }, { "epoch": 0.39023126560218546, "grad_norm": 1.8668605089187622, "learning_rate": 0.00012198671753567897, "loss": 2.1088, "step": 16570 }, { "epoch": 0.3904667702887287, "grad_norm": 2.149210214614868, "learning_rate": 0.00012193961659837032, "loss": 2.232, "step": 16580 }, { "epoch": 0.390702274975272, "grad_norm": 2.4574036598205566, "learning_rate": 0.00012189251566106165, "loss": 2.0236, "step": 16590 }, { "epoch": 0.39093777966181525, "grad_norm": 1.9799808263778687, "learning_rate": 0.00012184541472375302, "loss": 2.1751, "step": 16600 }, { "epoch": 0.3911732843483585, "grad_norm": 2.1461141109466553, "learning_rate": 0.00012179831378644435, "loss": 2.1239, "step": 16610 }, { "epoch": 0.3914087890349018, "grad_norm": 2.3437371253967285, "learning_rate": 0.0001217512128491357, "loss": 2.2036, "step": 16620 }, { "epoch": 0.39164429372144505, "grad_norm": 2.012018918991089, "learning_rate": 0.00012170411191182704, "loss": 2.1692, "step": 16630 }, { "epoch": 0.3918797984079883, "grad_norm": 2.889059066772461, "learning_rate": 0.0001216570109745184, "loss": 2.2988, "step": 16640 }, { "epoch": 0.3921153030945316, "grad_norm": 2.31487774848938, "learning_rate": 0.00012160991003720974, "loss": 2.1439, "step": 16650 }, { "epoch": 0.39235080778107484, "grad_norm": 1.9940412044525146, "learning_rate": 0.0001215628090999011, "loss": 2.1955, "step": 16660 }, { "epoch": 0.3925863124676181, "grad_norm": 2.186189889907837, "learning_rate": 0.00012151570816259243, "loss": 1.9478, "step": 16670 }, 
{ "epoch": 0.39282181715416137, "grad_norm": 1.8489048480987549, "learning_rate": 0.00012146860722528379, "loss": 2.0117, "step": 16680 }, { "epoch": 0.39305732184070463, "grad_norm": 2.188730239868164, "learning_rate": 0.00012142150628797513, "loss": 2.367, "step": 16690 }, { "epoch": 0.3932928265272479, "grad_norm": 2.6312897205352783, "learning_rate": 0.00012137440535066649, "loss": 2.1655, "step": 16700 }, { "epoch": 0.39352833121379116, "grad_norm": 2.244814872741699, "learning_rate": 0.00012132730441335783, "loss": 2.0669, "step": 16710 }, { "epoch": 0.3937638359003344, "grad_norm": 3.1631484031677246, "learning_rate": 0.00012128020347604918, "loss": 2.3648, "step": 16720 }, { "epoch": 0.3939993405868777, "grad_norm": 2.074009418487549, "learning_rate": 0.00012123310253874052, "loss": 2.192, "step": 16730 }, { "epoch": 0.39423484527342095, "grad_norm": 2.1103250980377197, "learning_rate": 0.00012118600160143188, "loss": 1.9476, "step": 16740 }, { "epoch": 0.3944703499599642, "grad_norm": 2.7866339683532715, "learning_rate": 0.00012113890066412322, "loss": 1.9956, "step": 16750 }, { "epoch": 0.3947058546465075, "grad_norm": 2.7085251808166504, "learning_rate": 0.00012109179972681457, "loss": 2.2952, "step": 16760 }, { "epoch": 0.39494135933305075, "grad_norm": 1.9454951286315918, "learning_rate": 0.0001210446987895059, "loss": 2.1263, "step": 16770 }, { "epoch": 0.395176864019594, "grad_norm": 3.369178056716919, "learning_rate": 0.00012099759785219727, "loss": 2.0465, "step": 16780 }, { "epoch": 0.3954123687061373, "grad_norm": 2.1586029529571533, "learning_rate": 0.00012095049691488862, "loss": 2.1699, "step": 16790 }, { "epoch": 0.39564787339268054, "grad_norm": 3.4209299087524414, "learning_rate": 0.00012090339597757995, "loss": 1.9791, "step": 16800 }, { "epoch": 0.3958833780792238, "grad_norm": 2.1973490715026855, "learning_rate": 0.00012085629504027132, "loss": 2.1723, "step": 16810 }, { "epoch": 0.39611888276576707, "grad_norm": 2.067542791366577, 
"learning_rate": 0.00012080919410296265, "loss": 1.8388, "step": 16820 }, { "epoch": 0.3963543874523103, "grad_norm": 2.0267772674560547, "learning_rate": 0.000120762093165654, "loss": 2.1342, "step": 16830 }, { "epoch": 0.39658989213885354, "grad_norm": 1.9653459787368774, "learning_rate": 0.00012071499222834534, "loss": 2.1676, "step": 16840 }, { "epoch": 0.3968253968253968, "grad_norm": 2.003641366958618, "learning_rate": 0.0001206678912910367, "loss": 2.119, "step": 16850 }, { "epoch": 0.39706090151194007, "grad_norm": 2.4826791286468506, "learning_rate": 0.00012062079035372804, "loss": 1.9719, "step": 16860 }, { "epoch": 0.39729640619848333, "grad_norm": 2.9601070880889893, "learning_rate": 0.0001205736894164194, "loss": 1.9954, "step": 16870 }, { "epoch": 0.3975319108850266, "grad_norm": 2.5066933631896973, "learning_rate": 0.00012052658847911074, "loss": 2.0153, "step": 16880 }, { "epoch": 0.39776741557156986, "grad_norm": 2.712014675140381, "learning_rate": 0.00012047948754180209, "loss": 2.2096, "step": 16890 }, { "epoch": 0.3980029202581131, "grad_norm": 1.7923743724822998, "learning_rate": 0.00012043238660449343, "loss": 2.2143, "step": 16900 }, { "epoch": 0.3982384249446564, "grad_norm": 2.5782217979431152, "learning_rate": 0.00012038528566718479, "loss": 2.2435, "step": 16910 }, { "epoch": 0.39847392963119965, "grad_norm": 2.224064350128174, "learning_rate": 0.00012033818472987613, "loss": 2.1383, "step": 16920 }, { "epoch": 0.3987094343177429, "grad_norm": 2.3517062664031982, "learning_rate": 0.00012029108379256748, "loss": 1.7965, "step": 16930 }, { "epoch": 0.3989449390042862, "grad_norm": 3.8429908752441406, "learning_rate": 0.00012024398285525882, "loss": 2.0901, "step": 16940 }, { "epoch": 0.39918044369082945, "grad_norm": 2.49701189994812, "learning_rate": 0.00012019688191795018, "loss": 2.075, "step": 16950 }, { "epoch": 0.3994159483773727, "grad_norm": 2.411953926086426, "learning_rate": 0.00012014978098064152, "loss": 2.2769, "step": 16960 }, 
{ "epoch": 0.399651453063916, "grad_norm": 2.9988455772399902, "learning_rate": 0.00012010268004333287, "loss": 1.979, "step": 16970 }, { "epoch": 0.39988695775045924, "grad_norm": 3.0603721141815186, "learning_rate": 0.0001200555791060242, "loss": 2.0761, "step": 16980 }, { "epoch": 0.4001224624370025, "grad_norm": 2.097357988357544, "learning_rate": 0.00012000847816871557, "loss": 2.1266, "step": 16990 }, { "epoch": 0.40035796712354577, "grad_norm": 2.6588997840881348, "learning_rate": 0.0001199613772314069, "loss": 2.1037, "step": 17000 }, { "epoch": 0.40059347181008903, "grad_norm": 2.0685205459594727, "learning_rate": 0.00011991427629409825, "loss": 2.0696, "step": 17010 }, { "epoch": 0.4008289764966323, "grad_norm": 2.6148645877838135, "learning_rate": 0.00011986717535678959, "loss": 2.0947, "step": 17020 }, { "epoch": 0.40106448118317556, "grad_norm": 2.0467002391815186, "learning_rate": 0.00011982007441948096, "loss": 2.2171, "step": 17030 }, { "epoch": 0.4012999858697188, "grad_norm": 2.4288554191589355, "learning_rate": 0.00011977297348217229, "loss": 2.1036, "step": 17040 }, { "epoch": 0.4015354905562621, "grad_norm": 2.3127670288085938, "learning_rate": 0.00011972587254486364, "loss": 2.0517, "step": 17050 }, { "epoch": 0.40177099524280535, "grad_norm": 2.3823368549346924, "learning_rate": 0.000119678771607555, "loss": 2.0046, "step": 17060 }, { "epoch": 0.4020064999293486, "grad_norm": 2.0719432830810547, "learning_rate": 0.00011963167067024634, "loss": 2.0568, "step": 17070 }, { "epoch": 0.4022420046158919, "grad_norm": 2.4580512046813965, "learning_rate": 0.0001195845697329377, "loss": 2.0559, "step": 17080 }, { "epoch": 0.40247750930243514, "grad_norm": 2.124021291732788, "learning_rate": 0.00011953746879562904, "loss": 2.086, "step": 17090 }, { "epoch": 0.4027130139889784, "grad_norm": 2.4031240940093994, "learning_rate": 0.00011949036785832039, "loss": 2.1624, "step": 17100 }, { "epoch": 0.4029485186755216, "grad_norm": 2.5331597328186035, 
"learning_rate": 0.00011944326692101173, "loss": 2.0293, "step": 17110 }, { "epoch": 0.4031840233620649, "grad_norm": 2.3105568885803223, "learning_rate": 0.0001193961659837031, "loss": 2.0536, "step": 17120 }, { "epoch": 0.40341952804860814, "grad_norm": 1.935504674911499, "learning_rate": 0.00011934906504639443, "loss": 2.0363, "step": 17130 }, { "epoch": 0.4036550327351514, "grad_norm": 1.818901777267456, "learning_rate": 0.00011930196410908578, "loss": 2.1138, "step": 17140 }, { "epoch": 0.4038905374216947, "grad_norm": 2.2937822341918945, "learning_rate": 0.00011925486317177712, "loss": 2.2075, "step": 17150 }, { "epoch": 0.40412604210823794, "grad_norm": 2.4323720932006836, "learning_rate": 0.00011920776223446848, "loss": 1.8704, "step": 17160 }, { "epoch": 0.4043615467947812, "grad_norm": 2.0773727893829346, "learning_rate": 0.00011916066129715982, "loss": 2.1124, "step": 17170 }, { "epoch": 0.40459705148132447, "grad_norm": 2.424532413482666, "learning_rate": 0.00011911356035985117, "loss": 1.9422, "step": 17180 }, { "epoch": 0.40483255616786773, "grad_norm": 2.1791512966156006, "learning_rate": 0.0001190664594225425, "loss": 2.0547, "step": 17190 }, { "epoch": 0.405068060854411, "grad_norm": 2.939730167388916, "learning_rate": 0.00011901935848523387, "loss": 1.9766, "step": 17200 }, { "epoch": 0.40530356554095426, "grad_norm": 2.1646764278411865, "learning_rate": 0.0001189722575479252, "loss": 1.9632, "step": 17210 }, { "epoch": 0.4055390702274975, "grad_norm": 3.135854721069336, "learning_rate": 0.00011892515661061656, "loss": 1.942, "step": 17220 }, { "epoch": 0.4057745749140408, "grad_norm": 2.3029401302337646, "learning_rate": 0.00011887805567330789, "loss": 2.3985, "step": 17230 }, { "epoch": 0.40601007960058405, "grad_norm": 2.5653719902038574, "learning_rate": 0.00011883095473599926, "loss": 2.125, "step": 17240 }, { "epoch": 0.4062455842871273, "grad_norm": 2.4413366317749023, "learning_rate": 0.00011878385379869059, "loss": 2.2774, "step": 17250 
}, { "epoch": 0.4064810889736706, "grad_norm": 1.920428991317749, "learning_rate": 0.00011873675286138196, "loss": 2.3723, "step": 17260 }, { "epoch": 0.40671659366021384, "grad_norm": 2.025369644165039, "learning_rate": 0.00011868965192407328, "loss": 2.1473, "step": 17270 }, { "epoch": 0.4069520983467571, "grad_norm": 2.081719160079956, "learning_rate": 0.00011864255098676464, "loss": 2.2024, "step": 17280 }, { "epoch": 0.40718760303330037, "grad_norm": 1.9507285356521606, "learning_rate": 0.00011859545004945598, "loss": 2.0511, "step": 17290 }, { "epoch": 0.40742310771984364, "grad_norm": 2.085679531097412, "learning_rate": 0.00011854834911214734, "loss": 2.0069, "step": 17300 }, { "epoch": 0.4076586124063869, "grad_norm": 2.4207077026367188, "learning_rate": 0.00011850124817483868, "loss": 2.1838, "step": 17310 }, { "epoch": 0.40789411709293016, "grad_norm": 1.945299744606018, "learning_rate": 0.00011845414723753003, "loss": 2.1365, "step": 17320 }, { "epoch": 0.40812962177947343, "grad_norm": 2.0724103450775146, "learning_rate": 0.00011840704630022137, "loss": 2.1791, "step": 17330 }, { "epoch": 0.4083651264660167, "grad_norm": 2.00799298286438, "learning_rate": 0.00011835994536291273, "loss": 2.103, "step": 17340 }, { "epoch": 0.40860063115255996, "grad_norm": 2.6130778789520264, "learning_rate": 0.00011831284442560408, "loss": 2.1468, "step": 17350 }, { "epoch": 0.4088361358391032, "grad_norm": 2.042046308517456, "learning_rate": 0.00011826574348829542, "loss": 1.9138, "step": 17360 }, { "epoch": 0.4090716405256465, "grad_norm": 2.197761058807373, "learning_rate": 0.00011821864255098678, "loss": 2.0824, "step": 17370 }, { "epoch": 0.40930714521218975, "grad_norm": 2.178068161010742, "learning_rate": 0.00011817154161367812, "loss": 2.0455, "step": 17380 }, { "epoch": 0.40954264989873296, "grad_norm": 1.9603676795959473, "learning_rate": 0.00011812444067636947, "loss": 2.0522, "step": 17390 }, { "epoch": 0.4097781545852762, "grad_norm": 1.8290703296661377, 
"learning_rate": 0.0001180773397390608, "loss": 2.0019, "step": 17400 }, { "epoch": 0.4100136592718195, "grad_norm": 2.015998601913452, "learning_rate": 0.00011803023880175217, "loss": 2.2494, "step": 17410 }, { "epoch": 0.41024916395836275, "grad_norm": 2.149958610534668, "learning_rate": 0.0001179831378644435, "loss": 1.9823, "step": 17420 }, { "epoch": 0.410484668644906, "grad_norm": 1.841565489768982, "learning_rate": 0.00011793603692713486, "loss": 1.9568, "step": 17430 }, { "epoch": 0.4107201733314493, "grad_norm": 2.855804204940796, "learning_rate": 0.00011788893598982619, "loss": 2.1627, "step": 17440 }, { "epoch": 0.41095567801799254, "grad_norm": 2.0609381198883057, "learning_rate": 0.00011784183505251756, "loss": 2.0022, "step": 17450 }, { "epoch": 0.4111911827045358, "grad_norm": 2.362701416015625, "learning_rate": 0.0001177947341152089, "loss": 2.2441, "step": 17460 }, { "epoch": 0.41142668739107907, "grad_norm": 1.9817253351211548, "learning_rate": 0.00011774763317790026, "loss": 1.8491, "step": 17470 }, { "epoch": 0.41166219207762234, "grad_norm": 1.9875420331954956, "learning_rate": 0.0001177005322405916, "loss": 1.8271, "step": 17480 }, { "epoch": 0.4118976967641656, "grad_norm": 1.8128178119659424, "learning_rate": 0.00011765343130328294, "loss": 2.0102, "step": 17490 }, { "epoch": 0.41213320145070886, "grad_norm": 2.7656726837158203, "learning_rate": 0.00011760633036597428, "loss": 2.0932, "step": 17500 }, { "epoch": 0.41236870613725213, "grad_norm": 2.31899094581604, "learning_rate": 0.00011755922942866564, "loss": 2.3904, "step": 17510 }, { "epoch": 0.4126042108237954, "grad_norm": 1.7432920932769775, "learning_rate": 0.00011751212849135698, "loss": 2.0688, "step": 17520 }, { "epoch": 0.41283971551033866, "grad_norm": 2.6849148273468018, "learning_rate": 0.00011746502755404833, "loss": 1.9904, "step": 17530 }, { "epoch": 0.4130752201968819, "grad_norm": 2.1877646446228027, "learning_rate": 0.00011741792661673967, "loss": 2.1276, "step": 17540 
}, { "epoch": 0.4133107248834252, "grad_norm": 2.3878908157348633, "learning_rate": 0.00011737082567943103, "loss": 2.0121, "step": 17550 }, { "epoch": 0.41354622956996845, "grad_norm": 2.5052924156188965, "learning_rate": 0.00011732372474212237, "loss": 2.1646, "step": 17560 }, { "epoch": 0.4137817342565117, "grad_norm": 1.9085092544555664, "learning_rate": 0.00011727662380481372, "loss": 2.0815, "step": 17570 }, { "epoch": 0.414017238943055, "grad_norm": 2.4492745399475098, "learning_rate": 0.00011722952286750506, "loss": 2.0763, "step": 17580 }, { "epoch": 0.41425274362959824, "grad_norm": 2.189110279083252, "learning_rate": 0.00011718242193019642, "loss": 2.114, "step": 17590 }, { "epoch": 0.4144882483161415, "grad_norm": 2.4104158878326416, "learning_rate": 0.00011713532099288776, "loss": 2.19, "step": 17600 }, { "epoch": 0.41472375300268477, "grad_norm": 1.884446620941162, "learning_rate": 0.0001170882200555791, "loss": 1.9784, "step": 17610 }, { "epoch": 0.41495925768922803, "grad_norm": 2.4759879112243652, "learning_rate": 0.00011704111911827047, "loss": 2.2371, "step": 17620 }, { "epoch": 0.4151947623757713, "grad_norm": 2.4873902797698975, "learning_rate": 0.00011699401818096181, "loss": 2.1282, "step": 17630 }, { "epoch": 0.41543026706231456, "grad_norm": 1.9872322082519531, "learning_rate": 0.00011694691724365317, "loss": 2.1676, "step": 17640 }, { "epoch": 0.4156657717488578, "grad_norm": 2.117286205291748, "learning_rate": 0.0001168998163063445, "loss": 2.1725, "step": 17650 }, { "epoch": 0.4159012764354011, "grad_norm": 2.261207342147827, "learning_rate": 0.00011685271536903586, "loss": 2.1271, "step": 17660 }, { "epoch": 0.4161367811219443, "grad_norm": 2.076218843460083, "learning_rate": 0.0001168056144317272, "loss": 1.926, "step": 17670 }, { "epoch": 0.41637228580848756, "grad_norm": 2.5241241455078125, "learning_rate": 0.00011675851349441856, "loss": 1.9504, "step": 17680 }, { "epoch": 0.4166077904950308, "grad_norm": 2.2576680183410645, 
"learning_rate": 0.0001167114125571099, "loss": 2.2149, "step": 17690 }, { "epoch": 0.4168432951815741, "grad_norm": 2.1343159675598145, "learning_rate": 0.00011666431161980125, "loss": 2.1592, "step": 17700 }, { "epoch": 0.41707879986811736, "grad_norm": 2.8269872665405273, "learning_rate": 0.00011661721068249258, "loss": 1.8489, "step": 17710 }, { "epoch": 0.4173143045546606, "grad_norm": 2.6557393074035645, "learning_rate": 0.00011657010974518395, "loss": 2.3097, "step": 17720 }, { "epoch": 0.4175498092412039, "grad_norm": 1.9406561851501465, "learning_rate": 0.00011652300880787528, "loss": 2.2424, "step": 17730 }, { "epoch": 0.41778531392774715, "grad_norm": 2.8886189460754395, "learning_rate": 0.00011647590787056663, "loss": 1.9974, "step": 17740 }, { "epoch": 0.4180208186142904, "grad_norm": 2.958347797393799, "learning_rate": 0.00011642880693325797, "loss": 2.0821, "step": 17750 }, { "epoch": 0.4182563233008337, "grad_norm": 2.3907742500305176, "learning_rate": 0.00011638170599594933, "loss": 2.2051, "step": 17760 }, { "epoch": 0.41849182798737694, "grad_norm": 2.605231523513794, "learning_rate": 0.00011633460505864067, "loss": 2.1911, "step": 17770 }, { "epoch": 0.4187273326739202, "grad_norm": 2.1181795597076416, "learning_rate": 0.00011628750412133202, "loss": 1.8307, "step": 17780 }, { "epoch": 0.41896283736046347, "grad_norm": 2.3362369537353516, "learning_rate": 0.00011624040318402336, "loss": 1.9971, "step": 17790 }, { "epoch": 0.41919834204700673, "grad_norm": 2.4740500450134277, "learning_rate": 0.00011619330224671472, "loss": 2.0515, "step": 17800 }, { "epoch": 0.41943384673355, "grad_norm": 2.383115768432617, "learning_rate": 0.00011614620130940606, "loss": 2.1157, "step": 17810 }, { "epoch": 0.41966935142009326, "grad_norm": 1.487096905708313, "learning_rate": 0.00011609910037209741, "loss": 2.0911, "step": 17820 }, { "epoch": 0.4199048561066365, "grad_norm": 2.061518430709839, "learning_rate": 0.00011605199943478874, "loss": 2.2484, "step": 
17830 }, { "epoch": 0.4201403607931798, "grad_norm": 2.2005579471588135, "learning_rate": 0.00011600489849748011, "loss": 2.0508, "step": 17840 }, { "epoch": 0.42037586547972305, "grad_norm": 2.021574020385742, "learning_rate": 0.00011595779756017144, "loss": 2.0133, "step": 17850 }, { "epoch": 0.4206113701662663, "grad_norm": 2.3463683128356934, "learning_rate": 0.00011591069662286281, "loss": 1.9324, "step": 17860 }, { "epoch": 0.4208468748528096, "grad_norm": 2.0511763095855713, "learning_rate": 0.00011586359568555413, "loss": 1.8826, "step": 17870 }, { "epoch": 0.42108237953935285, "grad_norm": 1.8911633491516113, "learning_rate": 0.0001158164947482455, "loss": 1.9932, "step": 17880 }, { "epoch": 0.4213178842258961, "grad_norm": 2.640658140182495, "learning_rate": 0.00011576939381093683, "loss": 2.0184, "step": 17890 }, { "epoch": 0.4215533889124394, "grad_norm": 2.112999677658081, "learning_rate": 0.0001157222928736282, "loss": 2.2726, "step": 17900 }, { "epoch": 0.42178889359898264, "grad_norm": 2.526036262512207, "learning_rate": 0.00011567519193631955, "loss": 2.1118, "step": 17910 }, { "epoch": 0.4220243982855259, "grad_norm": 2.948558807373047, "learning_rate": 0.00011562809099901088, "loss": 2.2965, "step": 17920 }, { "epoch": 0.42225990297206917, "grad_norm": 2.221510648727417, "learning_rate": 0.00011558099006170225, "loss": 2.0943, "step": 17930 }, { "epoch": 0.42249540765861243, "grad_norm": 1.9982761144638062, "learning_rate": 0.00011553388912439358, "loss": 2.1344, "step": 17940 }, { "epoch": 0.4227309123451557, "grad_norm": 1.816463589668274, "learning_rate": 0.00011548678818708493, "loss": 2.1454, "step": 17950 }, { "epoch": 0.4229664170316989, "grad_norm": 1.9133336544036865, "learning_rate": 0.00011543968724977627, "loss": 2.2219, "step": 17960 }, { "epoch": 0.42320192171824217, "grad_norm": 2.87747859954834, "learning_rate": 0.00011539258631246763, "loss": 2.2701, "step": 17970 }, { "epoch": 0.42343742640478543, "grad_norm": 
2.2570955753326416, "learning_rate": 0.00011534548537515897, "loss": 2.2363, "step": 17980 }, { "epoch": 0.4236729310913287, "grad_norm": 2.1838762760162354, "learning_rate": 0.00011529838443785032, "loss": 2.1544, "step": 17990 }, { "epoch": 0.42390843577787196, "grad_norm": 2.118103504180908, "learning_rate": 0.00011525128350054166, "loss": 2.2437, "step": 18000 }, { "epoch": 0.4241439404644152, "grad_norm": 1.956613540649414, "learning_rate": 0.00011520418256323302, "loss": 2.0544, "step": 18010 }, { "epoch": 0.4243794451509585, "grad_norm": 2.5147013664245605, "learning_rate": 0.00011515708162592436, "loss": 2.1949, "step": 18020 }, { "epoch": 0.42461494983750175, "grad_norm": 1.6883703470230103, "learning_rate": 0.00011510998068861571, "loss": 2.1606, "step": 18030 }, { "epoch": 0.424850454524045, "grad_norm": 3.019367218017578, "learning_rate": 0.00011506287975130705, "loss": 2.1466, "step": 18040 }, { "epoch": 0.4250859592105883, "grad_norm": 2.5443851947784424, "learning_rate": 0.00011501577881399841, "loss": 2.0999, "step": 18050 }, { "epoch": 0.42532146389713155, "grad_norm": 1.759558081626892, "learning_rate": 0.00011496867787668975, "loss": 2.0157, "step": 18060 }, { "epoch": 0.4255569685836748, "grad_norm": 2.4723591804504395, "learning_rate": 0.00011492157693938111, "loss": 2.2185, "step": 18070 }, { "epoch": 0.4257924732702181, "grad_norm": 2.276010274887085, "learning_rate": 0.00011487447600207245, "loss": 2.197, "step": 18080 }, { "epoch": 0.42602797795676134, "grad_norm": 2.547868013381958, "learning_rate": 0.0001148273750647638, "loss": 2.0393, "step": 18090 }, { "epoch": 0.4262634826433046, "grad_norm": 2.285099983215332, "learning_rate": 0.00011478027412745513, "loss": 1.9714, "step": 18100 }, { "epoch": 0.42649898732984787, "grad_norm": 1.7757441997528076, "learning_rate": 0.0001147331731901465, "loss": 2.0319, "step": 18110 }, { "epoch": 0.42673449201639113, "grad_norm": 2.2466018199920654, "learning_rate": 0.00011468607225283783, "loss": 
2.0001, "step": 18120 }, { "epoch": 0.4269699967029344, "grad_norm": 2.274280548095703, "learning_rate": 0.00011463897131552918, "loss": 2.0503, "step": 18130 }, { "epoch": 0.42720550138947766, "grad_norm": 2.588524341583252, "learning_rate": 0.00011459187037822052, "loss": 2.3182, "step": 18140 }, { "epoch": 0.4274410060760209, "grad_norm": 2.081986904144287, "learning_rate": 0.00011454476944091188, "loss": 2.1953, "step": 18150 }, { "epoch": 0.4276765107625642, "grad_norm": 2.046691417694092, "learning_rate": 0.00011449766850360322, "loss": 2.1025, "step": 18160 }, { "epoch": 0.42791201544910745, "grad_norm": 2.94069504737854, "learning_rate": 0.00011445056756629457, "loss": 2.2368, "step": 18170 }, { "epoch": 0.4281475201356507, "grad_norm": 2.245175361633301, "learning_rate": 0.00011440346662898591, "loss": 2.1502, "step": 18180 }, { "epoch": 0.428383024822194, "grad_norm": 2.029442310333252, "learning_rate": 0.00011435636569167727, "loss": 2.2961, "step": 18190 }, { "epoch": 0.42861852950873724, "grad_norm": 2.311154842376709, "learning_rate": 0.00011430926475436862, "loss": 2.0729, "step": 18200 }, { "epoch": 0.4288540341952805, "grad_norm": 2.393167018890381, "learning_rate": 0.00011426216381705996, "loss": 2.0608, "step": 18210 }, { "epoch": 0.4290895388818238, "grad_norm": 2.606497049331665, "learning_rate": 0.00011421506287975132, "loss": 2.0255, "step": 18220 }, { "epoch": 0.42932504356836704, "grad_norm": 1.9125550985336304, "learning_rate": 0.00011416796194244266, "loss": 2.0945, "step": 18230 }, { "epoch": 0.42956054825491025, "grad_norm": 2.4020402431488037, "learning_rate": 0.00011412086100513402, "loss": 2.0022, "step": 18240 }, { "epoch": 0.4297960529414535, "grad_norm": 1.5923528671264648, "learning_rate": 0.00011407376006782535, "loss": 2.0348, "step": 18250 }, { "epoch": 0.4300315576279968, "grad_norm": 2.0022480487823486, "learning_rate": 0.00011402665913051671, "loss": 2.1634, "step": 18260 }, { "epoch": 0.43026706231454004, "grad_norm": 
2.2088112831115723, "learning_rate": 0.00011397955819320805, "loss": 2.2046, "step": 18270 }, { "epoch": 0.4305025670010833, "grad_norm": 1.8067007064819336, "learning_rate": 0.00011393245725589941, "loss": 2.0242, "step": 18280 }, { "epoch": 0.43073807168762657, "grad_norm": 2.725207567214966, "learning_rate": 0.00011388535631859075, "loss": 1.8446, "step": 18290 }, { "epoch": 0.43097357637416983, "grad_norm": 3.046445608139038, "learning_rate": 0.0001138382553812821, "loss": 2.1722, "step": 18300 }, { "epoch": 0.4312090810607131, "grad_norm": 2.3470003604888916, "learning_rate": 0.00011379115444397343, "loss": 2.0065, "step": 18310 }, { "epoch": 0.43144458574725636, "grad_norm": 3.09189772605896, "learning_rate": 0.0001137440535066648, "loss": 1.9948, "step": 18320 }, { "epoch": 0.4316800904337996, "grad_norm": 2.13586163520813, "learning_rate": 0.00011369695256935613, "loss": 1.9383, "step": 18330 }, { "epoch": 0.4319155951203429, "grad_norm": 2.6252524852752686, "learning_rate": 0.00011364985163204748, "loss": 1.9324, "step": 18340 }, { "epoch": 0.43215109980688615, "grad_norm": 2.855048179626465, "learning_rate": 0.00011360275069473882, "loss": 2.3409, "step": 18350 }, { "epoch": 0.4323866044934294, "grad_norm": 2.435730457305908, "learning_rate": 0.00011355564975743019, "loss": 2.1404, "step": 18360 }, { "epoch": 0.4326221091799727, "grad_norm": 2.411512613296509, "learning_rate": 0.00011350854882012152, "loss": 2.2621, "step": 18370 }, { "epoch": 0.43285761386651594, "grad_norm": 2.974522590637207, "learning_rate": 0.00011346144788281287, "loss": 2.1951, "step": 18380 }, { "epoch": 0.4330931185530592, "grad_norm": 1.7726364135742188, "learning_rate": 0.00011341434694550421, "loss": 1.8892, "step": 18390 }, { "epoch": 0.4333286232396025, "grad_norm": 2.825831174850464, "learning_rate": 0.00011336724600819557, "loss": 2.1635, "step": 18400 }, { "epoch": 0.43356412792614574, "grad_norm": 2.5960309505462646, "learning_rate": 0.00011332014507088691, "loss": 
2.0728, "step": 18410 }, { "epoch": 0.433799632612689, "grad_norm": 2.0427327156066895, "learning_rate": 0.00011327304413357826, "loss": 2.0275, "step": 18420 }, { "epoch": 0.43403513729923227, "grad_norm": 2.6198508739471436, "learning_rate": 0.0001132259431962696, "loss": 2.0241, "step": 18430 }, { "epoch": 0.43427064198577553, "grad_norm": 2.224815845489502, "learning_rate": 0.00011317884225896096, "loss": 2.0585, "step": 18440 }, { "epoch": 0.4345061466723188, "grad_norm": 2.4373631477355957, "learning_rate": 0.0001131317413216523, "loss": 2.0333, "step": 18450 }, { "epoch": 0.43474165135886206, "grad_norm": 1.711761713027954, "learning_rate": 0.00011308464038434366, "loss": 1.7788, "step": 18460 }, { "epoch": 0.4349771560454053, "grad_norm": 3.0095267295837402, "learning_rate": 0.00011303753944703501, "loss": 2.2233, "step": 18470 }, { "epoch": 0.4352126607319486, "grad_norm": 2.0507631301879883, "learning_rate": 0.00011299043850972635, "loss": 2.019, "step": 18480 }, { "epoch": 0.43544816541849185, "grad_norm": 2.733670711517334, "learning_rate": 0.00011294333757241771, "loss": 2.074, "step": 18490 }, { "epoch": 0.4356836701050351, "grad_norm": 2.1250648498535156, "learning_rate": 0.00011289623663510905, "loss": 2.0513, "step": 18500 }, { "epoch": 0.4359191747915784, "grad_norm": 3.0923218727111816, "learning_rate": 0.0001128491356978004, "loss": 2.3304, "step": 18510 }, { "epoch": 0.4361546794781216, "grad_norm": 3.0069260597229004, "learning_rate": 0.00011280203476049173, "loss": 2.2824, "step": 18520 }, { "epoch": 0.43639018416466485, "grad_norm": 2.1305959224700928, "learning_rate": 0.0001127549338231831, "loss": 2.0499, "step": 18530 }, { "epoch": 0.4366256888512081, "grad_norm": 2.1745736598968506, "learning_rate": 0.00011270783288587444, "loss": 1.9903, "step": 18540 }, { "epoch": 0.4368611935377514, "grad_norm": 2.113219738006592, "learning_rate": 0.00011266073194856579, "loss": 2.1466, "step": 18550 }, { "epoch": 0.43709669822429464, "grad_norm": 
2.403501510620117, "learning_rate": 0.00011261363101125712, "loss": 2.1115, "step": 18560 }, { "epoch": 0.4373322029108379, "grad_norm": 2.2626802921295166, "learning_rate": 0.00011256653007394849, "loss": 2.089, "step": 18570 }, { "epoch": 0.4375677075973812, "grad_norm": 2.3562400341033936, "learning_rate": 0.00011251942913663982, "loss": 2.3542, "step": 18580 }, { "epoch": 0.43780321228392444, "grad_norm": 1.9294075965881348, "learning_rate": 0.00011247232819933117, "loss": 2.1626, "step": 18590 }, { "epoch": 0.4380387169704677, "grad_norm": 2.3301680088043213, "learning_rate": 0.00011242522726202251, "loss": 2.047, "step": 18600 }, { "epoch": 0.43827422165701097, "grad_norm": 2.25012469291687, "learning_rate": 0.00011237812632471387, "loss": 2.1893, "step": 18610 }, { "epoch": 0.43850972634355423, "grad_norm": 2.4669597148895264, "learning_rate": 0.00011233102538740521, "loss": 1.9656, "step": 18620 }, { "epoch": 0.4387452310300975, "grad_norm": 2.1987082958221436, "learning_rate": 0.00011228392445009656, "loss": 2.1176, "step": 18630 }, { "epoch": 0.43898073571664076, "grad_norm": 2.573086738586426, "learning_rate": 0.0001122368235127879, "loss": 1.9486, "step": 18640 }, { "epoch": 0.439216240403184, "grad_norm": 2.510371446609497, "learning_rate": 0.00011218972257547926, "loss": 2.2401, "step": 18650 }, { "epoch": 0.4394517450897273, "grad_norm": 2.1042065620422363, "learning_rate": 0.0001121426216381706, "loss": 1.9531, "step": 18660 }, { "epoch": 0.43968724977627055, "grad_norm": 2.6314985752105713, "learning_rate": 0.00011209552070086196, "loss": 2.126, "step": 18670 }, { "epoch": 0.4399227544628138, "grad_norm": 1.911363959312439, "learning_rate": 0.0001120484197635533, "loss": 2.2783, "step": 18680 }, { "epoch": 0.4401582591493571, "grad_norm": 3.6765568256378174, "learning_rate": 0.00011200131882624465, "loss": 2.0344, "step": 18690 }, { "epoch": 0.44039376383590034, "grad_norm": 1.8842840194702148, "learning_rate": 0.00011195421788893599, "loss": 
2.0042, "step": 18700 }, { "epoch": 0.4406292685224436, "grad_norm": 2.389233112335205, "learning_rate": 0.00011190711695162735, "loss": 2.0097, "step": 18710 }, { "epoch": 0.44086477320898687, "grad_norm": 1.9120182991027832, "learning_rate": 0.00011186001601431869, "loss": 1.9488, "step": 18720 }, { "epoch": 0.44110027789553014, "grad_norm": 3.073310613632202, "learning_rate": 0.00011181291507701004, "loss": 1.8932, "step": 18730 }, { "epoch": 0.4413357825820734, "grad_norm": 2.4336462020874023, "learning_rate": 0.00011176581413970137, "loss": 2.028, "step": 18740 }, { "epoch": 0.44157128726861666, "grad_norm": 2.4928979873657227, "learning_rate": 0.00011171871320239274, "loss": 2.0821, "step": 18750 }, { "epoch": 0.4418067919551599, "grad_norm": 2.694518804550171, "learning_rate": 0.00011167161226508409, "loss": 2.0103, "step": 18760 }, { "epoch": 0.4420422966417032, "grad_norm": 1.9793258905410767, "learning_rate": 0.00011162451132777542, "loss": 2.0106, "step": 18770 }, { "epoch": 0.44227780132824646, "grad_norm": 3.150414228439331, "learning_rate": 0.00011157741039046679, "loss": 2.2266, "step": 18780 }, { "epoch": 0.4425133060147897, "grad_norm": 2.1798579692840576, "learning_rate": 0.00011153030945315812, "loss": 2.1064, "step": 18790 }, { "epoch": 0.44274881070133293, "grad_norm": 2.7977168560028076, "learning_rate": 0.00011148320851584947, "loss": 2.179, "step": 18800 }, { "epoch": 0.4429843153878762, "grad_norm": 1.93820059299469, "learning_rate": 0.00011143610757854081, "loss": 2.2933, "step": 18810 }, { "epoch": 0.44321982007441946, "grad_norm": 2.5885021686553955, "learning_rate": 0.00011138900664123217, "loss": 2.1091, "step": 18820 }, { "epoch": 0.4434553247609627, "grad_norm": 2.002413034439087, "learning_rate": 0.00011134190570392351, "loss": 1.8793, "step": 18830 }, { "epoch": 0.443690829447506, "grad_norm": 2.016279697418213, "learning_rate": 0.00011129480476661487, "loss": 1.9843, "step": 18840 }, { "epoch": 0.44392633413404925, "grad_norm": 
1.719465732574463, "learning_rate": 0.0001112477038293062, "loss": 2.0675, "step": 18850 }, { "epoch": 0.4441618388205925, "grad_norm": 2.9975037574768066, "learning_rate": 0.00011120060289199756, "loss": 1.9329, "step": 18860 }, { "epoch": 0.4443973435071358, "grad_norm": 2.3108158111572266, "learning_rate": 0.0001111535019546889, "loss": 2.1686, "step": 18870 }, { "epoch": 0.44463284819367904, "grad_norm": 2.196322202682495, "learning_rate": 0.00011110640101738026, "loss": 2.1436, "step": 18880 }, { "epoch": 0.4448683528802223, "grad_norm": 2.0889739990234375, "learning_rate": 0.0001110593000800716, "loss": 2.1503, "step": 18890 }, { "epoch": 0.44510385756676557, "grad_norm": 1.8750911951065063, "learning_rate": 0.00011101219914276295, "loss": 2.1539, "step": 18900 }, { "epoch": 0.44533936225330883, "grad_norm": 2.5251245498657227, "learning_rate": 0.00011096509820545429, "loss": 2.0683, "step": 18910 }, { "epoch": 0.4455748669398521, "grad_norm": 2.4985053539276123, "learning_rate": 0.00011091799726814565, "loss": 2.0526, "step": 18920 }, { "epoch": 0.44581037162639536, "grad_norm": 2.0587680339813232, "learning_rate": 0.00011087089633083699, "loss": 2.1422, "step": 18930 }, { "epoch": 0.4460458763129386, "grad_norm": 2.103192090988159, "learning_rate": 0.00011082379539352834, "loss": 1.9882, "step": 18940 }, { "epoch": 0.4462813809994819, "grad_norm": 2.437361717224121, "learning_rate": 0.00011077669445621967, "loss": 2.1671, "step": 18950 }, { "epoch": 0.44651688568602516, "grad_norm": 2.0805160999298096, "learning_rate": 0.00011072959351891104, "loss": 2.0893, "step": 18960 }, { "epoch": 0.4467523903725684, "grad_norm": 2.183110475540161, "learning_rate": 0.00011068249258160237, "loss": 1.9785, "step": 18970 }, { "epoch": 0.4469878950591117, "grad_norm": 3.0522234439849854, "learning_rate": 0.00011063539164429372, "loss": 2.1078, "step": 18980 }, { "epoch": 0.44722339974565495, "grad_norm": 2.6682941913604736, "learning_rate": 0.00011058829070698506, "loss": 
2.1685, "step": 18990 }, { "epoch": 0.4474589044321982, "grad_norm": 2.421186685562134, "learning_rate": 0.00011054118976967642, "loss": 2.1228, "step": 19000 }, { "epoch": 0.4476944091187415, "grad_norm": 1.8149057626724243, "learning_rate": 0.00011049408883236776, "loss": 2.1178, "step": 19010 }, { "epoch": 0.44792991380528474, "grad_norm": 2.045279026031494, "learning_rate": 0.00011044698789505911, "loss": 1.934, "step": 19020 }, { "epoch": 0.448165418491828, "grad_norm": 2.152418375015259, "learning_rate": 0.00011039988695775045, "loss": 2.0527, "step": 19030 }, { "epoch": 0.44840092317837127, "grad_norm": 2.6455652713775635, "learning_rate": 0.00011035278602044181, "loss": 1.9045, "step": 19040 }, { "epoch": 0.44863642786491453, "grad_norm": 1.9313437938690186, "learning_rate": 0.00011030568508313318, "loss": 2.0172, "step": 19050 }, { "epoch": 0.4488719325514578, "grad_norm": 2.253701686859131, "learning_rate": 0.00011025858414582451, "loss": 2.164, "step": 19060 }, { "epoch": 0.44910743723800106, "grad_norm": 1.9324719905853271, "learning_rate": 0.00011021148320851586, "loss": 2.333, "step": 19070 }, { "epoch": 0.4493429419245443, "grad_norm": 1.903435230255127, "learning_rate": 0.0001101643822712072, "loss": 2.0204, "step": 19080 }, { "epoch": 0.44957844661108753, "grad_norm": 2.1830732822418213, "learning_rate": 0.00011011728133389856, "loss": 2.2265, "step": 19090 }, { "epoch": 0.4498139512976308, "grad_norm": 2.450608253479004, "learning_rate": 0.0001100701803965899, "loss": 2.047, "step": 19100 }, { "epoch": 0.45004945598417406, "grad_norm": 2.1484196186065674, "learning_rate": 0.00011002307945928125, "loss": 1.9849, "step": 19110 }, { "epoch": 0.4502849606707173, "grad_norm": 2.1579840183258057, "learning_rate": 0.00010997597852197259, "loss": 2.1648, "step": 19120 }, { "epoch": 0.4505204653572606, "grad_norm": 2.8706860542297363, "learning_rate": 0.00010992887758466395, "loss": 1.9726, "step": 19130 }, { "epoch": 0.45075597004380386, "grad_norm": 
2.238607406616211, "learning_rate": 0.00010988177664735529, "loss": 2.056, "step": 19140 }, { "epoch": 0.4509914747303471, "grad_norm": 1.9871670007705688, "learning_rate": 0.00010983467571004664, "loss": 2.1684, "step": 19150 }, { "epoch": 0.4512269794168904, "grad_norm": 2.0434229373931885, "learning_rate": 0.00010978757477273797, "loss": 2.1818, "step": 19160 }, { "epoch": 0.45146248410343365, "grad_norm": 1.8030943870544434, "learning_rate": 0.00010974047383542934, "loss": 1.9667, "step": 19170 }, { "epoch": 0.4516979887899769, "grad_norm": 2.399859666824341, "learning_rate": 0.00010969337289812067, "loss": 1.9836, "step": 19180 }, { "epoch": 0.4519334934765202, "grad_norm": 2.7550058364868164, "learning_rate": 0.00010964627196081203, "loss": 2.1151, "step": 19190 }, { "epoch": 0.45216899816306344, "grad_norm": 2.3162660598754883, "learning_rate": 0.00010959917102350336, "loss": 2.0955, "step": 19200 }, { "epoch": 0.4524045028496067, "grad_norm": 1.7464721202850342, "learning_rate": 0.00010955678017992559, "loss": 1.9926, "step": 19210 }, { "epoch": 0.45264000753614997, "grad_norm": 3.1640796661376953, "learning_rate": 0.00010950967924261693, "loss": 2.0717, "step": 19220 }, { "epoch": 0.45287551222269323, "grad_norm": 1.9667413234710693, "learning_rate": 0.00010946257830530828, "loss": 2.1224, "step": 19230 }, { "epoch": 0.4531110169092365, "grad_norm": 2.176348924636841, "learning_rate": 0.00010941547736799962, "loss": 2.0679, "step": 19240 }, { "epoch": 0.45334652159577976, "grad_norm": 2.226438045501709, "learning_rate": 0.00010936837643069098, "loss": 2.1582, "step": 19250 }, { "epoch": 0.453582026282323, "grad_norm": 2.002314805984497, "learning_rate": 0.00010932127549338232, "loss": 2.0523, "step": 19260 }, { "epoch": 0.4538175309688663, "grad_norm": 3.0285584926605225, "learning_rate": 0.00010927417455607367, "loss": 2.1907, "step": 19270 }, { "epoch": 0.45405303565540955, "grad_norm": 3.506699323654175, "learning_rate": 0.000109227073618765, "loss": 
2.1037, "step": 19280 }, { "epoch": 0.4542885403419528, "grad_norm": 2.3388898372650146, "learning_rate": 0.00010917997268145637, "loss": 2.0536, "step": 19290 }, { "epoch": 0.4545240450284961, "grad_norm": 1.9860022068023682, "learning_rate": 0.00010913287174414772, "loss": 2.1253, "step": 19300 }, { "epoch": 0.45475954971503935, "grad_norm": 2.931128740310669, "learning_rate": 0.00010908577080683905, "loss": 2.1019, "step": 19310 }, { "epoch": 0.4549950544015826, "grad_norm": 1.9610087871551514, "learning_rate": 0.00010903866986953042, "loss": 2.114, "step": 19320 }, { "epoch": 0.4552305590881259, "grad_norm": 2.159877300262451, "learning_rate": 0.00010899156893222176, "loss": 1.8286, "step": 19330 }, { "epoch": 0.45546606377466914, "grad_norm": 1.8019903898239136, "learning_rate": 0.00010894446799491312, "loss": 2.1, "step": 19340 }, { "epoch": 0.4557015684612124, "grad_norm": 2.368715524673462, "learning_rate": 0.00010889736705760444, "loss": 2.1636, "step": 19350 }, { "epoch": 0.45593707314775567, "grad_norm": 2.2068965435028076, "learning_rate": 0.0001088502661202958, "loss": 2.1377, "step": 19360 }, { "epoch": 0.4561725778342989, "grad_norm": 2.2656490802764893, "learning_rate": 0.00010880316518298714, "loss": 2.1331, "step": 19370 }, { "epoch": 0.45640808252084214, "grad_norm": 1.6999800205230713, "learning_rate": 0.0001087560642456785, "loss": 1.8618, "step": 19380 }, { "epoch": 0.4566435872073854, "grad_norm": 2.0297725200653076, "learning_rate": 0.00010870896330836984, "loss": 2.0886, "step": 19390 }, { "epoch": 0.45687909189392867, "grad_norm": 2.10459566116333, "learning_rate": 0.0001086618623710612, "loss": 2.0182, "step": 19400 }, { "epoch": 0.45711459658047193, "grad_norm": 2.357388496398926, "learning_rate": 0.00010861476143375253, "loss": 1.9764, "step": 19410 }, { "epoch": 0.4573501012670152, "grad_norm": 2.0262765884399414, "learning_rate": 0.0001085676604964439, "loss": 2.3218, "step": 19420 }, { "epoch": 0.45758560595355846, "grad_norm": 
3.8287534713745117, "learning_rate": 0.00010852055955913523, "loss": 2.1722, "step": 19430 }, { "epoch": 0.4578211106401017, "grad_norm": 2.3780245780944824, "learning_rate": 0.00010847345862182658, "loss": 2.0486, "step": 19440 }, { "epoch": 0.458056615326645, "grad_norm": 2.3219287395477295, "learning_rate": 0.00010842635768451792, "loss": 2.2132, "step": 19450 }, { "epoch": 0.45829212001318825, "grad_norm": 2.278029680252075, "learning_rate": 0.00010837925674720928, "loss": 1.9851, "step": 19460 }, { "epoch": 0.4585276246997315, "grad_norm": 2.079396963119507, "learning_rate": 0.00010833215580990062, "loss": 2.2242, "step": 19470 }, { "epoch": 0.4587631293862748, "grad_norm": 2.292618751525879, "learning_rate": 0.00010828505487259197, "loss": 1.912, "step": 19480 }, { "epoch": 0.45899863407281805, "grad_norm": 2.677804708480835, "learning_rate": 0.0001082379539352833, "loss": 2.1219, "step": 19490 }, { "epoch": 0.4592341387593613, "grad_norm": 2.149505138397217, "learning_rate": 0.00010819085299797467, "loss": 2.0128, "step": 19500 }, { "epoch": 0.4594696434459046, "grad_norm": 2.596064329147339, "learning_rate": 0.000108143752060666, "loss": 2.1132, "step": 19510 }, { "epoch": 0.45970514813244784, "grad_norm": 2.409451484680176, "learning_rate": 0.00010809665112335736, "loss": 1.9036, "step": 19520 }, { "epoch": 0.4599406528189911, "grad_norm": 2.5537643432617188, "learning_rate": 0.00010804955018604869, "loss": 1.9008, "step": 19530 }, { "epoch": 0.46017615750553437, "grad_norm": 1.7899531126022339, "learning_rate": 0.00010800244924874006, "loss": 1.829, "step": 19540 }, { "epoch": 0.46041166219207763, "grad_norm": 1.7132242918014526, "learning_rate": 0.00010795534831143139, "loss": 2.1235, "step": 19550 }, { "epoch": 0.4606471668786209, "grad_norm": 2.0346591472625732, "learning_rate": 0.00010790824737412276, "loss": 2.0902, "step": 19560 }, { "epoch": 0.46088267156516416, "grad_norm": 3.4156370162963867, "learning_rate": 0.00010786114643681408, "loss": 
2.2832, "step": 19570 }, { "epoch": 0.4611181762517074, "grad_norm": 2.067667007446289, "learning_rate": 0.00010781404549950544, "loss": 2.1939, "step": 19580 }, { "epoch": 0.4613536809382507, "grad_norm": 1.9971282482147217, "learning_rate": 0.00010776694456219681, "loss": 2.1049, "step": 19590 }, { "epoch": 0.46158918562479395, "grad_norm": 2.331735849380493, "learning_rate": 0.00010771984362488814, "loss": 2.3262, "step": 19600 }, { "epoch": 0.4618246903113372, "grad_norm": 2.4964492321014404, "learning_rate": 0.0001076727426875795, "loss": 1.8584, "step": 19610 }, { "epoch": 0.4620601949978805, "grad_norm": 2.145075559616089, "learning_rate": 0.00010762564175027083, "loss": 2.1807, "step": 19620 }, { "epoch": 0.46229569968442374, "grad_norm": 2.1231913566589355, "learning_rate": 0.00010758325090669306, "loss": 2.0905, "step": 19630 }, { "epoch": 0.462531204370967, "grad_norm": 2.6678659915924072, "learning_rate": 0.00010753614996938439, "loss": 1.9073, "step": 19640 }, { "epoch": 0.4627667090575102, "grad_norm": 1.718123435974121, "learning_rate": 0.00010748904903207575, "loss": 2.1715, "step": 19650 }, { "epoch": 0.4630022137440535, "grad_norm": 2.190896511077881, "learning_rate": 0.00010744194809476709, "loss": 2.1435, "step": 19660 }, { "epoch": 0.46323771843059675, "grad_norm": 2.8094241619110107, "learning_rate": 0.00010739484715745845, "loss": 1.9762, "step": 19670 }, { "epoch": 0.46347322311714, "grad_norm": 1.8374676704406738, "learning_rate": 0.00010734774622014979, "loss": 2.0374, "step": 19680 }, { "epoch": 0.4637087278036833, "grad_norm": 2.1029560565948486, "learning_rate": 0.00010730064528284114, "loss": 2.3126, "step": 19690 }, { "epoch": 0.46394423249022654, "grad_norm": 2.3267154693603516, "learning_rate": 0.00010725354434553247, "loss": 2.0234, "step": 19700 }, { "epoch": 0.4641797371767698, "grad_norm": 2.216888189315796, "learning_rate": 0.00010720644340822384, "loss": 2.0581, "step": 19710 }, { "epoch": 0.46441524186331307, "grad_norm": 
2.1676642894744873, "learning_rate": 0.00010715934247091517, "loss": 2.0509, "step": 19720 }, { "epoch": 0.46465074654985633, "grad_norm": 2.206120014190674, "learning_rate": 0.00010711224153360652, "loss": 2.0382, "step": 19730 }, { "epoch": 0.4648862512363996, "grad_norm": 2.1404688358306885, "learning_rate": 0.00010706514059629786, "loss": 2.086, "step": 19740 }, { "epoch": 0.46512175592294286, "grad_norm": 2.2367289066314697, "learning_rate": 0.00010701803965898922, "loss": 2.2163, "step": 19750 }, { "epoch": 0.4653572606094861, "grad_norm": 1.8468304872512817, "learning_rate": 0.00010697093872168056, "loss": 2.1602, "step": 19760 }, { "epoch": 0.4655927652960294, "grad_norm": 2.4168541431427, "learning_rate": 0.00010692383778437191, "loss": 2.0497, "step": 19770 }, { "epoch": 0.46582826998257265, "grad_norm": 1.9554232358932495, "learning_rate": 0.00010687673684706325, "loss": 2.1027, "step": 19780 }, { "epoch": 0.4660637746691159, "grad_norm": 2.9504382610321045, "learning_rate": 0.00010682963590975461, "loss": 1.9, "step": 19790 }, { "epoch": 0.4662992793556592, "grad_norm": 3.071281671524048, "learning_rate": 0.00010678253497244595, "loss": 2.0025, "step": 19800 }, { "epoch": 0.46653478404220244, "grad_norm": 2.1430885791778564, "learning_rate": 0.0001067354340351373, "loss": 1.8642, "step": 19810 }, { "epoch": 0.4667702887287457, "grad_norm": 1.6342874765396118, "learning_rate": 0.00010668833309782864, "loss": 2.0941, "step": 19820 }, { "epoch": 0.467005793415289, "grad_norm": 1.8183525800704956, "learning_rate": 0.00010664123216052, "loss": 2.0612, "step": 19830 }, { "epoch": 0.46724129810183224, "grad_norm": 2.2085859775543213, "learning_rate": 0.00010659413122321136, "loss": 2.0461, "step": 19840 }, { "epoch": 0.4674768027883755, "grad_norm": 2.12282395362854, "learning_rate": 0.0001065470302859027, "loss": 2.0324, "step": 19850 }, { "epoch": 0.46771230747491876, "grad_norm": 2.824951648712158, "learning_rate": 0.00010649992934859405, "loss": 1.9978, 
"step": 19860 }, { "epoch": 0.46794781216146203, "grad_norm": 1.9392645359039307, "learning_rate": 0.00010645282841128539, "loss": 2.2445, "step": 19870 }, { "epoch": 0.4681833168480053, "grad_norm": 2.1611478328704834, "learning_rate": 0.00010640572747397675, "loss": 2.1556, "step": 19880 }, { "epoch": 0.46841882153454856, "grad_norm": 2.281257390975952, "learning_rate": 0.00010635862653666809, "loss": 2.0552, "step": 19890 }, { "epoch": 0.4686543262210918, "grad_norm": 2.2869932651519775, "learning_rate": 0.00010631152559935944, "loss": 2.1354, "step": 19900 }, { "epoch": 0.4688898309076351, "grad_norm": 2.583472967147827, "learning_rate": 0.00010626442466205077, "loss": 2.0894, "step": 19910 }, { "epoch": 0.46912533559417835, "grad_norm": 1.9615871906280518, "learning_rate": 0.00010621732372474214, "loss": 2.0168, "step": 19920 }, { "epoch": 0.4693608402807216, "grad_norm": 2.0687925815582275, "learning_rate": 0.00010617022278743348, "loss": 1.9977, "step": 19930 }, { "epoch": 0.4695963449672648, "grad_norm": 1.8753052949905396, "learning_rate": 0.00010612312185012483, "loss": 1.9732, "step": 19940 }, { "epoch": 0.4698318496538081, "grad_norm": 2.3094046115875244, "learning_rate": 0.00010607602091281616, "loss": 2.1751, "step": 19950 }, { "epoch": 0.47006735434035135, "grad_norm": 1.8566091060638428, "learning_rate": 0.00010602891997550753, "loss": 2.097, "step": 19960 }, { "epoch": 0.4703028590268946, "grad_norm": 2.8540568351745605, "learning_rate": 0.00010598181903819886, "loss": 2.0623, "step": 19970 }, { "epoch": 0.4705383637134379, "grad_norm": 2.5797126293182373, "learning_rate": 0.00010593471810089021, "loss": 2.0501, "step": 19980 }, { "epoch": 0.47077386839998114, "grad_norm": 2.1992506980895996, "learning_rate": 0.00010588761716358155, "loss": 1.9637, "step": 19990 }, { "epoch": 0.4710093730865244, "grad_norm": 1.7316781282424927, "learning_rate": 0.00010584051622627291, "loss": 2.156, "step": 20000 }, { "epoch": 0.47124487777306767, "grad_norm": 
2.292141914367676, "learning_rate": 0.00010579341528896425, "loss": 2.1193, "step": 20010 }, { "epoch": 0.47148038245961094, "grad_norm": 2.6842830181121826, "learning_rate": 0.0001057463143516556, "loss": 2.0507, "step": 20020 }, { "epoch": 0.4717158871461542, "grad_norm": 1.8671191930770874, "learning_rate": 0.00010569921341434694, "loss": 1.8824, "step": 20030 }, { "epoch": 0.47195139183269746, "grad_norm": 2.062864303588867, "learning_rate": 0.0001056521124770383, "loss": 2.1032, "step": 20040 }, { "epoch": 0.47218689651924073, "grad_norm": 1.6221140623092651, "learning_rate": 0.00010560501153972964, "loss": 2.2215, "step": 20050 }, { "epoch": 0.472422401205784, "grad_norm": 3.1304547786712646, "learning_rate": 0.000105557910602421, "loss": 2.106, "step": 20060 }, { "epoch": 0.47265790589232726, "grad_norm": 2.085540533065796, "learning_rate": 0.00010551080966511234, "loss": 2.1274, "step": 20070 }, { "epoch": 0.4728934105788705, "grad_norm": 1.9874452352523804, "learning_rate": 0.00010546370872780369, "loss": 1.8995, "step": 20080 }, { "epoch": 0.4731289152654138, "grad_norm": 2.646638870239258, "learning_rate": 0.00010541660779049502, "loss": 2.1751, "step": 20090 }, { "epoch": 0.47336441995195705, "grad_norm": 2.6359567642211914, "learning_rate": 0.00010536950685318639, "loss": 1.9397, "step": 20100 }, { "epoch": 0.4735999246385003, "grad_norm": 2.21799635887146, "learning_rate": 0.00010532240591587774, "loss": 2.1049, "step": 20110 }, { "epoch": 0.4738354293250436, "grad_norm": 2.5067179203033447, "learning_rate": 0.00010527530497856908, "loss": 2.174, "step": 20120 }, { "epoch": 0.47407093401158684, "grad_norm": 2.724372148513794, "learning_rate": 0.00010522820404126044, "loss": 2.1434, "step": 20130 }, { "epoch": 0.4743064386981301, "grad_norm": 2.247986078262329, "learning_rate": 0.00010518110310395178, "loss": 2.0298, "step": 20140 }, { "epoch": 0.47454194338467337, "grad_norm": 1.7842614650726318, "learning_rate": 0.00010513400216664313, "loss": 
2.0235, "step": 20150 }, { "epoch": 0.47477744807121663, "grad_norm": 2.239865303039551, "learning_rate": 0.00010508690122933446, "loss": 2.0488, "step": 20160 }, { "epoch": 0.4750129527577599, "grad_norm": 1.8381290435791016, "learning_rate": 0.00010503980029202583, "loss": 2.137, "step": 20170 }, { "epoch": 0.47524845744430316, "grad_norm": 1.837793231010437, "learning_rate": 0.00010499269935471716, "loss": 1.7239, "step": 20180 }, { "epoch": 0.4754839621308464, "grad_norm": 2.2856781482696533, "learning_rate": 0.00010494559841740851, "loss": 2.0022, "step": 20190 }, { "epoch": 0.4757194668173897, "grad_norm": 2.448535919189453, "learning_rate": 0.00010489849748009985, "loss": 2.191, "step": 20200 }, { "epoch": 0.47595497150393296, "grad_norm": 2.522270679473877, "learning_rate": 0.00010485139654279121, "loss": 2.1016, "step": 20210 }, { "epoch": 0.47619047619047616, "grad_norm": 2.3444366455078125, "learning_rate": 0.00010480429560548255, "loss": 2.1629, "step": 20220 }, { "epoch": 0.47642598087701943, "grad_norm": 1.8811753988265991, "learning_rate": 0.00010475719466817391, "loss": 1.9501, "step": 20230 }, { "epoch": 0.4766614855635627, "grad_norm": 2.2614917755126953, "learning_rate": 0.00010471009373086524, "loss": 1.9971, "step": 20240 }, { "epoch": 0.47689699025010596, "grad_norm": 2.2525382041931152, "learning_rate": 0.0001046629927935566, "loss": 2.0558, "step": 20250 }, { "epoch": 0.4771324949366492, "grad_norm": 3.0018701553344727, "learning_rate": 0.00010461589185624794, "loss": 2.1558, "step": 20260 }, { "epoch": 0.4773679996231925, "grad_norm": 2.163959264755249, "learning_rate": 0.0001045687909189393, "loss": 2.0081, "step": 20270 }, { "epoch": 0.47760350430973575, "grad_norm": 1.9860162734985352, "learning_rate": 0.00010452168998163064, "loss": 2.0474, "step": 20280 }, { "epoch": 0.477839008996279, "grad_norm": 2.7075717449188232, "learning_rate": 0.00010447458904432199, "loss": 2.0118, "step": 20290 }, { "epoch": 0.4780745136828223, "grad_norm": 
2.2994916439056396, "learning_rate": 0.00010442748810701333, "loss": 1.9586, "step": 20300 }, { "epoch": 0.47831001836936554, "grad_norm": 1.9415532350540161, "learning_rate": 0.00010438038716970469, "loss": 2.1362, "step": 20310 }, { "epoch": 0.4785455230559088, "grad_norm": 2.319530725479126, "learning_rate": 0.00010433328623239603, "loss": 1.8642, "step": 20320 }, { "epoch": 0.47878102774245207, "grad_norm": 2.8633079528808594, "learning_rate": 0.00010428618529508738, "loss": 2.2701, "step": 20330 }, { "epoch": 0.47901653242899533, "grad_norm": 3.3309438228607178, "learning_rate": 0.00010423908435777871, "loss": 2.0667, "step": 20340 }, { "epoch": 0.4792520371155386, "grad_norm": 1.8573964834213257, "learning_rate": 0.00010419198342047008, "loss": 2.1726, "step": 20350 }, { "epoch": 0.47948754180208186, "grad_norm": 2.2213289737701416, "learning_rate": 0.00010414488248316141, "loss": 2.1585, "step": 20360 }, { "epoch": 0.4797230464886251, "grad_norm": 2.0454204082489014, "learning_rate": 0.00010409778154585276, "loss": 2.1965, "step": 20370 }, { "epoch": 0.4799585511751684, "grad_norm": 2.0503110885620117, "learning_rate": 0.0001040506806085441, "loss": 2.0648, "step": 20380 }, { "epoch": 0.48019405586171166, "grad_norm": 2.948557138442993, "learning_rate": 0.00010400357967123546, "loss": 1.9269, "step": 20390 }, { "epoch": 0.4804295605482549, "grad_norm": 2.1079161167144775, "learning_rate": 0.00010395647873392681, "loss": 1.9769, "step": 20400 }, { "epoch": 0.4806650652347982, "grad_norm": 2.086078643798828, "learning_rate": 0.00010390937779661815, "loss": 2.0752, "step": 20410 }, { "epoch": 0.48090056992134145, "grad_norm": 1.7482393980026245, "learning_rate": 0.00010386227685930952, "loss": 1.9872, "step": 20420 }, { "epoch": 0.4811360746078847, "grad_norm": 2.2785568237304688, "learning_rate": 0.00010381517592200085, "loss": 2.1667, "step": 20430 }, { "epoch": 0.481371579294428, "grad_norm": 2.2354800701141357, "learning_rate": 0.00010376807498469222, 
"loss": 2.0142, "step": 20440 }, { "epoch": 0.48160708398097124, "grad_norm": 2.2600879669189453, "learning_rate": 0.00010372097404738355, "loss": 2.1101, "step": 20450 }, { "epoch": 0.4818425886675145, "grad_norm": 2.2491447925567627, "learning_rate": 0.0001036738731100749, "loss": 2.0255, "step": 20460 }, { "epoch": 0.48207809335405777, "grad_norm": 3.314981698989868, "learning_rate": 0.00010362677217276624, "loss": 1.8906, "step": 20470 }, { "epoch": 0.48231359804060103, "grad_norm": 2.204580783843994, "learning_rate": 0.0001035796712354576, "loss": 2.235, "step": 20480 }, { "epoch": 0.4825491027271443, "grad_norm": 2.513540029525757, "learning_rate": 0.00010353257029814894, "loss": 1.9714, "step": 20490 }, { "epoch": 0.4827846074136875, "grad_norm": 2.1390328407287598, "learning_rate": 0.00010348546936084029, "loss": 2.242, "step": 20500 }, { "epoch": 0.48302011210023077, "grad_norm": 1.9267793893814087, "learning_rate": 0.00010343836842353163, "loss": 2.2083, "step": 20510 }, { "epoch": 0.48325561678677403, "grad_norm": 1.821351408958435, "learning_rate": 0.00010339126748622299, "loss": 2.0652, "step": 20520 }, { "epoch": 0.4834911214733173, "grad_norm": 2.3727545738220215, "learning_rate": 0.00010334416654891433, "loss": 2.2172, "step": 20530 }, { "epoch": 0.48372662615986056, "grad_norm": 2.450948476791382, "learning_rate": 0.00010329706561160568, "loss": 2.0812, "step": 20540 }, { "epoch": 0.4839621308464038, "grad_norm": 2.104326009750366, "learning_rate": 0.00010324996467429701, "loss": 2.1296, "step": 20550 }, { "epoch": 0.4841976355329471, "grad_norm": 1.6081938743591309, "learning_rate": 0.00010320286373698838, "loss": 2.1119, "step": 20560 }, { "epoch": 0.48443314021949035, "grad_norm": 2.259737491607666, "learning_rate": 0.00010315576279967971, "loss": 2.0855, "step": 20570 }, { "epoch": 0.4846686449060336, "grad_norm": 2.6324872970581055, "learning_rate": 0.00010310866186237106, "loss": 2.1031, "step": 20580 }, { "epoch": 0.4849041495925769, 
"grad_norm": 2.676846504211426, "learning_rate": 0.0001030615609250624, "loss": 1.8389, "step": 20590 }, { "epoch": 0.48513965427912015, "grad_norm": 2.062814712524414, "learning_rate": 0.00010301445998775377, "loss": 2.0708, "step": 20600 }, { "epoch": 0.4853751589656634, "grad_norm": 1.8760517835617065, "learning_rate": 0.0001029673590504451, "loss": 1.9559, "step": 20610 }, { "epoch": 0.4856106636522067, "grad_norm": 2.316432476043701, "learning_rate": 0.00010292025811313645, "loss": 2.0927, "step": 20620 }, { "epoch": 0.48584616833874994, "grad_norm": 2.2129321098327637, "learning_rate": 0.00010287315717582779, "loss": 2.0794, "step": 20630 }, { "epoch": 0.4860816730252932, "grad_norm": 2.3312692642211914, "learning_rate": 0.00010282605623851915, "loss": 1.9387, "step": 20640 }, { "epoch": 0.48631717771183647, "grad_norm": 1.807623267173767, "learning_rate": 0.00010277895530121049, "loss": 1.984, "step": 20650 }, { "epoch": 0.48655268239837973, "grad_norm": 2.3233418464660645, "learning_rate": 0.00010273185436390185, "loss": 1.8548, "step": 20660 }, { "epoch": 0.486788187084923, "grad_norm": 2.37127423286438, "learning_rate": 0.00010268475342659319, "loss": 2.2169, "step": 20670 }, { "epoch": 0.48702369177146626, "grad_norm": 2.1091103553771973, "learning_rate": 0.00010263765248928454, "loss": 2.0467, "step": 20680 }, { "epoch": 0.4872591964580095, "grad_norm": 3.436962127685547, "learning_rate": 0.0001025905515519759, "loss": 2.1332, "step": 20690 }, { "epoch": 0.4874947011445528, "grad_norm": 2.2271640300750732, "learning_rate": 0.00010254345061466724, "loss": 2.0451, "step": 20700 }, { "epoch": 0.48773020583109605, "grad_norm": 2.0807104110717773, "learning_rate": 0.00010249634967735859, "loss": 1.9054, "step": 20710 }, { "epoch": 0.4879657105176393, "grad_norm": 2.3256382942199707, "learning_rate": 0.00010244924874004993, "loss": 1.9937, "step": 20720 }, { "epoch": 0.4882012152041826, "grad_norm": 1.8084198236465454, "learning_rate": 0.00010240214780274129, 
"loss": 2.0964, "step": 20730 }, { "epoch": 0.48843671989072585, "grad_norm": 2.9819064140319824, "learning_rate": 0.00010235504686543263, "loss": 2.0176, "step": 20740 }, { "epoch": 0.4886722245772691, "grad_norm": 2.723691701889038, "learning_rate": 0.00010230794592812398, "loss": 2.2222, "step": 20750 }, { "epoch": 0.4889077292638124, "grad_norm": 2.697124719619751, "learning_rate": 0.00010226084499081532, "loss": 2.1921, "step": 20760 }, { "epoch": 0.48914323395035564, "grad_norm": 2.0441181659698486, "learning_rate": 0.00010221374405350668, "loss": 2.1415, "step": 20770 }, { "epoch": 0.48937873863689885, "grad_norm": 2.7658491134643555, "learning_rate": 0.00010216664311619802, "loss": 2.068, "step": 20780 }, { "epoch": 0.4896142433234421, "grad_norm": 2.291435718536377, "learning_rate": 0.00010211954217888937, "loss": 1.9272, "step": 20790 }, { "epoch": 0.4898497480099854, "grad_norm": 2.1761622428894043, "learning_rate": 0.0001020724412415807, "loss": 2.1504, "step": 20800 }, { "epoch": 0.49008525269652864, "grad_norm": 1.9706686735153198, "learning_rate": 0.00010202534030427207, "loss": 2.1641, "step": 20810 }, { "epoch": 0.4903207573830719, "grad_norm": 2.0886855125427246, "learning_rate": 0.0001019782393669634, "loss": 2.0652, "step": 20820 }, { "epoch": 0.49055626206961517, "grad_norm": 2.300699234008789, "learning_rate": 0.00010193113842965477, "loss": 1.9582, "step": 20830 }, { "epoch": 0.49079176675615843, "grad_norm": 2.276219606399536, "learning_rate": 0.00010188403749234609, "loss": 2.3446, "step": 20840 }, { "epoch": 0.4910272714427017, "grad_norm": 1.8282259702682495, "learning_rate": 0.00010183693655503745, "loss": 2.1345, "step": 20850 }, { "epoch": 0.49126277612924496, "grad_norm": 2.4664013385772705, "learning_rate": 0.00010178983561772879, "loss": 2.081, "step": 20860 }, { "epoch": 0.4914982808157882, "grad_norm": 2.2036657333374023, "learning_rate": 0.00010174273468042015, "loss": 1.9418, "step": 20870 }, { "epoch": 0.4917337855023315, 
"grad_norm": 2.229975938796997, "learning_rate": 0.00010169563374311149, "loss": 2.0532, "step": 20880 }, { "epoch": 0.49196929018887475, "grad_norm": 2.0748679637908936, "learning_rate": 0.00010164853280580284, "loss": 2.0162, "step": 20890 }, { "epoch": 0.492204794875418, "grad_norm": 2.1549928188323975, "learning_rate": 0.00010160143186849418, "loss": 2.1265, "step": 20900 }, { "epoch": 0.4924402995619613, "grad_norm": 2.1036789417266846, "learning_rate": 0.00010155433093118554, "loss": 2.1916, "step": 20910 }, { "epoch": 0.49267580424850455, "grad_norm": 1.9950534105300903, "learning_rate": 0.00010150722999387688, "loss": 1.9724, "step": 20920 }, { "epoch": 0.4929113089350478, "grad_norm": 1.8147923946380615, "learning_rate": 0.00010146012905656823, "loss": 1.9993, "step": 20930 }, { "epoch": 0.4931468136215911, "grad_norm": 3.3798739910125732, "learning_rate": 0.00010141302811925957, "loss": 2.1009, "step": 20940 }, { "epoch": 0.49338231830813434, "grad_norm": 2.460247039794922, "learning_rate": 0.00010136592718195093, "loss": 2.2822, "step": 20950 }, { "epoch": 0.4936178229946776, "grad_norm": 2.2641265392303467, "learning_rate": 0.00010131882624464228, "loss": 2.0259, "step": 20960 }, { "epoch": 0.49385332768122087, "grad_norm": 2.0152974128723145, "learning_rate": 0.00010127172530733362, "loss": 2.2931, "step": 20970 }, { "epoch": 0.49408883236776413, "grad_norm": 2.167557716369629, "learning_rate": 0.00010122462437002498, "loss": 1.9011, "step": 20980 }, { "epoch": 0.4943243370543074, "grad_norm": 2.114088773727417, "learning_rate": 0.00010117752343271632, "loss": 1.8406, "step": 20990 }, { "epoch": 0.49455984174085066, "grad_norm": 2.915698289871216, "learning_rate": 0.00010113042249540767, "loss": 2.1208, "step": 21000 }, { "epoch": 0.4947953464273939, "grad_norm": 2.237755060195923, "learning_rate": 0.000101083321558099, "loss": 2.0806, "step": 21010 }, { "epoch": 0.4950308511139372, "grad_norm": 2.4972400665283203, "learning_rate": 
0.00010103622062079037, "loss": 2.1965, "step": 21020 }, { "epoch": 0.49526635580048045, "grad_norm": 1.718467116355896, "learning_rate": 0.0001009891196834817, "loss": 1.7707, "step": 21030 }, { "epoch": 0.4955018604870237, "grad_norm": 2.2876641750335693, "learning_rate": 0.00010094201874617307, "loss": 2.0414, "step": 21040 }, { "epoch": 0.495737365173567, "grad_norm": 2.5221569538116455, "learning_rate": 0.0001008949178088644, "loss": 1.9567, "step": 21050 }, { "epoch": 0.49597286986011024, "grad_norm": 2.7397263050079346, "learning_rate": 0.00010084781687155575, "loss": 1.938, "step": 21060 }, { "epoch": 0.49620837454665345, "grad_norm": 1.9480063915252686, "learning_rate": 0.00010080071593424709, "loss": 2.033, "step": 21070 }, { "epoch": 0.4964438792331967, "grad_norm": 2.3631954193115234, "learning_rate": 0.00010075361499693846, "loss": 2.1295, "step": 21080 }, { "epoch": 0.49667938391974, "grad_norm": 2.4683914184570312, "learning_rate": 0.00010070651405962979, "loss": 1.9156, "step": 21090 }, { "epoch": 0.49691488860628324, "grad_norm": 2.1928350925445557, "learning_rate": 0.00010065941312232114, "loss": 2.1845, "step": 21100 }, { "epoch": 0.4971503932928265, "grad_norm": 1.8933937549591064, "learning_rate": 0.00010061231218501248, "loss": 2.0413, "step": 21110 }, { "epoch": 0.4973858979793698, "grad_norm": 2.532449722290039, "learning_rate": 0.00010056521124770384, "loss": 2.1513, "step": 21120 }, { "epoch": 0.49762140266591304, "grad_norm": 2.1574230194091797, "learning_rate": 0.00010051811031039518, "loss": 1.8182, "step": 21130 }, { "epoch": 0.4978569073524563, "grad_norm": 2.202259063720703, "learning_rate": 0.00010047100937308653, "loss": 2.2352, "step": 21140 }, { "epoch": 0.49809241203899957, "grad_norm": 2.3444535732269287, "learning_rate": 0.00010042390843577787, "loss": 2.0119, "step": 21150 }, { "epoch": 0.49832791672554283, "grad_norm": 2.5278213024139404, "learning_rate": 0.00010037680749846923, "loss": 2.1794, "step": 21160 }, { "epoch": 
0.4985634214120861, "grad_norm": 1.6672074794769287, "learning_rate": 0.00010032970656116057, "loss": 1.8915, "step": 21170 }, { "epoch": 0.49879892609862936, "grad_norm": 1.9059944152832031, "learning_rate": 0.00010028260562385192, "loss": 1.9642, "step": 21180 }, { "epoch": 0.4990344307851726, "grad_norm": 2.001873254776001, "learning_rate": 0.00010023550468654325, "loss": 2.1203, "step": 21190 }, { "epoch": 0.4992699354717159, "grad_norm": 2.2856180667877197, "learning_rate": 0.00010018840374923462, "loss": 2.1251, "step": 21200 }, { "epoch": 0.49950544015825915, "grad_norm": 3.174320936203003, "learning_rate": 0.00010014130281192595, "loss": 2.0888, "step": 21210 }, { "epoch": 0.4997409448448024, "grad_norm": 3.9642059803009033, "learning_rate": 0.0001000942018746173, "loss": 2.13, "step": 21220 }, { "epoch": 0.4999764495313457, "grad_norm": 2.8816041946411133, "learning_rate": 0.00010004710093730864, "loss": 2.2852, "step": 21230 }, { "epoch": 0.5002119542178889, "grad_norm": 2.1514241695404053, "learning_rate": 0.0001, "loss": 2.175, "step": 21240 }, { "epoch": 0.5004474589044322, "grad_norm": 2.823493242263794, "learning_rate": 9.995289906269136e-05, "loss": 1.9793, "step": 21250 }, { "epoch": 0.5006829635909754, "grad_norm": 2.3700454235076904, "learning_rate": 9.99057981253827e-05, "loss": 2.2788, "step": 21260 }, { "epoch": 0.5009184682775187, "grad_norm": 2.2594282627105713, "learning_rate": 9.985869718807404e-05, "loss": 2.0639, "step": 21270 }, { "epoch": 0.501153972964062, "grad_norm": 2.6550183296203613, "learning_rate": 9.981159625076539e-05, "loss": 2.1829, "step": 21280 }, { "epoch": 0.5013894776506053, "grad_norm": 2.1802735328674316, "learning_rate": 9.976449531345674e-05, "loss": 2.0109, "step": 21290 }, { "epoch": 0.5016249823371485, "grad_norm": 2.058255672454834, "learning_rate": 9.971739437614809e-05, "loss": 2.2871, "step": 21300 }, { "epoch": 0.5018604870236918, "grad_norm": 2.2070486545562744, "learning_rate": 9.967029343883943e-05, 
"loss": 2.062, "step": 21310 }, { "epoch": 0.502095991710235, "grad_norm": 2.2400360107421875, "learning_rate": 9.962319250153078e-05, "loss": 2.0432, "step": 21320 }, { "epoch": 0.5023314963967783, "grad_norm": 1.9597783088684082, "learning_rate": 9.957609156422213e-05, "loss": 2.0918, "step": 21330 }, { "epoch": 0.5025670010833215, "grad_norm": 2.0394837856292725, "learning_rate": 9.952899062691348e-05, "loss": 2.0768, "step": 21340 }, { "epoch": 0.5028025057698648, "grad_norm": 2.9763147830963135, "learning_rate": 9.948188968960482e-05, "loss": 2.2218, "step": 21350 }, { "epoch": 0.5030380104564081, "grad_norm": 2.2641072273254395, "learning_rate": 9.943478875229617e-05, "loss": 2.1642, "step": 21360 }, { "epoch": 0.5032735151429514, "grad_norm": 1.8236515522003174, "learning_rate": 9.938768781498752e-05, "loss": 2.0839, "step": 21370 }, { "epoch": 0.5035090198294946, "grad_norm": 2.490896224975586, "learning_rate": 9.934058687767887e-05, "loss": 2.2359, "step": 21380 }, { "epoch": 0.5037445245160379, "grad_norm": 2.3176302909851074, "learning_rate": 9.929348594037022e-05, "loss": 2.2307, "step": 21390 }, { "epoch": 0.5039800292025811, "grad_norm": 2.2265188694000244, "learning_rate": 9.924638500306157e-05, "loss": 2.0561, "step": 21400 }, { "epoch": 0.5042155338891244, "grad_norm": 2.182687759399414, "learning_rate": 9.919928406575292e-05, "loss": 2.1138, "step": 21410 }, { "epoch": 0.5044510385756676, "grad_norm": 2.3917243480682373, "learning_rate": 9.915218312844427e-05, "loss": 2.1989, "step": 21420 }, { "epoch": 0.504686543262211, "grad_norm": 2.182008981704712, "learning_rate": 9.91050821911356e-05, "loss": 1.9135, "step": 21430 }, { "epoch": 0.5049220479487542, "grad_norm": 2.1667606830596924, "learning_rate": 9.905798125382696e-05, "loss": 1.977, "step": 21440 }, { "epoch": 0.5051575526352975, "grad_norm": 2.5774123668670654, "learning_rate": 9.90108803165183e-05, "loss": 2.0104, "step": 21450 }, { "epoch": 0.5053930573218407, "grad_norm": 
2.3878164291381836, "learning_rate": 9.896377937920966e-05, "loss": 1.9225, "step": 21460 }, { "epoch": 0.505628562008384, "grad_norm": 2.676603078842163, "learning_rate": 9.8916678441901e-05, "loss": 2.1486, "step": 21470 }, { "epoch": 0.5058640666949272, "grad_norm": 1.9781603813171387, "learning_rate": 9.886957750459234e-05, "loss": 2.0173, "step": 21480 }, { "epoch": 0.5060995713814704, "grad_norm": 2.2742934226989746, "learning_rate": 9.882247656728369e-05, "loss": 2.0306, "step": 21490 }, { "epoch": 0.5063350760680138, "grad_norm": 1.8691270351409912, "learning_rate": 9.877537562997504e-05, "loss": 1.8714, "step": 21500 }, { "epoch": 0.506570580754557, "grad_norm": 2.0995771884918213, "learning_rate": 9.87282746926664e-05, "loss": 1.931, "step": 21510 }, { "epoch": 0.5068060854411003, "grad_norm": 2.2483112812042236, "learning_rate": 9.868117375535773e-05, "loss": 2.1824, "step": 21520 }, { "epoch": 0.5070415901276435, "grad_norm": 2.2055604457855225, "learning_rate": 9.863407281804908e-05, "loss": 1.8965, "step": 21530 }, { "epoch": 0.5072770948141868, "grad_norm": 2.187610626220703, "learning_rate": 9.858697188074043e-05, "loss": 2.1238, "step": 21540 }, { "epoch": 0.50751259950073, "grad_norm": 2.0155301094055176, "learning_rate": 9.853987094343178e-05, "loss": 2.2547, "step": 21550 }, { "epoch": 0.5077481041872733, "grad_norm": 2.0881471633911133, "learning_rate": 9.849277000612313e-05, "loss": 2.05, "step": 21560 }, { "epoch": 0.5079836088738166, "grad_norm": 2.319646120071411, "learning_rate": 9.844566906881447e-05, "loss": 1.9783, "step": 21570 }, { "epoch": 0.5082191135603599, "grad_norm": 2.136197805404663, "learning_rate": 9.839856813150582e-05, "loss": 2.0, "step": 21580 }, { "epoch": 0.5084546182469031, "grad_norm": 2.998568058013916, "learning_rate": 9.835146719419717e-05, "loss": 2.1183, "step": 21590 }, { "epoch": 0.5086901229334464, "grad_norm": 2.355353355407715, "learning_rate": 9.830436625688852e-05, "loss": 2.2414, "step": 21600 }, { 
"epoch": 0.5089256276199896, "grad_norm": 1.928250789642334, "learning_rate": 9.825726531957986e-05, "loss": 1.8088, "step": 21610 }, { "epoch": 0.5091611323065329, "grad_norm": 3.0732262134552, "learning_rate": 9.82101643822712e-05, "loss": 1.9908, "step": 21620 }, { "epoch": 0.5093966369930761, "grad_norm": 3.366936445236206, "learning_rate": 9.816306344496256e-05, "loss": 2.067, "step": 21630 }, { "epoch": 0.5096321416796195, "grad_norm": 2.2484872341156006, "learning_rate": 9.81159625076539e-05, "loss": 2.1143, "step": 21640 }, { "epoch": 0.5098676463661627, "grad_norm": 2.183173894882202, "learning_rate": 9.806886157034524e-05, "loss": 2.1001, "step": 21650 }, { "epoch": 0.510103151052706, "grad_norm": 2.780048131942749, "learning_rate": 9.802176063303659e-05, "loss": 2.2141, "step": 21660 }, { "epoch": 0.5103386557392492, "grad_norm": 2.5140750408172607, "learning_rate": 9.797465969572796e-05, "loss": 2.136, "step": 21670 }, { "epoch": 0.5105741604257925, "grad_norm": 2.4135138988494873, "learning_rate": 9.792755875841931e-05, "loss": 1.924, "step": 21680 }, { "epoch": 0.5108096651123357, "grad_norm": 2.8856024742126465, "learning_rate": 9.788045782111064e-05, "loss": 1.9122, "step": 21690 }, { "epoch": 0.511045169798879, "grad_norm": 2.015821695327759, "learning_rate": 9.7833356883802e-05, "loss": 2.2464, "step": 21700 }, { "epoch": 0.5112806744854222, "grad_norm": 3.2356677055358887, "learning_rate": 9.778625594649334e-05, "loss": 2.0949, "step": 21710 }, { "epoch": 0.5115161791719656, "grad_norm": 1.8908910751342773, "learning_rate": 9.77391550091847e-05, "loss": 1.8944, "step": 21720 }, { "epoch": 0.5117516838585088, "grad_norm": 1.9458872079849243, "learning_rate": 9.769205407187603e-05, "loss": 2.1298, "step": 21730 }, { "epoch": 0.5119871885450521, "grad_norm": 2.1977505683898926, "learning_rate": 9.764495313456738e-05, "loss": 2.3225, "step": 21740 }, { "epoch": 0.5122226932315953, "grad_norm": 2.247251033782959, "learning_rate": 
9.759785219725873e-05, "loss": 2.0982, "step": 21750 }, { "epoch": 0.5124581979181386, "grad_norm": 2.334291934967041, "learning_rate": 9.755075125995008e-05, "loss": 2.144, "step": 21760 }, { "epoch": 0.5126937026046818, "grad_norm": 2.0786211490631104, "learning_rate": 9.750365032264143e-05, "loss": 2.1501, "step": 21770 }, { "epoch": 0.512929207291225, "grad_norm": 2.1926138401031494, "learning_rate": 9.745654938533277e-05, "loss": 2.0627, "step": 21780 }, { "epoch": 0.5131647119777684, "grad_norm": 2.517853021621704, "learning_rate": 9.740944844802412e-05, "loss": 2.0631, "step": 21790 }, { "epoch": 0.5134002166643116, "grad_norm": 1.8308531045913696, "learning_rate": 9.736234751071547e-05, "loss": 2.1762, "step": 21800 }, { "epoch": 0.5136357213508549, "grad_norm": 2.1084744930267334, "learning_rate": 9.731524657340682e-05, "loss": 1.9098, "step": 21810 }, { "epoch": 0.5138712260373981, "grad_norm": 2.3798017501831055, "learning_rate": 9.726814563609816e-05, "loss": 1.8055, "step": 21820 }, { "epoch": 0.5141067307239414, "grad_norm": 2.443279266357422, "learning_rate": 9.72210446987895e-05, "loss": 2.0454, "step": 21830 }, { "epoch": 0.5143422354104846, "grad_norm": 2.0576086044311523, "learning_rate": 9.717394376148086e-05, "loss": 2.2606, "step": 21840 }, { "epoch": 0.514577740097028, "grad_norm": 1.869069218635559, "learning_rate": 9.712684282417221e-05, "loss": 1.8852, "step": 21850 }, { "epoch": 0.5148132447835712, "grad_norm": 1.9165115356445312, "learning_rate": 9.707974188686356e-05, "loss": 1.9244, "step": 21860 }, { "epoch": 0.5150487494701145, "grad_norm": 1.7431786060333252, "learning_rate": 9.70326409495549e-05, "loss": 1.9097, "step": 21870 }, { "epoch": 0.5152842541566577, "grad_norm": 2.2474775314331055, "learning_rate": 9.698554001224624e-05, "loss": 2.1901, "step": 21880 }, { "epoch": 0.515519758843201, "grad_norm": 1.9225223064422607, "learning_rate": 9.69384390749376e-05, "loss": 2.217, "step": 21890 }, { "epoch": 0.5157552635297442, 
"grad_norm": 1.9866782426834106, "learning_rate": 9.689133813762894e-05, "loss": 1.8987, "step": 21900 }, { "epoch": 0.5159907682162875, "grad_norm": 2.126774549484253, "learning_rate": 9.684423720032028e-05, "loss": 2.0953, "step": 21910 }, { "epoch": 0.5162262729028307, "grad_norm": 1.8921127319335938, "learning_rate": 9.679713626301163e-05, "loss": 2.0589, "step": 21920 }, { "epoch": 0.5164617775893741, "grad_norm": 2.78843092918396, "learning_rate": 9.675003532570298e-05, "loss": 1.9555, "step": 21930 }, { "epoch": 0.5166972822759173, "grad_norm": 2.5705997943878174, "learning_rate": 9.670293438839433e-05, "loss": 2.0471, "step": 21940 }, { "epoch": 0.5169327869624606, "grad_norm": 1.8780876398086548, "learning_rate": 9.665583345108568e-05, "loss": 2.0409, "step": 21950 }, { "epoch": 0.5171682916490038, "grad_norm": 2.1440744400024414, "learning_rate": 9.660873251377703e-05, "loss": 2.0148, "step": 21960 }, { "epoch": 0.5174037963355471, "grad_norm": 2.174579381942749, "learning_rate": 9.656163157646838e-05, "loss": 2.1547, "step": 21970 }, { "epoch": 0.5176393010220903, "grad_norm": 3.240619421005249, "learning_rate": 9.651453063915973e-05, "loss": 2.0667, "step": 21980 }, { "epoch": 0.5178748057086336, "grad_norm": 2.3205692768096924, "learning_rate": 9.646742970185107e-05, "loss": 1.8602, "step": 21990 }, { "epoch": 0.5181103103951769, "grad_norm": 3.049022912979126, "learning_rate": 9.642032876454242e-05, "loss": 2.0942, "step": 22000 }, { "epoch": 0.5183458150817202, "grad_norm": 2.585932970046997, "learning_rate": 9.637322782723377e-05, "loss": 1.9998, "step": 22010 }, { "epoch": 0.5185813197682634, "grad_norm": 2.4109232425689697, "learning_rate": 9.632612688992512e-05, "loss": 2.075, "step": 22020 }, { "epoch": 0.5188168244548067, "grad_norm": 2.334937334060669, "learning_rate": 9.627902595261646e-05, "loss": 1.8412, "step": 22030 }, { "epoch": 0.5190523291413499, "grad_norm": 3.0095884799957275, "learning_rate": 9.623192501530781e-05, "loss": 1.918, 
"step": 22040 }, { "epoch": 0.5192878338278932, "grad_norm": 1.9258203506469727, "learning_rate": 9.618482407799916e-05, "loss": 1.9809, "step": 22050 }, { "epoch": 0.5195233385144364, "grad_norm": 1.8226529359817505, "learning_rate": 9.613772314069051e-05, "loss": 1.9827, "step": 22060 }, { "epoch": 0.5197588432009796, "grad_norm": 2.051090955734253, "learning_rate": 9.609062220338186e-05, "loss": 1.9504, "step": 22070 }, { "epoch": 0.519994347887523, "grad_norm": 2.140054941177368, "learning_rate": 9.60435212660732e-05, "loss": 2.0869, "step": 22080 }, { "epoch": 0.5202298525740662, "grad_norm": 2.1044740676879883, "learning_rate": 9.599642032876455e-05, "loss": 1.9475, "step": 22090 }, { "epoch": 0.5204653572606095, "grad_norm": 1.9756555557250977, "learning_rate": 9.59493193914559e-05, "loss": 2.0775, "step": 22100 }, { "epoch": 0.5207008619471527, "grad_norm": 2.549635171890259, "learning_rate": 9.590221845414725e-05, "loss": 1.8268, "step": 22110 }, { "epoch": 0.520936366633696, "grad_norm": 2.121861696243286, "learning_rate": 9.585511751683858e-05, "loss": 2.103, "step": 22120 }, { "epoch": 0.5211718713202392, "grad_norm": 2.389665365219116, "learning_rate": 9.580801657952993e-05, "loss": 2.2068, "step": 22130 }, { "epoch": 0.5214073760067826, "grad_norm": 2.340815782546997, "learning_rate": 9.576091564222128e-05, "loss": 2.0227, "step": 22140 }, { "epoch": 0.5216428806933258, "grad_norm": 2.191857099533081, "learning_rate": 9.571381470491263e-05, "loss": 2.1419, "step": 22150 }, { "epoch": 0.5218783853798691, "grad_norm": 3.036268711090088, "learning_rate": 9.566671376760398e-05, "loss": 2.0012, "step": 22160 }, { "epoch": 0.5221138900664123, "grad_norm": 2.6708240509033203, "learning_rate": 9.561961283029532e-05, "loss": 2.052, "step": 22170 }, { "epoch": 0.5223493947529556, "grad_norm": 2.439678907394409, "learning_rate": 9.557251189298667e-05, "loss": 2.2863, "step": 22180 }, { "epoch": 0.5225848994394988, "grad_norm": 2.5144474506378174, 
"learning_rate": 9.552541095567802e-05, "loss": 2.1124, "step": 22190 }, { "epoch": 0.5228204041260421, "grad_norm": 2.6806702613830566, "learning_rate": 9.547831001836937e-05, "loss": 2.1127, "step": 22200 }, { "epoch": 0.5230559088125853, "grad_norm": 2.2948286533355713, "learning_rate": 9.543120908106071e-05, "loss": 2.0362, "step": 22210 }, { "epoch": 0.5232914134991287, "grad_norm": 2.144993543624878, "learning_rate": 9.538410814375206e-05, "loss": 1.8412, "step": 22220 }, { "epoch": 0.5235269181856719, "grad_norm": 2.0969650745391846, "learning_rate": 9.533700720644342e-05, "loss": 2.1297, "step": 22230 }, { "epoch": 0.5237624228722152, "grad_norm": 1.9239368438720703, "learning_rate": 9.528990626913477e-05, "loss": 2.0009, "step": 22240 }, { "epoch": 0.5239979275587584, "grad_norm": 2.1357004642486572, "learning_rate": 9.524280533182611e-05, "loss": 2.1151, "step": 22250 }, { "epoch": 0.5242334322453017, "grad_norm": 2.1568477153778076, "learning_rate": 9.519570439451746e-05, "loss": 1.991, "step": 22260 }, { "epoch": 0.5244689369318449, "grad_norm": 2.371934175491333, "learning_rate": 9.514860345720881e-05, "loss": 2.1708, "step": 22270 }, { "epoch": 0.5247044416183883, "grad_norm": 1.8997218608856201, "learning_rate": 9.510150251990016e-05, "loss": 2.0686, "step": 22280 }, { "epoch": 0.5249399463049315, "grad_norm": 2.1850790977478027, "learning_rate": 9.50544015825915e-05, "loss": 2.0206, "step": 22290 }, { "epoch": 0.5251754509914748, "grad_norm": 2.4998154640197754, "learning_rate": 9.500730064528285e-05, "loss": 1.874, "step": 22300 }, { "epoch": 0.525410955678018, "grad_norm": 2.6623775959014893, "learning_rate": 9.49601997079742e-05, "loss": 2.1144, "step": 22310 }, { "epoch": 0.5256464603645613, "grad_norm": 2.4677085876464844, "learning_rate": 9.491309877066555e-05, "loss": 2.0148, "step": 22320 }, { "epoch": 0.5258819650511045, "grad_norm": 2.5146234035491943, "learning_rate": 9.486599783335688e-05, "loss": 2.0617, "step": 22330 }, { "epoch": 
0.5261174697376477, "grad_norm": 1.9805279970169067, "learning_rate": 9.481889689604823e-05, "loss": 2.0317, "step": 22340 }, { "epoch": 0.526352974424191, "grad_norm": 1.9576618671417236, "learning_rate": 9.477179595873958e-05, "loss": 2.1339, "step": 22350 }, { "epoch": 0.5265884791107343, "grad_norm": 2.2992379665374756, "learning_rate": 9.472469502143093e-05, "loss": 2.1361, "step": 22360 }, { "epoch": 0.5268239837972776, "grad_norm": 2.034731149673462, "learning_rate": 9.467759408412228e-05, "loss": 2.062, "step": 22370 }, { "epoch": 0.5270594884838208, "grad_norm": 2.1001486778259277, "learning_rate": 9.463049314681362e-05, "loss": 2.1102, "step": 22380 }, { "epoch": 0.5272949931703641, "grad_norm": 1.7902547121047974, "learning_rate": 9.458339220950497e-05, "loss": 1.8913, "step": 22390 }, { "epoch": 0.5275304978569073, "grad_norm": 2.0695128440856934, "learning_rate": 9.453629127219632e-05, "loss": 2.0724, "step": 22400 }, { "epoch": 0.5277660025434506, "grad_norm": 2.392047643661499, "learning_rate": 9.448919033488767e-05, "loss": 1.8279, "step": 22410 }, { "epoch": 0.5280015072299938, "grad_norm": 2.0684807300567627, "learning_rate": 9.444208939757901e-05, "loss": 2.2671, "step": 22420 }, { "epoch": 0.5282370119165372, "grad_norm": 1.8215241432189941, "learning_rate": 9.439498846027036e-05, "loss": 2.1644, "step": 22430 }, { "epoch": 0.5284725166030804, "grad_norm": 2.322417974472046, "learning_rate": 9.434788752296171e-05, "loss": 2.1318, "step": 22440 }, { "epoch": 0.5287080212896237, "grad_norm": 2.333282232284546, "learning_rate": 9.430078658565306e-05, "loss": 1.8483, "step": 22450 }, { "epoch": 0.5289435259761669, "grad_norm": 2.832303285598755, "learning_rate": 9.425368564834441e-05, "loss": 2.0394, "step": 22460 }, { "epoch": 0.5291790306627102, "grad_norm": 1.6820281744003296, "learning_rate": 9.420658471103575e-05, "loss": 2.027, "step": 22470 }, { "epoch": 0.5294145353492534, "grad_norm": 3.3374030590057373, "learning_rate": 
9.41594837737271e-05, "loss": 2.1277, "step": 22480 }, { "epoch": 0.5296500400357967, "grad_norm": 2.0465030670166016, "learning_rate": 9.411238283641845e-05, "loss": 2.0185, "step": 22490 }, { "epoch": 0.52988554472234, "grad_norm": 2.40169620513916, "learning_rate": 9.40652818991098e-05, "loss": 2.2946, "step": 22500 }, { "epoch": 0.5301210494088833, "grad_norm": 2.6130552291870117, "learning_rate": 9.401818096180113e-05, "loss": 2.3308, "step": 22510 }, { "epoch": 0.5303565540954265, "grad_norm": 2.0632524490356445, "learning_rate": 9.39710800244925e-05, "loss": 2.1752, "step": 22520 }, { "epoch": 0.5305920587819698, "grad_norm": 1.7699297666549683, "learning_rate": 9.392397908718385e-05, "loss": 2.0812, "step": 22530 }, { "epoch": 0.530827563468513, "grad_norm": 2.3439061641693115, "learning_rate": 9.38768781498752e-05, "loss": 2.0295, "step": 22540 }, { "epoch": 0.5310630681550563, "grad_norm": 2.736743688583374, "learning_rate": 9.382977721256653e-05, "loss": 2.0404, "step": 22550 }, { "epoch": 0.5312985728415995, "grad_norm": 1.7512779235839844, "learning_rate": 9.378267627525788e-05, "loss": 2.0293, "step": 22560 }, { "epoch": 0.5315340775281429, "grad_norm": 2.9181957244873047, "learning_rate": 9.373557533794923e-05, "loss": 2.0772, "step": 22570 }, { "epoch": 0.5317695822146861, "grad_norm": 2.2531585693359375, "learning_rate": 9.368847440064059e-05, "loss": 2.1138, "step": 22580 }, { "epoch": 0.5320050869012294, "grad_norm": 2.3378305435180664, "learning_rate": 9.364137346333192e-05, "loss": 2.164, "step": 22590 }, { "epoch": 0.5322405915877726, "grad_norm": 2.257312536239624, "learning_rate": 9.359427252602327e-05, "loss": 1.9478, "step": 22600 }, { "epoch": 0.5324760962743159, "grad_norm": 3.4792850017547607, "learning_rate": 9.354717158871462e-05, "loss": 1.9697, "step": 22610 }, { "epoch": 0.5327116009608591, "grad_norm": 2.011868476867676, "learning_rate": 9.350007065140597e-05, "loss": 1.9607, "step": 22620 }, { "epoch": 0.5329471056474023, 
"grad_norm": 3.508305072784424, "learning_rate": 9.345296971409731e-05, "loss": 2.0881, "step": 22630 }, { "epoch": 0.5331826103339457, "grad_norm": 2.206843137741089, "learning_rate": 9.340586877678866e-05, "loss": 2.0199, "step": 22640 }, { "epoch": 0.5334181150204889, "grad_norm": 2.1505041122436523, "learning_rate": 9.335876783948001e-05, "loss": 1.9688, "step": 22650 }, { "epoch": 0.5336536197070322, "grad_norm": 2.0961718559265137, "learning_rate": 9.331166690217136e-05, "loss": 2.0561, "step": 22660 }, { "epoch": 0.5338891243935754, "grad_norm": 1.9985352754592896, "learning_rate": 9.326456596486271e-05, "loss": 1.9848, "step": 22670 }, { "epoch": 0.5341246290801187, "grad_norm": 3.043963670730591, "learning_rate": 9.321746502755405e-05, "loss": 2.0881, "step": 22680 }, { "epoch": 0.5343601337666619, "grad_norm": 2.3572945594787598, "learning_rate": 9.31703640902454e-05, "loss": 1.995, "step": 22690 }, { "epoch": 0.5345956384532052, "grad_norm": 2.074956178665161, "learning_rate": 9.312326315293675e-05, "loss": 2.0339, "step": 22700 }, { "epoch": 0.5348311431397484, "grad_norm": 1.7180410623550415, "learning_rate": 9.30761622156281e-05, "loss": 1.9256, "step": 22710 }, { "epoch": 0.5350666478262918, "grad_norm": 2.5363078117370605, "learning_rate": 9.302906127831943e-05, "loss": 2.2295, "step": 22720 }, { "epoch": 0.535302152512835, "grad_norm": 2.200470209121704, "learning_rate": 9.298196034101078e-05, "loss": 1.904, "step": 22730 }, { "epoch": 0.5355376571993783, "grad_norm": 1.8723726272583008, "learning_rate": 9.293485940370213e-05, "loss": 2.2491, "step": 22740 }, { "epoch": 0.5357731618859215, "grad_norm": 2.703275442123413, "learning_rate": 9.288775846639349e-05, "loss": 1.7582, "step": 22750 }, { "epoch": 0.5360086665724648, "grad_norm": 2.2764389514923096, "learning_rate": 9.284065752908484e-05, "loss": 1.9832, "step": 22760 }, { "epoch": 0.536244171259008, "grad_norm": 2.089346170425415, "learning_rate": 9.279355659177617e-05, "loss": 2.2175, 
"step": 22770 }, { "epoch": 0.5364796759455513, "grad_norm": 2.090502977371216, "learning_rate": 9.274645565446752e-05, "loss": 2.1681, "step": 22780 }, { "epoch": 0.5367151806320946, "grad_norm": 3.0695157051086426, "learning_rate": 9.269935471715887e-05, "loss": 2.205, "step": 22790 }, { "epoch": 0.5369506853186379, "grad_norm": 2.0072319507598877, "learning_rate": 9.265225377985022e-05, "loss": 1.9657, "step": 22800 }, { "epoch": 0.5371861900051811, "grad_norm": 2.8740854263305664, "learning_rate": 9.260515284254157e-05, "loss": 2.011, "step": 22810 }, { "epoch": 0.5374216946917244, "grad_norm": 2.9721429347991943, "learning_rate": 9.255805190523292e-05, "loss": 2.0737, "step": 22820 }, { "epoch": 0.5376571993782676, "grad_norm": 1.8961126804351807, "learning_rate": 9.251095096792427e-05, "loss": 1.89, "step": 22830 }, { "epoch": 0.5378927040648109, "grad_norm": 2.3074028491973877, "learning_rate": 9.246385003061562e-05, "loss": 2.0739, "step": 22840 }, { "epoch": 0.5381282087513541, "grad_norm": 2.082486391067505, "learning_rate": 9.241674909330696e-05, "loss": 2.0767, "step": 22850 }, { "epoch": 0.5383637134378975, "grad_norm": 2.289553165435791, "learning_rate": 9.236964815599831e-05, "loss": 2.1389, "step": 22860 }, { "epoch": 0.5385992181244407, "grad_norm": 2.9601359367370605, "learning_rate": 9.232254721868966e-05, "loss": 1.984, "step": 22870 }, { "epoch": 0.538834722810984, "grad_norm": 2.5509331226348877, "learning_rate": 9.227544628138101e-05, "loss": 2.1656, "step": 22880 }, { "epoch": 0.5390702274975272, "grad_norm": 2.1479685306549072, "learning_rate": 9.222834534407235e-05, "loss": 2.0994, "step": 22890 }, { "epoch": 0.5393057321840704, "grad_norm": 1.8056848049163818, "learning_rate": 9.21812444067637e-05, "loss": 1.9269, "step": 22900 }, { "epoch": 0.5395412368706137, "grad_norm": 2.1940340995788574, "learning_rate": 9.213414346945505e-05, "loss": 1.9795, "step": 22910 }, { "epoch": 0.5397767415571569, "grad_norm": 2.247976541519165, 
"learning_rate": 9.20870425321464e-05, "loss": 2.0206, "step": 22920 }, { "epoch": 0.5400122462437003, "grad_norm": 2.516195774078369, "learning_rate": 9.203994159483774e-05, "loss": 1.9513, "step": 22930 }, { "epoch": 0.5402477509302435, "grad_norm": 1.9935544729232788, "learning_rate": 9.199284065752909e-05, "loss": 1.8158, "step": 22940 }, { "epoch": 0.5404832556167868, "grad_norm": 1.9285348653793335, "learning_rate": 9.194573972022044e-05, "loss": 2.0684, "step": 22950 }, { "epoch": 0.54071876030333, "grad_norm": 2.0052757263183594, "learning_rate": 9.189863878291179e-05, "loss": 1.8693, "step": 22960 }, { "epoch": 0.5409542649898733, "grad_norm": 3.399982213973999, "learning_rate": 9.185153784560314e-05, "loss": 2.2221, "step": 22970 }, { "epoch": 0.5411897696764165, "grad_norm": 1.7858917713165283, "learning_rate": 9.180443690829447e-05, "loss": 1.9837, "step": 22980 }, { "epoch": 0.5414252743629598, "grad_norm": 2.4309234619140625, "learning_rate": 9.175733597098582e-05, "loss": 2.0491, "step": 22990 }, { "epoch": 0.541660779049503, "grad_norm": 1.9532793760299683, "learning_rate": 9.171023503367717e-05, "loss": 2.0639, "step": 23000 }, { "epoch": 0.5418962837360464, "grad_norm": 2.7318344116210938, "learning_rate": 9.166313409636852e-05, "loss": 2.1516, "step": 23010 }, { "epoch": 0.5421317884225896, "grad_norm": 1.8187711238861084, "learning_rate": 9.161603315905986e-05, "loss": 1.8155, "step": 23020 }, { "epoch": 0.5423672931091329, "grad_norm": 2.425743818283081, "learning_rate": 9.156893222175121e-05, "loss": 2.1712, "step": 23030 }, { "epoch": 0.5426027977956761, "grad_norm": 1.8586986064910889, "learning_rate": 9.152183128444256e-05, "loss": 2.03, "step": 23040 }, { "epoch": 0.5428383024822194, "grad_norm": 2.399402618408203, "learning_rate": 9.147473034713391e-05, "loss": 1.9666, "step": 23050 }, { "epoch": 0.5430738071687626, "grad_norm": 1.966139554977417, "learning_rate": 9.142762940982526e-05, "loss": 1.9428, "step": 23060 }, { "epoch": 
0.543309311855306, "grad_norm": 2.4574363231658936, "learning_rate": 9.13805284725166e-05, "loss": 2.115, "step": 23070 }, { "epoch": 0.5435448165418492, "grad_norm": 2.235666275024414, "learning_rate": 9.133342753520796e-05, "loss": 2.0983, "step": 23080 }, { "epoch": 0.5437803212283925, "grad_norm": 1.8291510343551636, "learning_rate": 9.128632659789931e-05, "loss": 2.0416, "step": 23090 }, { "epoch": 0.5440158259149357, "grad_norm": 2.8294384479522705, "learning_rate": 9.123922566059065e-05, "loss": 2.2959, "step": 23100 }, { "epoch": 0.544251330601479, "grad_norm": 2.171060085296631, "learning_rate": 9.1192124723282e-05, "loss": 2.1052, "step": 23110 }, { "epoch": 0.5444868352880222, "grad_norm": 2.0458946228027344, "learning_rate": 9.114502378597335e-05, "loss": 2.029, "step": 23120 }, { "epoch": 0.5447223399745655, "grad_norm": 1.9284874200820923, "learning_rate": 9.10979228486647e-05, "loss": 2.0337, "step": 23130 }, { "epoch": 0.5449578446611087, "grad_norm": 1.6422959566116333, "learning_rate": 9.105082191135605e-05, "loss": 2.2123, "step": 23140 }, { "epoch": 0.5451933493476521, "grad_norm": 2.0372812747955322, "learning_rate": 9.100372097404739e-05, "loss": 2.0597, "step": 23150 }, { "epoch": 0.5454288540341953, "grad_norm": 2.334294319152832, "learning_rate": 9.095662003673874e-05, "loss": 1.9422, "step": 23160 }, { "epoch": 0.5456643587207386, "grad_norm": 2.039754629135132, "learning_rate": 9.090951909943009e-05, "loss": 2.1631, "step": 23170 }, { "epoch": 0.5458998634072818, "grad_norm": 1.9584988355636597, "learning_rate": 9.086241816212144e-05, "loss": 2.1069, "step": 23180 }, { "epoch": 0.546135368093825, "grad_norm": 2.1057446002960205, "learning_rate": 9.081531722481277e-05, "loss": 2.0025, "step": 23190 }, { "epoch": 0.5463708727803683, "grad_norm": 2.6134400367736816, "learning_rate": 9.076821628750412e-05, "loss": 2.2541, "step": 23200 }, { "epoch": 0.5466063774669115, "grad_norm": 1.9767898321151733, "learning_rate": 9.072111535019547e-05, 
"loss": 1.9391, "step": 23210 }, { "epoch": 0.5468418821534549, "grad_norm": 2.1497464179992676, "learning_rate": 9.067401441288682e-05, "loss": 1.992, "step": 23220 }, { "epoch": 0.5470773868399981, "grad_norm": 1.8777291774749756, "learning_rate": 9.062691347557816e-05, "loss": 2.0687, "step": 23230 }, { "epoch": 0.5473128915265414, "grad_norm": 2.7456014156341553, "learning_rate": 9.057981253826951e-05, "loss": 1.9403, "step": 23240 }, { "epoch": 0.5475483962130846, "grad_norm": 1.9173210859298706, "learning_rate": 9.053271160096086e-05, "loss": 2.362, "step": 23250 }, { "epoch": 0.5477839008996279, "grad_norm": 3.4497084617614746, "learning_rate": 9.048561066365221e-05, "loss": 2.3116, "step": 23260 }, { "epoch": 0.5480194055861711, "grad_norm": 1.9495487213134766, "learning_rate": 9.043850972634356e-05, "loss": 2.2498, "step": 23270 }, { "epoch": 0.5482549102727144, "grad_norm": 2.1822617053985596, "learning_rate": 9.03914087890349e-05, "loss": 2.1444, "step": 23280 }, { "epoch": 0.5484904149592577, "grad_norm": 2.625450849533081, "learning_rate": 9.034430785172625e-05, "loss": 2.1074, "step": 23290 }, { "epoch": 0.548725919645801, "grad_norm": 2.899829149246216, "learning_rate": 9.02972069144176e-05, "loss": 2.1126, "step": 23300 }, { "epoch": 0.5489614243323442, "grad_norm": 3.0588479042053223, "learning_rate": 9.025010597710895e-05, "loss": 1.9202, "step": 23310 }, { "epoch": 0.5491969290188875, "grad_norm": 2.1747286319732666, "learning_rate": 9.020300503980029e-05, "loss": 1.9893, "step": 23320 }, { "epoch": 0.5494324337054307, "grad_norm": 2.146778106689453, "learning_rate": 9.015590410249164e-05, "loss": 1.988, "step": 23330 }, { "epoch": 0.549667938391974, "grad_norm": 1.8845597505569458, "learning_rate": 9.010880316518299e-05, "loss": 2.1452, "step": 23340 }, { "epoch": 0.5499034430785172, "grad_norm": 2.016979694366455, "learning_rate": 9.006170222787434e-05, "loss": 2.1675, "step": 23350 }, { "epoch": 0.5501389477650606, "grad_norm": 
2.911409378051758, "learning_rate": 9.001460129056569e-05, "loss": 2.0028, "step": 23360 }, { "epoch": 0.5503744524516038, "grad_norm": 2.8870551586151123, "learning_rate": 8.996750035325704e-05, "loss": 1.9068, "step": 23370 }, { "epoch": 0.5506099571381471, "grad_norm": 2.1140267848968506, "learning_rate": 8.992039941594839e-05, "loss": 2.1994, "step": 23380 }, { "epoch": 0.5508454618246903, "grad_norm": 1.9857615232467651, "learning_rate": 8.987329847863974e-05, "loss": 2.1649, "step": 23390 }, { "epoch": 0.5510809665112336, "grad_norm": 2.7637999057769775, "learning_rate": 8.982619754133107e-05, "loss": 1.9992, "step": 23400 }, { "epoch": 0.5513164711977768, "grad_norm": 2.225193738937378, "learning_rate": 8.977909660402243e-05, "loss": 2.1342, "step": 23410 }, { "epoch": 0.5515519758843201, "grad_norm": 1.7296162843704224, "learning_rate": 8.973199566671378e-05, "loss": 2.0405, "step": 23420 }, { "epoch": 0.5517874805708634, "grad_norm": 2.2399344444274902, "learning_rate": 8.968489472940513e-05, "loss": 2.0995, "step": 23430 }, { "epoch": 0.5520229852574067, "grad_norm": 2.2582974433898926, "learning_rate": 8.963779379209648e-05, "loss": 1.9908, "step": 23440 }, { "epoch": 0.5522584899439499, "grad_norm": 2.6607015132904053, "learning_rate": 8.959069285478781e-05, "loss": 1.9292, "step": 23450 }, { "epoch": 0.5524939946304932, "grad_norm": 2.3309760093688965, "learning_rate": 8.954359191747916e-05, "loss": 1.874, "step": 23460 }, { "epoch": 0.5527294993170364, "grad_norm": 2.2204902172088623, "learning_rate": 8.949649098017051e-05, "loss": 2.064, "step": 23470 }, { "epoch": 0.5529650040035796, "grad_norm": 1.4835526943206787, "learning_rate": 8.944939004286186e-05, "loss": 1.9785, "step": 23480 }, { "epoch": 0.5532005086901229, "grad_norm": 2.3408985137939453, "learning_rate": 8.94022891055532e-05, "loss": 2.0047, "step": 23490 }, { "epoch": 0.5534360133766661, "grad_norm": 2.300576686859131, "learning_rate": 8.935518816824455e-05, "loss": 2.1159, "step": 
23500 }, { "epoch": 0.5536715180632095, "grad_norm": 1.9129767417907715, "learning_rate": 8.93080872309359e-05, "loss": 2.0221, "step": 23510 }, { "epoch": 0.5539070227497527, "grad_norm": 1.8746204376220703, "learning_rate": 8.926098629362725e-05, "loss": 2.1288, "step": 23520 }, { "epoch": 0.554142527436296, "grad_norm": 1.7646563053131104, "learning_rate": 8.921388535631859e-05, "loss": 2.2262, "step": 23530 }, { "epoch": 0.5543780321228392, "grad_norm": 1.9448425769805908, "learning_rate": 8.916678441900994e-05, "loss": 1.8702, "step": 23540 }, { "epoch": 0.5546135368093825, "grad_norm": 2.126228094100952, "learning_rate": 8.911968348170129e-05, "loss": 2.1124, "step": 23550 }, { "epoch": 0.5548490414959257, "grad_norm": 1.686448097229004, "learning_rate": 8.907258254439264e-05, "loss": 2.0523, "step": 23560 }, { "epoch": 0.555084546182469, "grad_norm": 2.2924816608428955, "learning_rate": 8.902548160708399e-05, "loss": 2.1922, "step": 23570 }, { "epoch": 0.5553200508690123, "grad_norm": 2.001551866531372, "learning_rate": 8.897838066977532e-05, "loss": 1.8586, "step": 23580 }, { "epoch": 0.5555555555555556, "grad_norm": 2.2796730995178223, "learning_rate": 8.893127973246668e-05, "loss": 2.1119, "step": 23590 }, { "epoch": 0.5557910602420988, "grad_norm": 1.9059703350067139, "learning_rate": 8.888417879515803e-05, "loss": 2.1778, "step": 23600 }, { "epoch": 0.5560265649286421, "grad_norm": 2.31387996673584, "learning_rate": 8.883707785784938e-05, "loss": 1.9897, "step": 23610 }, { "epoch": 0.5562620696151853, "grad_norm": 2.014240264892578, "learning_rate": 8.878997692054071e-05, "loss": 2.12, "step": 23620 }, { "epoch": 0.5564975743017286, "grad_norm": 2.1083483695983887, "learning_rate": 8.874287598323206e-05, "loss": 1.9658, "step": 23630 }, { "epoch": 0.5567330789882718, "grad_norm": 1.9519131183624268, "learning_rate": 8.869577504592341e-05, "loss": 2.2165, "step": 23640 }, { "epoch": 0.5569685836748152, "grad_norm": 1.703078031539917, "learning_rate": 
8.864867410861478e-05, "loss": 2.1628, "step": 23650 }, { "epoch": 0.5572040883613584, "grad_norm": 2.055586814880371, "learning_rate": 8.860157317130611e-05, "loss": 1.9498, "step": 23660 }, { "epoch": 0.5574395930479017, "grad_norm": 2.0325064659118652, "learning_rate": 8.855447223399746e-05, "loss": 1.9808, "step": 23670 }, { "epoch": 0.5576750977344449, "grad_norm": 1.8139878511428833, "learning_rate": 8.850737129668881e-05, "loss": 2.0656, "step": 23680 }, { "epoch": 0.5579106024209882, "grad_norm": 2.5634472370147705, "learning_rate": 8.846027035938016e-05, "loss": 1.9946, "step": 23690 }, { "epoch": 0.5581461071075314, "grad_norm": 2.3983256816864014, "learning_rate": 8.84131694220715e-05, "loss": 2.0557, "step": 23700 }, { "epoch": 0.5583816117940748, "grad_norm": 1.7592830657958984, "learning_rate": 8.836606848476285e-05, "loss": 1.9625, "step": 23710 }, { "epoch": 0.558617116480618, "grad_norm": 1.9398764371871948, "learning_rate": 8.83189675474542e-05, "loss": 1.9973, "step": 23720 }, { "epoch": 0.5588526211671613, "grad_norm": 2.4670112133026123, "learning_rate": 8.827186661014555e-05, "loss": 2.0338, "step": 23730 }, { "epoch": 0.5590881258537045, "grad_norm": 2.532526969909668, "learning_rate": 8.82247656728369e-05, "loss": 2.0994, "step": 23740 }, { "epoch": 0.5593236305402477, "grad_norm": 3.6450741291046143, "learning_rate": 8.81823748292591e-05, "loss": 2.1508, "step": 23750 }, { "epoch": 0.559559135226791, "grad_norm": 2.395998239517212, "learning_rate": 8.813527389195046e-05, "loss": 2.035, "step": 23760 }, { "epoch": 0.5597946399133342, "grad_norm": 2.092289686203003, "learning_rate": 8.80881729546418e-05, "loss": 2.0636, "step": 23770 }, { "epoch": 0.5600301445998775, "grad_norm": 1.8740358352661133, "learning_rate": 8.804107201733314e-05, "loss": 1.9921, "step": 23780 }, { "epoch": 0.5602656492864208, "grad_norm": 3.254704475402832, "learning_rate": 8.79939710800245e-05, "loss": 2.0609, "step": 23790 }, { "epoch": 0.5605011539729641, 
"grad_norm": 2.3679122924804688, "learning_rate": 8.794687014271584e-05, "loss": 1.9789, "step": 23800 }, { "epoch": 0.5607366586595073, "grad_norm": 2.638920783996582, "learning_rate": 8.78997692054072e-05, "loss": 2.0542, "step": 23810 }, { "epoch": 0.5609721633460506, "grad_norm": 2.1278998851776123, "learning_rate": 8.785266826809853e-05, "loss": 2.185, "step": 23820 }, { "epoch": 0.5612076680325938, "grad_norm": 2.04278826713562, "learning_rate": 8.780556733078988e-05, "loss": 2.1442, "step": 23830 }, { "epoch": 0.5614431727191371, "grad_norm": 2.327648639678955, "learning_rate": 8.775846639348123e-05, "loss": 2.2638, "step": 23840 }, { "epoch": 0.5616786774056803, "grad_norm": 2.190995454788208, "learning_rate": 8.771136545617258e-05, "loss": 2.3052, "step": 23850 }, { "epoch": 0.5619141820922237, "grad_norm": 2.5929126739501953, "learning_rate": 8.766426451886393e-05, "loss": 2.078, "step": 23860 }, { "epoch": 0.5621496867787669, "grad_norm": 2.1054494380950928, "learning_rate": 8.761716358155527e-05, "loss": 2.1903, "step": 23870 }, { "epoch": 0.5623851914653102, "grad_norm": 2.804368495941162, "learning_rate": 8.757006264424662e-05, "loss": 1.8636, "step": 23880 }, { "epoch": 0.5626206961518534, "grad_norm": 2.6225225925445557, "learning_rate": 8.752296170693797e-05, "loss": 2.0436, "step": 23890 }, { "epoch": 0.5628562008383967, "grad_norm": 1.7755427360534668, "learning_rate": 8.747586076962932e-05, "loss": 2.1628, "step": 23900 }, { "epoch": 0.5630917055249399, "grad_norm": 2.0387985706329346, "learning_rate": 8.742875983232067e-05, "loss": 2.1442, "step": 23910 }, { "epoch": 0.5633272102114832, "grad_norm": 2.0750441551208496, "learning_rate": 8.738165889501202e-05, "loss": 2.0374, "step": 23920 }, { "epoch": 0.5635627148980265, "grad_norm": 2.1440858840942383, "learning_rate": 8.733455795770337e-05, "loss": 2.0789, "step": 23930 }, { "epoch": 0.5637982195845698, "grad_norm": 2.1340370178222656, "learning_rate": 8.728745702039472e-05, "loss": 2.2198, 
"step": 23940 }, { "epoch": 0.564033724271113, "grad_norm": 2.3632407188415527, "learning_rate": 8.724035608308606e-05, "loss": 2.0647, "step": 23950 }, { "epoch": 0.5642692289576563, "grad_norm": 2.03859543800354, "learning_rate": 8.719325514577741e-05, "loss": 2.1071, "step": 23960 }, { "epoch": 0.5645047336441995, "grad_norm": 2.2486722469329834, "learning_rate": 8.714615420846876e-05, "loss": 2.0443, "step": 23970 }, { "epoch": 0.5647402383307428, "grad_norm": 1.8128294944763184, "learning_rate": 8.709905327116011e-05, "loss": 1.9782, "step": 23980 }, { "epoch": 0.564975743017286, "grad_norm": 2.3327388763427734, "learning_rate": 8.705195233385144e-05, "loss": 2.1579, "step": 23990 }, { "epoch": 0.5652112477038294, "grad_norm": 2.0959858894348145, "learning_rate": 8.70048513965428e-05, "loss": 1.8995, "step": 24000 }, { "epoch": 0.5654467523903726, "grad_norm": 2.87188458442688, "learning_rate": 8.695775045923414e-05, "loss": 2.0005, "step": 24010 }, { "epoch": 0.5656822570769159, "grad_norm": 2.495626449584961, "learning_rate": 8.69106495219255e-05, "loss": 2.3682, "step": 24020 }, { "epoch": 0.5659177617634591, "grad_norm": 3.239908456802368, "learning_rate": 8.686354858461683e-05, "loss": 1.9192, "step": 24030 }, { "epoch": 0.5661532664500023, "grad_norm": 1.99828040599823, "learning_rate": 8.681644764730818e-05, "loss": 2.3154, "step": 24040 }, { "epoch": 0.5663887711365456, "grad_norm": 2.1217734813690186, "learning_rate": 8.676934670999953e-05, "loss": 1.9546, "step": 24050 }, { "epoch": 0.5666242758230888, "grad_norm": 3.101858377456665, "learning_rate": 8.672224577269088e-05, "loss": 2.0897, "step": 24060 }, { "epoch": 0.5668597805096321, "grad_norm": 2.358698606491089, "learning_rate": 8.667514483538223e-05, "loss": 2.0906, "step": 24070 }, { "epoch": 0.5670952851961754, "grad_norm": 2.2802388668060303, "learning_rate": 8.662804389807357e-05, "loss": 2.0595, "step": 24080 }, { "epoch": 0.5673307898827187, "grad_norm": 2.175506114959717, 
"learning_rate": 8.658094296076492e-05, "loss": 1.8499, "step": 24090 }, { "epoch": 0.5675662945692619, "grad_norm": 1.782609224319458, "learning_rate": 8.653384202345627e-05, "loss": 2.1053, "step": 24100 }, { "epoch": 0.5678017992558052, "grad_norm": 1.9120488166809082, "learning_rate": 8.648674108614762e-05, "loss": 2.2141, "step": 24110 }, { "epoch": 0.5680373039423484, "grad_norm": 3.216996908187866, "learning_rate": 8.643964014883896e-05, "loss": 2.0973, "step": 24120 }, { "epoch": 0.5682728086288917, "grad_norm": 2.160984754562378, "learning_rate": 8.639253921153031e-05, "loss": 2.1362, "step": 24130 }, { "epoch": 0.5685083133154349, "grad_norm": 1.6789969205856323, "learning_rate": 8.634543827422166e-05, "loss": 2.1757, "step": 24140 }, { "epoch": 0.5687438180019783, "grad_norm": 2.3847877979278564, "learning_rate": 8.629833733691301e-05, "loss": 2.2551, "step": 24150 }, { "epoch": 0.5689793226885215, "grad_norm": 2.110700845718384, "learning_rate": 8.625123639960436e-05, "loss": 2.1488, "step": 24160 }, { "epoch": 0.5692148273750648, "grad_norm": 2.052661657333374, "learning_rate": 8.62041354622957e-05, "loss": 2.166, "step": 24170 }, { "epoch": 0.569450332061608, "grad_norm": 2.335162401199341, "learning_rate": 8.615703452498704e-05, "loss": 1.9717, "step": 24180 }, { "epoch": 0.5696858367481513, "grad_norm": 3.1710004806518555, "learning_rate": 8.610993358767841e-05, "loss": 1.8111, "step": 24190 }, { "epoch": 0.5699213414346945, "grad_norm": 2.3458499908447266, "learning_rate": 8.606283265036975e-05, "loss": 2.159, "step": 24200 }, { "epoch": 0.5701568461212378, "grad_norm": 2.641172170639038, "learning_rate": 8.60157317130611e-05, "loss": 1.9753, "step": 24210 }, { "epoch": 0.5703923508077811, "grad_norm": 1.8560911417007446, "learning_rate": 8.596863077575245e-05, "loss": 2.1409, "step": 24220 }, { "epoch": 0.5706278554943244, "grad_norm": 2.3327791690826416, "learning_rate": 8.59215298384438e-05, "loss": 2.0098, "step": 24230 }, { "epoch": 
0.5708633601808676, "grad_norm": 2.3636698722839355, "learning_rate": 8.587442890113515e-05, "loss": 2.1375, "step": 24240 }, { "epoch": 0.5710988648674109, "grad_norm": 2.747985601425171, "learning_rate": 8.582732796382648e-05, "loss": 2.1451, "step": 24250 }, { "epoch": 0.5713343695539541, "grad_norm": 2.1141562461853027, "learning_rate": 8.578022702651783e-05, "loss": 2.1145, "step": 24260 }, { "epoch": 0.5715698742404974, "grad_norm": 2.0584163665771484, "learning_rate": 8.573312608920918e-05, "loss": 1.919, "step": 24270 }, { "epoch": 0.5718053789270406, "grad_norm": 2.1240551471710205, "learning_rate": 8.568602515190053e-05, "loss": 2.0322, "step": 24280 }, { "epoch": 0.572040883613584, "grad_norm": 2.4911723136901855, "learning_rate": 8.563892421459187e-05, "loss": 2.4528, "step": 24290 }, { "epoch": 0.5722763883001272, "grad_norm": 1.859668493270874, "learning_rate": 8.559182327728322e-05, "loss": 2.027, "step": 24300 }, { "epoch": 0.5725118929866705, "grad_norm": 1.7338566780090332, "learning_rate": 8.554472233997457e-05, "loss": 1.9711, "step": 24310 }, { "epoch": 0.5727473976732137, "grad_norm": 2.1398377418518066, "learning_rate": 8.549762140266592e-05, "loss": 2.1034, "step": 24320 }, { "epoch": 0.5729829023597569, "grad_norm": 1.8699679374694824, "learning_rate": 8.545052046535726e-05, "loss": 1.9032, "step": 24330 }, { "epoch": 0.5732184070463002, "grad_norm": 2.045719623565674, "learning_rate": 8.540341952804861e-05, "loss": 1.9111, "step": 24340 }, { "epoch": 0.5734539117328434, "grad_norm": 2.614588499069214, "learning_rate": 8.535631859073996e-05, "loss": 2.0196, "step": 24350 }, { "epoch": 0.5736894164193868, "grad_norm": 2.135077953338623, "learning_rate": 8.530921765343131e-05, "loss": 2.2211, "step": 24360 }, { "epoch": 0.57392492110593, "grad_norm": 2.3194169998168945, "learning_rate": 8.526211671612266e-05, "loss": 2.1909, "step": 24370 }, { "epoch": 0.5741604257924733, "grad_norm": 2.06208872795105, "learning_rate": 8.5215015778814e-05, 
"loss": 2.099, "step": 24380 }, { "epoch": 0.5743959304790165, "grad_norm": 1.9081873893737793, "learning_rate": 8.516791484150535e-05, "loss": 1.8989, "step": 24390 }, { "epoch": 0.5746314351655598, "grad_norm": 1.925196886062622, "learning_rate": 8.51208139041967e-05, "loss": 2.1537, "step": 24400 }, { "epoch": 0.574866939852103, "grad_norm": 2.0393810272216797, "learning_rate": 8.507371296688805e-05, "loss": 1.9381, "step": 24410 }, { "epoch": 0.5751024445386463, "grad_norm": 2.061901807785034, "learning_rate": 8.502661202957938e-05, "loss": 2.0184, "step": 24420 }, { "epoch": 0.5753379492251895, "grad_norm": 1.582486629486084, "learning_rate": 8.497951109227073e-05, "loss": 2.1767, "step": 24430 }, { "epoch": 0.5755734539117329, "grad_norm": 2.24311900138855, "learning_rate": 8.493241015496208e-05, "loss": 2.1155, "step": 24440 }, { "epoch": 0.5758089585982761, "grad_norm": 1.794895052909851, "learning_rate": 8.488530921765343e-05, "loss": 1.8587, "step": 24450 }, { "epoch": 0.5760444632848194, "grad_norm": 2.3999338150024414, "learning_rate": 8.483820828034478e-05, "loss": 2.1511, "step": 24460 }, { "epoch": 0.5762799679713626, "grad_norm": 2.538761854171753, "learning_rate": 8.479110734303613e-05, "loss": 2.1788, "step": 24470 }, { "epoch": 0.5765154726579059, "grad_norm": 2.7868943214416504, "learning_rate": 8.474400640572748e-05, "loss": 2.0722, "step": 24480 }, { "epoch": 0.5767509773444491, "grad_norm": 2.052523136138916, "learning_rate": 8.469690546841883e-05, "loss": 2.1654, "step": 24490 }, { "epoch": 0.5769864820309925, "grad_norm": 2.1687915325164795, "learning_rate": 8.464980453111017e-05, "loss": 1.9479, "step": 24500 }, { "epoch": 0.5772219867175357, "grad_norm": 2.0487418174743652, "learning_rate": 8.460270359380152e-05, "loss": 2.1456, "step": 24510 }, { "epoch": 0.577457491404079, "grad_norm": 2.201139211654663, "learning_rate": 8.455560265649287e-05, "loss": 2.0357, "step": 24520 }, { "epoch": 0.5776929960906222, "grad_norm": 
2.2305378913879395, "learning_rate": 8.450850171918422e-05, "loss": 2.1552, "step": 24530 }, { "epoch": 0.5779285007771655, "grad_norm": 4.121927261352539, "learning_rate": 8.446140078187557e-05, "loss": 2.0478, "step": 24540 }, { "epoch": 0.5781640054637087, "grad_norm": 2.2727227210998535, "learning_rate": 8.441429984456691e-05, "loss": 1.9688, "step": 24550 }, { "epoch": 0.578399510150252, "grad_norm": 2.3096418380737305, "learning_rate": 8.436719890725826e-05, "loss": 2.0188, "step": 24560 }, { "epoch": 0.5786350148367952, "grad_norm": 1.787701964378357, "learning_rate": 8.432009796994961e-05, "loss": 1.9433, "step": 24570 }, { "epoch": 0.5788705195233386, "grad_norm": 1.5643645524978638, "learning_rate": 8.427299703264096e-05, "loss": 1.9101, "step": 24580 }, { "epoch": 0.5791060242098818, "grad_norm": 2.1164591312408447, "learning_rate": 8.42258960953323e-05, "loss": 2.0142, "step": 24590 }, { "epoch": 0.579341528896425, "grad_norm": 1.9861196279525757, "learning_rate": 8.417879515802365e-05, "loss": 2.1013, "step": 24600 }, { "epoch": 0.5795770335829683, "grad_norm": 2.388425588607788, "learning_rate": 8.4131694220715e-05, "loss": 1.9681, "step": 24610 }, { "epoch": 0.5798125382695115, "grad_norm": 1.6500285863876343, "learning_rate": 8.408459328340635e-05, "loss": 1.992, "step": 24620 }, { "epoch": 0.5800480429560548, "grad_norm": 1.7825500965118408, "learning_rate": 8.403749234609768e-05, "loss": 2.0607, "step": 24630 }, { "epoch": 0.580283547642598, "grad_norm": 2.313638210296631, "learning_rate": 8.399039140878903e-05, "loss": 2.0777, "step": 24640 }, { "epoch": 0.5805190523291414, "grad_norm": 2.432238817214966, "learning_rate": 8.394329047148038e-05, "loss": 2.0948, "step": 24650 }, { "epoch": 0.5807545570156846, "grad_norm": 2.8354523181915283, "learning_rate": 8.389618953417173e-05, "loss": 2.1771, "step": 24660 }, { "epoch": 0.5809900617022279, "grad_norm": 2.3798136711120605, "learning_rate": 8.384908859686308e-05, "loss": 1.9815, "step": 24670 }, 
{ "epoch": 0.5812255663887711, "grad_norm": 2.092031478881836, "learning_rate": 8.380198765955442e-05, "loss": 2.0798, "step": 24680 }, { "epoch": 0.5814610710753144, "grad_norm": 2.465062379837036, "learning_rate": 8.375488672224577e-05, "loss": 1.8865, "step": 24690 }, { "epoch": 0.5816965757618576, "grad_norm": 2.506052017211914, "learning_rate": 8.370778578493712e-05, "loss": 1.9352, "step": 24700 }, { "epoch": 0.581932080448401, "grad_norm": 2.2928619384765625, "learning_rate": 8.366068484762847e-05, "loss": 1.8513, "step": 24710 }, { "epoch": 0.5821675851349442, "grad_norm": 1.858624815940857, "learning_rate": 8.361358391031981e-05, "loss": 2.095, "step": 24720 }, { "epoch": 0.5824030898214875, "grad_norm": 2.981670379638672, "learning_rate": 8.356648297301116e-05, "loss": 1.978, "step": 24730 }, { "epoch": 0.5826385945080307, "grad_norm": 2.1256954669952393, "learning_rate": 8.351938203570251e-05, "loss": 1.9985, "step": 24740 }, { "epoch": 0.582874099194574, "grad_norm": 1.9734386205673218, "learning_rate": 8.347228109839387e-05, "loss": 2.0097, "step": 24750 }, { "epoch": 0.5831096038811172, "grad_norm": 2.111104965209961, "learning_rate": 8.342518016108521e-05, "loss": 2.0503, "step": 24760 }, { "epoch": 0.5833451085676605, "grad_norm": 1.7222291231155396, "learning_rate": 8.337807922377656e-05, "loss": 1.8505, "step": 24770 }, { "epoch": 0.5835806132542037, "grad_norm": 2.5214107036590576, "learning_rate": 8.333097828646791e-05, "loss": 2.0998, "step": 24780 }, { "epoch": 0.5838161179407471, "grad_norm": 2.7706222534179688, "learning_rate": 8.328387734915926e-05, "loss": 2.0881, "step": 24790 }, { "epoch": 0.5840516226272903, "grad_norm": 2.2346792221069336, "learning_rate": 8.32367764118506e-05, "loss": 2.0906, "step": 24800 }, { "epoch": 0.5842871273138336, "grad_norm": 2.1181459426879883, "learning_rate": 8.318967547454195e-05, "loss": 1.9335, "step": 24810 }, { "epoch": 0.5845226320003768, "grad_norm": 2.1477532386779785, "learning_rate": 
8.31425745372333e-05, "loss": 2.1249, "step": 24820 }, { "epoch": 0.5847581366869201, "grad_norm": 2.5332417488098145, "learning_rate": 8.309547359992465e-05, "loss": 2.1898, "step": 24830 }, { "epoch": 0.5849936413734633, "grad_norm": 1.6639565229415894, "learning_rate": 8.3048372662616e-05, "loss": 2.077, "step": 24840 }, { "epoch": 0.5852291460600066, "grad_norm": 2.576613187789917, "learning_rate": 8.300127172530733e-05, "loss": 2.1235, "step": 24850 }, { "epoch": 0.5854646507465499, "grad_norm": 2.24748158454895, "learning_rate": 8.295417078799869e-05, "loss": 1.7241, "step": 24860 }, { "epoch": 0.5857001554330932, "grad_norm": 2.139662265777588, "learning_rate": 8.290706985069004e-05, "loss": 2.0476, "step": 24870 }, { "epoch": 0.5859356601196364, "grad_norm": 2.513113260269165, "learning_rate": 8.285996891338139e-05, "loss": 2.0448, "step": 24880 }, { "epoch": 0.5861711648061796, "grad_norm": 2.219991445541382, "learning_rate": 8.281286797607272e-05, "loss": 1.7964, "step": 24890 }, { "epoch": 0.5864066694927229, "grad_norm": 2.668653964996338, "learning_rate": 8.276576703876407e-05, "loss": 2.2925, "step": 24900 }, { "epoch": 0.5866421741792661, "grad_norm": 2.3607492446899414, "learning_rate": 8.271866610145542e-05, "loss": 2.0662, "step": 24910 }, { "epoch": 0.5868776788658094, "grad_norm": 2.165482521057129, "learning_rate": 8.267156516414677e-05, "loss": 2.0108, "step": 24920 }, { "epoch": 0.5871131835523526, "grad_norm": 3.355048418045044, "learning_rate": 8.262446422683811e-05, "loss": 2.2239, "step": 24930 }, { "epoch": 0.587348688238896, "grad_norm": 2.3312196731567383, "learning_rate": 8.257736328952946e-05, "loss": 2.1449, "step": 24940 }, { "epoch": 0.5875841929254392, "grad_norm": 2.766026735305786, "learning_rate": 8.253026235222081e-05, "loss": 2.0531, "step": 24950 }, { "epoch": 0.5878196976119825, "grad_norm": 2.3361899852752686, "learning_rate": 8.248316141491216e-05, "loss": 2.029, "step": 24960 }, { "epoch": 0.5880552022985257, 
"grad_norm": 2.563586950302124, "learning_rate": 8.243606047760351e-05, "loss": 2.0073, "step": 24970 }, { "epoch": 0.588290706985069, "grad_norm": 3.044583320617676, "learning_rate": 8.238895954029485e-05, "loss": 2.3071, "step": 24980 }, { "epoch": 0.5885262116716122, "grad_norm": 1.7707631587982178, "learning_rate": 8.23418586029862e-05, "loss": 1.9584, "step": 24990 }, { "epoch": 0.5887617163581556, "grad_norm": 3.5477962493896484, "learning_rate": 8.229475766567755e-05, "loss": 2.0358, "step": 25000 }, { "epoch": 0.5889972210446988, "grad_norm": 2.394061326980591, "learning_rate": 8.22476567283689e-05, "loss": 2.0315, "step": 25010 }, { "epoch": 0.5892327257312421, "grad_norm": 2.087583065032959, "learning_rate": 8.220055579106023e-05, "loss": 2.0567, "step": 25020 }, { "epoch": 0.5894682304177853, "grad_norm": 1.9279582500457764, "learning_rate": 8.21534548537516e-05, "loss": 2.2003, "step": 25030 }, { "epoch": 0.5897037351043286, "grad_norm": 2.105403184890747, "learning_rate": 8.210635391644295e-05, "loss": 1.9165, "step": 25040 }, { "epoch": 0.5899392397908718, "grad_norm": 2.1151294708251953, "learning_rate": 8.20592529791343e-05, "loss": 2.1332, "step": 25050 }, { "epoch": 0.5901747444774151, "grad_norm": 2.34464693069458, "learning_rate": 8.201215204182564e-05, "loss": 1.9142, "step": 25060 }, { "epoch": 0.5904102491639583, "grad_norm": 2.0007407665252686, "learning_rate": 8.196505110451699e-05, "loss": 1.9881, "step": 25070 }, { "epoch": 0.5906457538505017, "grad_norm": 2.4901375770568848, "learning_rate": 8.191795016720834e-05, "loss": 2.0313, "step": 25080 }, { "epoch": 0.5908812585370449, "grad_norm": 1.5879149436950684, "learning_rate": 8.187084922989969e-05, "loss": 2.0404, "step": 25090 }, { "epoch": 0.5911167632235882, "grad_norm": 1.9847928285598755, "learning_rate": 8.182374829259102e-05, "loss": 1.9612, "step": 25100 }, { "epoch": 0.5913522679101314, "grad_norm": 1.7049063444137573, "learning_rate": 8.177664735528237e-05, "loss": 2.1503, 
"step": 25110 }, { "epoch": 0.5915877725966747, "grad_norm": 2.089395046234131, "learning_rate": 8.172954641797372e-05, "loss": 2.1733, "step": 25120 }, { "epoch": 0.5918232772832179, "grad_norm": 2.2690000534057617, "learning_rate": 8.168244548066507e-05, "loss": 2.1087, "step": 25130 }, { "epoch": 0.5920587819697612, "grad_norm": 2.3506076335906982, "learning_rate": 8.163534454335642e-05, "loss": 1.9857, "step": 25140 }, { "epoch": 0.5922942866563045, "grad_norm": 2.1546356678009033, "learning_rate": 8.158824360604776e-05, "loss": 1.8125, "step": 25150 }, { "epoch": 0.5925297913428478, "grad_norm": 2.6273109912872314, "learning_rate": 8.154114266873911e-05, "loss": 2.1505, "step": 25160 }, { "epoch": 0.592765296029391, "grad_norm": 2.0121755599975586, "learning_rate": 8.149404173143046e-05, "loss": 2.2848, "step": 25170 }, { "epoch": 0.5930008007159342, "grad_norm": 2.0071730613708496, "learning_rate": 8.144694079412181e-05, "loss": 2.0596, "step": 25180 }, { "epoch": 0.5932363054024775, "grad_norm": 1.9122415781021118, "learning_rate": 8.139983985681315e-05, "loss": 1.9869, "step": 25190 }, { "epoch": 0.5934718100890207, "grad_norm": 2.8985416889190674, "learning_rate": 8.13527389195045e-05, "loss": 1.9928, "step": 25200 }, { "epoch": 0.593707314775564, "grad_norm": 2.471587657928467, "learning_rate": 8.130563798219585e-05, "loss": 1.9349, "step": 25210 }, { "epoch": 0.5939428194621073, "grad_norm": 2.232741594314575, "learning_rate": 8.12585370448872e-05, "loss": 2.3072, "step": 25220 }, { "epoch": 0.5941783241486506, "grad_norm": 2.7327969074249268, "learning_rate": 8.121143610757854e-05, "loss": 2.0982, "step": 25230 }, { "epoch": 0.5944138288351938, "grad_norm": 2.561145305633545, "learning_rate": 8.116433517026989e-05, "loss": 1.9554, "step": 25240 }, { "epoch": 0.5946493335217371, "grad_norm": 2.3863377571105957, "learning_rate": 8.111723423296124e-05, "loss": 1.9331, "step": 25250 }, { "epoch": 0.5948848382082803, "grad_norm": 2.0999460220336914, 
"learning_rate": 8.107484338938345e-05, "loss": 1.9116, "step": 25260 }, { "epoch": 0.5951203428948236, "grad_norm": 2.5434398651123047, "learning_rate": 8.102774245207479e-05, "loss": 2.1479, "step": 25270 }, { "epoch": 0.5953558475813668, "grad_norm": 2.038438558578491, "learning_rate": 8.098064151476614e-05, "loss": 2.0945, "step": 25280 }, { "epoch": 0.5955913522679102, "grad_norm": 2.3757026195526123, "learning_rate": 8.09335405774575e-05, "loss": 2.142, "step": 25290 }, { "epoch": 0.5958268569544534, "grad_norm": 2.268975257873535, "learning_rate": 8.088643964014884e-05, "loss": 2.0171, "step": 25300 }, { "epoch": 0.5960623616409967, "grad_norm": 2.189600944519043, "learning_rate": 8.083933870284019e-05, "loss": 2.0769, "step": 25310 }, { "epoch": 0.5962978663275399, "grad_norm": 2.25864577293396, "learning_rate": 8.079223776553154e-05, "loss": 2.0467, "step": 25320 }, { "epoch": 0.5965333710140832, "grad_norm": 1.8093814849853516, "learning_rate": 8.074513682822289e-05, "loss": 1.9886, "step": 25330 }, { "epoch": 0.5967688757006264, "grad_norm": 2.4894871711730957, "learning_rate": 8.069803589091424e-05, "loss": 2.0495, "step": 25340 }, { "epoch": 0.5970043803871697, "grad_norm": 2.965756893157959, "learning_rate": 8.065093495360558e-05, "loss": 2.233, "step": 25350 }, { "epoch": 0.597239885073713, "grad_norm": 2.278423309326172, "learning_rate": 8.060383401629693e-05, "loss": 2.0152, "step": 25360 }, { "epoch": 0.5974753897602563, "grad_norm": 2.2974514961242676, "learning_rate": 8.055673307898828e-05, "loss": 2.2189, "step": 25370 }, { "epoch": 0.5977108944467995, "grad_norm": 2.401662588119507, "learning_rate": 8.050963214167963e-05, "loss": 1.9795, "step": 25380 }, { "epoch": 0.5979463991333428, "grad_norm": 2.429227113723755, "learning_rate": 8.046253120437097e-05, "loss": 1.9638, "step": 25390 }, { "epoch": 0.598181903819886, "grad_norm": 2.7841548919677734, "learning_rate": 8.041543026706232e-05, "loss": 2.1462, "step": 25400 }, { "epoch": 
0.5984174085064293, "grad_norm": 2.3505892753601074, "learning_rate": 8.036832932975367e-05, "loss": 2.1676, "step": 25410 }, { "epoch": 0.5986529131929725, "grad_norm": 1.8642781972885132, "learning_rate": 8.032122839244502e-05, "loss": 2.2374, "step": 25420 }, { "epoch": 0.5988884178795159, "grad_norm": 2.466585874557495, "learning_rate": 8.027412745513637e-05, "loss": 2.1254, "step": 25430 }, { "epoch": 0.5991239225660591, "grad_norm": 2.247736692428589, "learning_rate": 8.02270265178277e-05, "loss": 1.8839, "step": 25440 }, { "epoch": 0.5993594272526023, "grad_norm": 2.2101500034332275, "learning_rate": 8.017992558051905e-05, "loss": 2.0983, "step": 25450 }, { "epoch": 0.5995949319391456, "grad_norm": 2.6747212409973145, "learning_rate": 8.01328246432104e-05, "loss": 2.0788, "step": 25460 }, { "epoch": 0.5998304366256888, "grad_norm": 2.289003610610962, "learning_rate": 8.008572370590176e-05, "loss": 2.1053, "step": 25470 }, { "epoch": 0.6000659413122321, "grad_norm": 2.8795812129974365, "learning_rate": 8.003862276859309e-05, "loss": 1.9574, "step": 25480 }, { "epoch": 0.6003014459987753, "grad_norm": 1.9603990316390991, "learning_rate": 7.999152183128444e-05, "loss": 2.1414, "step": 25490 }, { "epoch": 0.6005369506853186, "grad_norm": 1.8929415941238403, "learning_rate": 7.994442089397579e-05, "loss": 2.0685, "step": 25500 }, { "epoch": 0.6007724553718619, "grad_norm": 2.2865395545959473, "learning_rate": 7.989731995666714e-05, "loss": 1.9385, "step": 25510 }, { "epoch": 0.6010079600584052, "grad_norm": 1.7141823768615723, "learning_rate": 7.985021901935848e-05, "loss": 1.9145, "step": 25520 }, { "epoch": 0.6012434647449484, "grad_norm": 2.2416176795959473, "learning_rate": 7.980311808204983e-05, "loss": 1.9598, "step": 25530 }, { "epoch": 0.6014789694314917, "grad_norm": 1.8684344291687012, "learning_rate": 7.975601714474118e-05, "loss": 2.0327, "step": 25540 }, { "epoch": 0.6017144741180349, "grad_norm": 2.3493103981018066, "learning_rate": 
7.970891620743253e-05, "loss": 1.9675, "step": 25550 }, { "epoch": 0.6019499788045782, "grad_norm": 2.669125556945801, "learning_rate": 7.966181527012388e-05, "loss": 2.1429, "step": 25560 }, { "epoch": 0.6021854834911214, "grad_norm": 2.1872849464416504, "learning_rate": 7.961471433281523e-05, "loss": 2.1035, "step": 25570 }, { "epoch": 0.6024209881776648, "grad_norm": 2.5659477710723877, "learning_rate": 7.956761339550658e-05, "loss": 2.1668, "step": 25580 }, { "epoch": 0.602656492864208, "grad_norm": 1.815410852432251, "learning_rate": 7.952051245819793e-05, "loss": 1.9685, "step": 25590 }, { "epoch": 0.6028919975507513, "grad_norm": 2.178764820098877, "learning_rate": 7.947341152088927e-05, "loss": 2.0261, "step": 25600 }, { "epoch": 0.6031275022372945, "grad_norm": 2.0727059841156006, "learning_rate": 7.942631058358062e-05, "loss": 2.3205, "step": 25610 }, { "epoch": 0.6033630069238378, "grad_norm": 2.4925308227539062, "learning_rate": 7.937920964627197e-05, "loss": 1.9224, "step": 25620 }, { "epoch": 0.603598511610381, "grad_norm": 2.7875311374664307, "learning_rate": 7.933210870896332e-05, "loss": 2.1901, "step": 25630 }, { "epoch": 0.6038340162969243, "grad_norm": 2.135871648788452, "learning_rate": 7.928500777165467e-05, "loss": 2.0171, "step": 25640 }, { "epoch": 0.6040695209834676, "grad_norm": 2.568267345428467, "learning_rate": 7.9237906834346e-05, "loss": 2.2376, "step": 25650 }, { "epoch": 0.6043050256700109, "grad_norm": 2.8752918243408203, "learning_rate": 7.919080589703736e-05, "loss": 2.0801, "step": 25660 }, { "epoch": 0.6045405303565541, "grad_norm": 2.950716257095337, "learning_rate": 7.91437049597287e-05, "loss": 2.1433, "step": 25670 }, { "epoch": 0.6047760350430974, "grad_norm": 1.806060791015625, "learning_rate": 7.909660402242006e-05, "loss": 2.0425, "step": 25680 }, { "epoch": 0.6050115397296406, "grad_norm": 2.009774923324585, "learning_rate": 7.904950308511139e-05, "loss": 1.9264, "step": 25690 }, { "epoch": 0.6052470444161839, 
"grad_norm": 2.4653844833374023, "learning_rate": 7.900240214780274e-05, "loss": 2.2729, "step": 25700 }, { "epoch": 0.6054825491027271, "grad_norm": 2.0661404132843018, "learning_rate": 7.895530121049409e-05, "loss": 1.8604, "step": 25710 }, { "epoch": 0.6057180537892705, "grad_norm": 2.1067216396331787, "learning_rate": 7.890820027318544e-05, "loss": 1.918, "step": 25720 }, { "epoch": 0.6059535584758137, "grad_norm": 1.9731528759002686, "learning_rate": 7.88610993358768e-05, "loss": 2.0417, "step": 25730 }, { "epoch": 0.6061890631623569, "grad_norm": 2.0043580532073975, "learning_rate": 7.881399839856813e-05, "loss": 1.8275, "step": 25740 }, { "epoch": 0.6064245678489002, "grad_norm": 2.3526785373687744, "learning_rate": 7.876689746125948e-05, "loss": 1.9985, "step": 25750 }, { "epoch": 0.6066600725354434, "grad_norm": 2.168198585510254, "learning_rate": 7.871979652395083e-05, "loss": 1.9785, "step": 25760 }, { "epoch": 0.6068955772219867, "grad_norm": 1.7501553297042847, "learning_rate": 7.867269558664218e-05, "loss": 2.0646, "step": 25770 }, { "epoch": 0.6071310819085299, "grad_norm": 2.0135562419891357, "learning_rate": 7.862559464933352e-05, "loss": 2.152, "step": 25780 }, { "epoch": 0.6073665865950733, "grad_norm": 1.9424461126327515, "learning_rate": 7.857849371202487e-05, "loss": 2.0319, "step": 25790 }, { "epoch": 0.6076020912816165, "grad_norm": 2.0011091232299805, "learning_rate": 7.853139277471622e-05, "loss": 1.9419, "step": 25800 }, { "epoch": 0.6078375959681598, "grad_norm": 1.9479938745498657, "learning_rate": 7.848429183740757e-05, "loss": 2.0519, "step": 25810 }, { "epoch": 0.608073100654703, "grad_norm": 2.629399538040161, "learning_rate": 7.84371909000989e-05, "loss": 1.8832, "step": 25820 }, { "epoch": 0.6083086053412463, "grad_norm": 2.2955732345581055, "learning_rate": 7.839008996279026e-05, "loss": 1.8042, "step": 25830 }, { "epoch": 0.6085441100277895, "grad_norm": 2.8424713611602783, "learning_rate": 7.83429890254816e-05, "loss": 2.4232, 
"step": 25840 }, { "epoch": 0.6087796147143328, "grad_norm": 2.084066152572632, "learning_rate": 7.829588808817296e-05, "loss": 2.0076, "step": 25850 }, { "epoch": 0.609015119400876, "grad_norm": 2.988767623901367, "learning_rate": 7.82487871508643e-05, "loss": 1.9684, "step": 25860 }, { "epoch": 0.6092506240874194, "grad_norm": 2.3572447299957275, "learning_rate": 7.820168621355566e-05, "loss": 1.9121, "step": 25870 }, { "epoch": 0.6094861287739626, "grad_norm": 1.948307752609253, "learning_rate": 7.8154585276247e-05, "loss": 1.6575, "step": 25880 }, { "epoch": 0.6097216334605059, "grad_norm": 4.080255031585693, "learning_rate": 7.810748433893836e-05, "loss": 2.0366, "step": 25890 }, { "epoch": 0.6099571381470491, "grad_norm": 2.5627658367156982, "learning_rate": 7.80603834016297e-05, "loss": 1.7901, "step": 25900 }, { "epoch": 0.6101926428335924, "grad_norm": 1.9693503379821777, "learning_rate": 7.801328246432104e-05, "loss": 1.9767, "step": 25910 }, { "epoch": 0.6104281475201356, "grad_norm": 2.0714879035949707, "learning_rate": 7.79661815270124e-05, "loss": 2.0898, "step": 25920 }, { "epoch": 0.610663652206679, "grad_norm": 2.8542380332946777, "learning_rate": 7.791908058970374e-05, "loss": 2.2137, "step": 25930 }, { "epoch": 0.6108991568932222, "grad_norm": 2.3239822387695312, "learning_rate": 7.78719796523951e-05, "loss": 1.8162, "step": 25940 }, { "epoch": 0.6111346615797655, "grad_norm": 1.9721574783325195, "learning_rate": 7.782487871508643e-05, "loss": 2.1246, "step": 25950 }, { "epoch": 0.6113701662663087, "grad_norm": 2.5218284130096436, "learning_rate": 7.777777777777778e-05, "loss": 1.9872, "step": 25960 }, { "epoch": 0.611605670952852, "grad_norm": 1.9830020666122437, "learning_rate": 7.773067684046913e-05, "loss": 1.8981, "step": 25970 }, { "epoch": 0.6118411756393952, "grad_norm": 2.9052417278289795, "learning_rate": 7.768357590316048e-05, "loss": 1.961, "step": 25980 }, { "epoch": 0.6120766803259385, "grad_norm": 2.380767583847046, 
"learning_rate": 7.763647496585182e-05, "loss": 2.0117, "step": 25990 }, { "epoch": 0.6123121850124817, "grad_norm": 2.563429117202759, "learning_rate": 7.758937402854317e-05, "loss": 2.0541, "step": 26000 }, { "epoch": 0.612547689699025, "grad_norm": 2.067661762237549, "learning_rate": 7.754227309123452e-05, "loss": 1.9734, "step": 26010 }, { "epoch": 0.6127831943855683, "grad_norm": 2.019895315170288, "learning_rate": 7.749517215392587e-05, "loss": 2.2234, "step": 26020 }, { "epoch": 0.6130186990721115, "grad_norm": 2.9683279991149902, "learning_rate": 7.74480712166172e-05, "loss": 1.9863, "step": 26030 }, { "epoch": 0.6132542037586548, "grad_norm": 2.322253465652466, "learning_rate": 7.740097027930856e-05, "loss": 2.0671, "step": 26040 }, { "epoch": 0.613489708445198, "grad_norm": 2.235363483428955, "learning_rate": 7.73538693419999e-05, "loss": 1.9467, "step": 26050 }, { "epoch": 0.6137252131317413, "grad_norm": 2.408618927001953, "learning_rate": 7.730676840469126e-05, "loss": 2.1104, "step": 26060 }, { "epoch": 0.6139607178182845, "grad_norm": 2.392561912536621, "learning_rate": 7.725966746738261e-05, "loss": 1.8468, "step": 26070 }, { "epoch": 0.6141962225048279, "grad_norm": 2.707282781600952, "learning_rate": 7.721256653007394e-05, "loss": 2.0011, "step": 26080 }, { "epoch": 0.6144317271913711, "grad_norm": 2.337557792663574, "learning_rate": 7.71654655927653e-05, "loss": 2.026, "step": 26090 }, { "epoch": 0.6146672318779144, "grad_norm": 2.9671690464019775, "learning_rate": 7.711836465545664e-05, "loss": 1.965, "step": 26100 }, { "epoch": 0.6149027365644576, "grad_norm": 1.8119666576385498, "learning_rate": 7.7071263718148e-05, "loss": 2.0404, "step": 26110 }, { "epoch": 0.6151382412510009, "grad_norm": 2.940488576889038, "learning_rate": 7.702416278083933e-05, "loss": 1.7759, "step": 26120 }, { "epoch": 0.6153737459375441, "grad_norm": 2.25976300239563, "learning_rate": 7.697706184353068e-05, "loss": 1.7815, "step": 26130 }, { "epoch": 
0.6156092506240874, "grad_norm": 2.183560371398926, "learning_rate": 7.692996090622205e-05, "loss": 1.8272, "step": 26140 }, { "epoch": 0.6158447553106307, "grad_norm": 2.320969820022583, "learning_rate": 7.68828599689134e-05, "loss": 2.1829, "step": 26150 }, { "epoch": 0.616080259997174, "grad_norm": 2.730713129043579, "learning_rate": 7.683575903160473e-05, "loss": 1.908, "step": 26160 }, { "epoch": 0.6163157646837172, "grad_norm": 2.4461238384246826, "learning_rate": 7.678865809429608e-05, "loss": 2.1768, "step": 26170 }, { "epoch": 0.6165512693702605, "grad_norm": 2.157590866088867, "learning_rate": 7.674155715698743e-05, "loss": 2.1194, "step": 26180 }, { "epoch": 0.6167867740568037, "grad_norm": 2.2713019847869873, "learning_rate": 7.669445621967878e-05, "loss": 2.0888, "step": 26190 }, { "epoch": 0.617022278743347, "grad_norm": 2.798720598220825, "learning_rate": 7.664735528237012e-05, "loss": 2.0496, "step": 26200 }, { "epoch": 0.6172577834298902, "grad_norm": 2.4818081855773926, "learning_rate": 7.660025434506147e-05, "loss": 2.2425, "step": 26210 }, { "epoch": 0.6174932881164336, "grad_norm": 2.5907058715820312, "learning_rate": 7.655315340775282e-05, "loss": 1.9915, "step": 26220 }, { "epoch": 0.6177287928029768, "grad_norm": 3.659034490585327, "learning_rate": 7.650605247044417e-05, "loss": 1.9745, "step": 26230 }, { "epoch": 0.6179642974895201, "grad_norm": 1.8911350965499878, "learning_rate": 7.645895153313552e-05, "loss": 2.02, "step": 26240 }, { "epoch": 0.6181998021760633, "grad_norm": 2.086848020553589, "learning_rate": 7.641185059582686e-05, "loss": 2.0886, "step": 26250 }, { "epoch": 0.6184353068626066, "grad_norm": 2.335862636566162, "learning_rate": 7.636474965851821e-05, "loss": 2.13, "step": 26260 }, { "epoch": 0.6186708115491498, "grad_norm": 3.3991475105285645, "learning_rate": 7.631764872120956e-05, "loss": 2.0224, "step": 26270 }, { "epoch": 0.6189063162356931, "grad_norm": 2.554853677749634, "learning_rate": 7.627054778390091e-05, 
"loss": 2.0263, "step": 26280 }, { "epoch": 0.6191418209222364, "grad_norm": 1.8681471347808838, "learning_rate": 7.622344684659224e-05, "loss": 1.8256, "step": 26290 }, { "epoch": 0.6193773256087796, "grad_norm": 2.5911004543304443, "learning_rate": 7.61763459092836e-05, "loss": 2.1467, "step": 26300 }, { "epoch": 0.6196128302953229, "grad_norm": 2.3044159412384033, "learning_rate": 7.612924497197495e-05, "loss": 1.8694, "step": 26310 }, { "epoch": 0.6198483349818661, "grad_norm": 2.6165413856506348, "learning_rate": 7.60821440346663e-05, "loss": 1.9561, "step": 26320 }, { "epoch": 0.6200838396684094, "grad_norm": 2.0098507404327393, "learning_rate": 7.603504309735763e-05, "loss": 2.0378, "step": 26330 }, { "epoch": 0.6203193443549526, "grad_norm": 3.309293508529663, "learning_rate": 7.598794216004898e-05, "loss": 1.8842, "step": 26340 }, { "epoch": 0.6205548490414959, "grad_norm": 2.2279319763183594, "learning_rate": 7.594084122274033e-05, "loss": 2.1463, "step": 26350 }, { "epoch": 0.6207903537280391, "grad_norm": 1.7420648336410522, "learning_rate": 7.589374028543168e-05, "loss": 2.033, "step": 26360 }, { "epoch": 0.6210258584145825, "grad_norm": 2.068329334259033, "learning_rate": 7.584663934812303e-05, "loss": 2.0667, "step": 26370 }, { "epoch": 0.6212613631011257, "grad_norm": 2.148442506790161, "learning_rate": 7.579953841081437e-05, "loss": 2.0649, "step": 26380 }, { "epoch": 0.621496867787669, "grad_norm": 2.486476421356201, "learning_rate": 7.575243747350572e-05, "loss": 2.0631, "step": 26390 }, { "epoch": 0.6217323724742122, "grad_norm": 2.4887843132019043, "learning_rate": 7.570533653619707e-05, "loss": 1.7455, "step": 26400 }, { "epoch": 0.6219678771607555, "grad_norm": 1.8628402948379517, "learning_rate": 7.565823559888842e-05, "loss": 2.0665, "step": 26410 }, { "epoch": 0.6222033818472987, "grad_norm": 1.9637489318847656, "learning_rate": 7.561113466157977e-05, "loss": 1.9645, "step": 26420 }, { "epoch": 0.622438886533842, "grad_norm": 
2.734393835067749, "learning_rate": 7.556403372427112e-05, "loss": 2.0079, "step": 26430 }, { "epoch": 0.6226743912203853, "grad_norm": 2.7208571434020996, "learning_rate": 7.551693278696247e-05, "loss": 2.0066, "step": 26440 }, { "epoch": 0.6229098959069286, "grad_norm": 2.311509132385254, "learning_rate": 7.546983184965382e-05, "loss": 2.0782, "step": 26450 }, { "epoch": 0.6231454005934718, "grad_norm": 2.43361759185791, "learning_rate": 7.542273091234516e-05, "loss": 2.1844, "step": 26460 }, { "epoch": 0.6233809052800151, "grad_norm": 2.6297848224639893, "learning_rate": 7.537562997503651e-05, "loss": 1.9422, "step": 26470 }, { "epoch": 0.6236164099665583, "grad_norm": 1.997184157371521, "learning_rate": 7.532852903772786e-05, "loss": 2.1683, "step": 26480 }, { "epoch": 0.6238519146531016, "grad_norm": 1.8917272090911865, "learning_rate": 7.528142810041921e-05, "loss": 1.9385, "step": 26490 }, { "epoch": 0.6240874193396448, "grad_norm": 2.0081610679626465, "learning_rate": 7.523432716311055e-05, "loss": 1.9071, "step": 26500 }, { "epoch": 0.6243229240261882, "grad_norm": 2.179267644882202, "learning_rate": 7.51872262258019e-05, "loss": 2.1131, "step": 26510 }, { "epoch": 0.6245584287127314, "grad_norm": 1.9860879182815552, "learning_rate": 7.514012528849325e-05, "loss": 2.3245, "step": 26520 }, { "epoch": 0.6247939333992747, "grad_norm": 1.9527974128723145, "learning_rate": 7.50930243511846e-05, "loss": 1.9865, "step": 26530 }, { "epoch": 0.6250294380858179, "grad_norm": 2.507073402404785, "learning_rate": 7.504592341387595e-05, "loss": 1.8888, "step": 26540 }, { "epoch": 0.6252649427723612, "grad_norm": 2.8235909938812256, "learning_rate": 7.499882247656728e-05, "loss": 2.0307, "step": 26550 }, { "epoch": 0.6255004474589044, "grad_norm": 3.5919525623321533, "learning_rate": 7.495172153925863e-05, "loss": 2.1907, "step": 26560 }, { "epoch": 0.6257359521454477, "grad_norm": 1.6083260774612427, "learning_rate": 7.490462060194998e-05, "loss": 1.8715, "step": 26570 
}, { "epoch": 0.625971456831991, "grad_norm": 2.9224612712860107, "learning_rate": 7.485751966464133e-05, "loss": 2.1417, "step": 26580 }, { "epoch": 0.6262069615185342, "grad_norm": 2.0312814712524414, "learning_rate": 7.481041872733267e-05, "loss": 1.9067, "step": 26590 }, { "epoch": 0.6264424662050775, "grad_norm": 2.3932294845581055, "learning_rate": 7.476331779002402e-05, "loss": 2.0652, "step": 26600 }, { "epoch": 0.6266779708916207, "grad_norm": 2.058734893798828, "learning_rate": 7.471621685271537e-05, "loss": 2.1212, "step": 26610 }, { "epoch": 0.626913475578164, "grad_norm": 2.9603049755096436, "learning_rate": 7.466911591540672e-05, "loss": 1.917, "step": 26620 }, { "epoch": 0.6271489802647072, "grad_norm": 2.741225004196167, "learning_rate": 7.462201497809806e-05, "loss": 2.095, "step": 26630 }, { "epoch": 0.6273844849512505, "grad_norm": 2.8252007961273193, "learning_rate": 7.457491404078941e-05, "loss": 2.045, "step": 26640 }, { "epoch": 0.6276199896377938, "grad_norm": 2.179786443710327, "learning_rate": 7.452781310348076e-05, "loss": 2.0414, "step": 26650 }, { "epoch": 0.6278554943243371, "grad_norm": 2.8023617267608643, "learning_rate": 7.448071216617211e-05, "loss": 2.2393, "step": 26660 }, { "epoch": 0.6280909990108803, "grad_norm": 1.851933240890503, "learning_rate": 7.443361122886346e-05, "loss": 2.1044, "step": 26670 }, { "epoch": 0.6283265036974236, "grad_norm": 2.2054028511047363, "learning_rate": 7.43865102915548e-05, "loss": 2.328, "step": 26680 }, { "epoch": 0.6285620083839668, "grad_norm": 2.3730199337005615, "learning_rate": 7.433940935424615e-05, "loss": 1.9667, "step": 26690 }, { "epoch": 0.6287975130705101, "grad_norm": 2.3404037952423096, "learning_rate": 7.429230841693751e-05, "loss": 2.0277, "step": 26700 }, { "epoch": 0.6290330177570533, "grad_norm": 2.6131813526153564, "learning_rate": 7.424520747962885e-05, "loss": 2.0922, "step": 26710 }, { "epoch": 0.6292685224435967, "grad_norm": 1.9105520248413086, "learning_rate": 
7.41981065423202e-05, "loss": 2.2015, "step": 26720 }, { "epoch": 0.6295040271301399, "grad_norm": 2.223731517791748, "learning_rate": 7.415100560501155e-05, "loss": 1.8634, "step": 26730 }, { "epoch": 0.6297395318166832, "grad_norm": 2.680253028869629, "learning_rate": 7.41039046677029e-05, "loss": 2.0635, "step": 26740 }, { "epoch": 0.6299750365032264, "grad_norm": 3.308995008468628, "learning_rate": 7.405680373039425e-05, "loss": 1.8926, "step": 26750 }, { "epoch": 0.6302105411897697, "grad_norm": 2.192030668258667, "learning_rate": 7.400970279308558e-05, "loss": 1.7986, "step": 26760 }, { "epoch": 0.6304460458763129, "grad_norm": 2.320146322250366, "learning_rate": 7.396260185577693e-05, "loss": 2.1723, "step": 26770 }, { "epoch": 0.6306815505628562, "grad_norm": 2.5888264179229736, "learning_rate": 7.391550091846828e-05, "loss": 1.9864, "step": 26780 }, { "epoch": 0.6309170552493994, "grad_norm": 2.5209715366363525, "learning_rate": 7.386839998115963e-05, "loss": 2.0133, "step": 26790 }, { "epoch": 0.6311525599359428, "grad_norm": 2.5114974975585938, "learning_rate": 7.382129904385097e-05, "loss": 2.0248, "step": 26800 }, { "epoch": 0.631388064622486, "grad_norm": 1.4139314889907837, "learning_rate": 7.377419810654232e-05, "loss": 1.7762, "step": 26810 }, { "epoch": 0.6316235693090293, "grad_norm": 2.1081104278564453, "learning_rate": 7.372709716923367e-05, "loss": 1.9416, "step": 26820 }, { "epoch": 0.6318590739955725, "grad_norm": 2.1463840007781982, "learning_rate": 7.367999623192502e-05, "loss": 1.8741, "step": 26830 }, { "epoch": 0.6320945786821158, "grad_norm": 2.403310775756836, "learning_rate": 7.363289529461637e-05, "loss": 1.974, "step": 26840 }, { "epoch": 0.632330083368659, "grad_norm": 2.203840732574463, "learning_rate": 7.358579435730771e-05, "loss": 1.9, "step": 26850 }, { "epoch": 0.6325655880552022, "grad_norm": 2.0691256523132324, "learning_rate": 7.353869341999906e-05, "loss": 1.9828, "step": 26860 }, { "epoch": 0.6328010927417456, 
"grad_norm": 1.5303398370742798, "learning_rate": 7.349159248269041e-05, "loss": 1.8315, "step": 26870 }, { "epoch": 0.6330365974282888, "grad_norm": 2.035423994064331, "learning_rate": 7.344449154538176e-05, "loss": 2.2133, "step": 26880 }, { "epoch": 0.6332721021148321, "grad_norm": 2.0543622970581055, "learning_rate": 7.33973906080731e-05, "loss": 1.8439, "step": 26890 }, { "epoch": 0.6335076068013753, "grad_norm": 2.3338544368743896, "learning_rate": 7.335028967076445e-05, "loss": 2.1695, "step": 26900 }, { "epoch": 0.6337431114879186, "grad_norm": 2.5324740409851074, "learning_rate": 7.33031887334558e-05, "loss": 2.1254, "step": 26910 }, { "epoch": 0.6339786161744618, "grad_norm": 2.218935489654541, "learning_rate": 7.325608779614715e-05, "loss": 2.0474, "step": 26920 }, { "epoch": 0.6342141208610051, "grad_norm": 2.561553478240967, "learning_rate": 7.320898685883848e-05, "loss": 2.3904, "step": 26930 }, { "epoch": 0.6344496255475484, "grad_norm": 2.866525411605835, "learning_rate": 7.316188592152983e-05, "loss": 2.0126, "step": 26940 }, { "epoch": 0.6346851302340917, "grad_norm": 1.68939208984375, "learning_rate": 7.311478498422118e-05, "loss": 2.0345, "step": 26950 }, { "epoch": 0.6349206349206349, "grad_norm": 2.2209982872009277, "learning_rate": 7.306768404691253e-05, "loss": 2.2129, "step": 26960 }, { "epoch": 0.6351561396071782, "grad_norm": 2.152632713317871, "learning_rate": 7.302058310960389e-05, "loss": 2.0254, "step": 26970 }, { "epoch": 0.6353916442937214, "grad_norm": 2.0267491340637207, "learning_rate": 7.297348217229522e-05, "loss": 2.1108, "step": 26980 }, { "epoch": 0.6356271489802647, "grad_norm": 1.9867489337921143, "learning_rate": 7.292638123498659e-05, "loss": 2.1763, "step": 26990 }, { "epoch": 0.6358626536668079, "grad_norm": 3.316333532333374, "learning_rate": 7.287928029767794e-05, "loss": 2.1017, "step": 27000 }, { "epoch": 0.6360981583533513, "grad_norm": 1.8042023181915283, "learning_rate": 7.283217936036927e-05, "loss": 2.1005, 
"step": 27010 }, { "epoch": 0.6363336630398945, "grad_norm": 2.227996587753296, "learning_rate": 7.278507842306062e-05, "loss": 2.1378, "step": 27020 }, { "epoch": 0.6365691677264378, "grad_norm": 2.519883155822754, "learning_rate": 7.273797748575197e-05, "loss": 1.8913, "step": 27030 }, { "epoch": 0.636804672412981, "grad_norm": 1.6812520027160645, "learning_rate": 7.269087654844332e-05, "loss": 1.8019, "step": 27040 }, { "epoch": 0.6370401770995243, "grad_norm": 3.9242050647735596, "learning_rate": 7.264377561113467e-05, "loss": 2.3015, "step": 27050 }, { "epoch": 0.6372756817860675, "grad_norm": 1.8133604526519775, "learning_rate": 7.259667467382601e-05, "loss": 1.9485, "step": 27060 }, { "epoch": 0.6375111864726108, "grad_norm": 2.5527639389038086, "learning_rate": 7.254957373651736e-05, "loss": 2.0823, "step": 27070 }, { "epoch": 0.637746691159154, "grad_norm": 2.00186824798584, "learning_rate": 7.250247279920871e-05, "loss": 2.0701, "step": 27080 }, { "epoch": 0.6379821958456974, "grad_norm": 2.2295942306518555, "learning_rate": 7.245537186190006e-05, "loss": 2.0213, "step": 27090 }, { "epoch": 0.6382177005322406, "grad_norm": 2.3956990242004395, "learning_rate": 7.24082709245914e-05, "loss": 1.8728, "step": 27100 }, { "epoch": 0.6384532052187839, "grad_norm": 2.1132164001464844, "learning_rate": 7.236116998728275e-05, "loss": 1.8988, "step": 27110 }, { "epoch": 0.6386887099053271, "grad_norm": 2.5249156951904297, "learning_rate": 7.23140690499741e-05, "loss": 2.051, "step": 27120 }, { "epoch": 0.6389242145918704, "grad_norm": 2.1515731811523438, "learning_rate": 7.226696811266545e-05, "loss": 1.8113, "step": 27130 }, { "epoch": 0.6391597192784136, "grad_norm": 1.9574979543685913, "learning_rate": 7.22198671753568e-05, "loss": 2.2399, "step": 27140 }, { "epoch": 0.6393952239649568, "grad_norm": 2.0978519916534424, "learning_rate": 7.217276623804814e-05, "loss": 2.333, "step": 27150 }, { "epoch": 0.6396307286515002, "grad_norm": 1.8691319227218628, 
"learning_rate": 7.212566530073949e-05, "loss": 2.0774, "step": 27160 }, { "epoch": 0.6398662333380434, "grad_norm": 1.9516493082046509, "learning_rate": 7.207856436343084e-05, "loss": 2.2324, "step": 27170 }, { "epoch": 0.6401017380245867, "grad_norm": 1.61760675907135, "learning_rate": 7.203146342612219e-05, "loss": 2.127, "step": 27180 }, { "epoch": 0.6403372427111299, "grad_norm": 1.9777883291244507, "learning_rate": 7.198436248881352e-05, "loss": 2.0067, "step": 27190 }, { "epoch": 0.6405727473976732, "grad_norm": 2.270724058151245, "learning_rate": 7.193726155150487e-05, "loss": 1.8146, "step": 27200 }, { "epoch": 0.6408082520842164, "grad_norm": 2.2753586769104004, "learning_rate": 7.189016061419622e-05, "loss": 2.1406, "step": 27210 }, { "epoch": 0.6410437567707598, "grad_norm": 2.114849328994751, "learning_rate": 7.184305967688757e-05, "loss": 1.9977, "step": 27220 }, { "epoch": 0.641279261457303, "grad_norm": 2.480898857116699, "learning_rate": 7.179595873957891e-05, "loss": 1.8545, "step": 27230 }, { "epoch": 0.6415147661438463, "grad_norm": 2.312798500061035, "learning_rate": 7.174885780227026e-05, "loss": 2.0963, "step": 27240 }, { "epoch": 0.6417502708303895, "grad_norm": 2.0719785690307617, "learning_rate": 7.170175686496161e-05, "loss": 2.0581, "step": 27250 }, { "epoch": 0.6419857755169328, "grad_norm": 2.2538905143737793, "learning_rate": 7.165465592765296e-05, "loss": 1.9055, "step": 27260 }, { "epoch": 0.642221280203476, "grad_norm": 2.6148452758789062, "learning_rate": 7.160755499034431e-05, "loss": 2.2561, "step": 27270 }, { "epoch": 0.6424567848900193, "grad_norm": 1.9259710311889648, "learning_rate": 7.156045405303566e-05, "loss": 2.2007, "step": 27280 }, { "epoch": 0.6426922895765625, "grad_norm": 1.789009690284729, "learning_rate": 7.151335311572701e-05, "loss": 1.9831, "step": 27290 }, { "epoch": 0.6429277942631059, "grad_norm": 3.458051919937134, "learning_rate": 7.146625217841836e-05, "loss": 2.0307, "step": 27300 }, { "epoch": 
0.6431632989496491, "grad_norm": 2.757291078567505, "learning_rate": 7.14191512411097e-05, "loss": 2.1219, "step": 27310 }, { "epoch": 0.6433988036361924, "grad_norm": 1.8680845499038696, "learning_rate": 7.137205030380105e-05, "loss": 2.0296, "step": 27320 }, { "epoch": 0.6436343083227356, "grad_norm": 2.32859468460083, "learning_rate": 7.13249493664924e-05, "loss": 2.1007, "step": 27330 }, { "epoch": 0.6438698130092789, "grad_norm": 2.07627272605896, "learning_rate": 7.127784842918375e-05, "loss": 1.9358, "step": 27340 }, { "epoch": 0.6441053176958221, "grad_norm": 2.0837743282318115, "learning_rate": 7.12307474918751e-05, "loss": 1.8441, "step": 27350 }, { "epoch": 0.6443408223823655, "grad_norm": 2.417036771774292, "learning_rate": 7.118364655456644e-05, "loss": 1.9943, "step": 27360 }, { "epoch": 0.6445763270689087, "grad_norm": 2.3873021602630615, "learning_rate": 7.113654561725779e-05, "loss": 2.0055, "step": 27370 }, { "epoch": 0.644811831755452, "grad_norm": 1.9441145658493042, "learning_rate": 7.108944467994914e-05, "loss": 2.0131, "step": 27380 }, { "epoch": 0.6450473364419952, "grad_norm": 2.0448830127716064, "learning_rate": 7.104234374264049e-05, "loss": 2.2472, "step": 27390 }, { "epoch": 0.6452828411285385, "grad_norm": 2.079862594604492, "learning_rate": 7.099524280533182e-05, "loss": 1.8943, "step": 27400 }, { "epoch": 0.6455183458150817, "grad_norm": 3.530986785888672, "learning_rate": 7.094814186802317e-05, "loss": 2.0522, "step": 27410 }, { "epoch": 0.645753850501625, "grad_norm": 2.296212673187256, "learning_rate": 7.090104093071452e-05, "loss": 1.7455, "step": 27420 }, { "epoch": 0.6459893551881682, "grad_norm": 2.3414154052734375, "learning_rate": 7.085393999340587e-05, "loss": 2.0542, "step": 27430 }, { "epoch": 0.6462248598747115, "grad_norm": 2.2467596530914307, "learning_rate": 7.080683905609722e-05, "loss": 2.0408, "step": 27440 }, { "epoch": 0.6464603645612548, "grad_norm": 1.901523232460022, "learning_rate": 7.075973811878856e-05, 
"loss": 2.0676, "step": 27450 }, { "epoch": 0.646695869247798, "grad_norm": 2.3312385082244873, "learning_rate": 7.071263718147991e-05, "loss": 2.1138, "step": 27460 }, { "epoch": 0.6469313739343413, "grad_norm": 2.5286500453948975, "learning_rate": 7.066553624417126e-05, "loss": 2.1407, "step": 27470 }, { "epoch": 0.6471668786208845, "grad_norm": 1.9842777252197266, "learning_rate": 7.062314540059347e-05, "loss": 1.928, "step": 27480 }, { "epoch": 0.6474023833074278, "grad_norm": 2.1205496788024902, "learning_rate": 7.057604446328482e-05, "loss": 2.0597, "step": 27490 }, { "epoch": 0.647637887993971, "grad_norm": 2.428333044052124, "learning_rate": 7.052894352597617e-05, "loss": 2.0553, "step": 27500 }, { "epoch": 0.6478733926805144, "grad_norm": 2.098172664642334, "learning_rate": 7.048184258866752e-05, "loss": 2.135, "step": 27510 }, { "epoch": 0.6481088973670576, "grad_norm": 2.455638885498047, "learning_rate": 7.043474165135887e-05, "loss": 2.1021, "step": 27520 }, { "epoch": 0.6483444020536009, "grad_norm": 1.9910706281661987, "learning_rate": 7.038764071405022e-05, "loss": 2.0287, "step": 27530 }, { "epoch": 0.6485799067401441, "grad_norm": 1.8840402364730835, "learning_rate": 7.034053977674157e-05, "loss": 2.1737, "step": 27540 }, { "epoch": 0.6488154114266874, "grad_norm": 2.0080809593200684, "learning_rate": 7.029343883943292e-05, "loss": 1.8489, "step": 27550 }, { "epoch": 0.6490509161132306, "grad_norm": 2.091917037963867, "learning_rate": 7.024633790212425e-05, "loss": 2.0131, "step": 27560 }, { "epoch": 0.649286420799774, "grad_norm": 1.8030489683151245, "learning_rate": 7.01992369648156e-05, "loss": 2.0919, "step": 27570 }, { "epoch": 0.6495219254863172, "grad_norm": 1.896256923675537, "learning_rate": 7.015213602750696e-05, "loss": 2.0142, "step": 27580 }, { "epoch": 0.6497574301728605, "grad_norm": 1.7844651937484741, "learning_rate": 7.01050350901983e-05, "loss": 2.2077, "step": 27590 }, { "epoch": 0.6499929348594037, "grad_norm": 
2.182560920715332, "learning_rate": 7.005793415288964e-05, "loss": 1.7973, "step": 27600 }, { "epoch": 0.650228439545947, "grad_norm": 2.0249404907226562, "learning_rate": 7.001083321558099e-05, "loss": 2.1121, "step": 27610 }, { "epoch": 0.6504639442324902, "grad_norm": 2.052584409713745, "learning_rate": 6.996373227827234e-05, "loss": 2.2913, "step": 27620 }, { "epoch": 0.6506994489190335, "grad_norm": 1.8792239427566528, "learning_rate": 6.991663134096369e-05, "loss": 2.059, "step": 27630 }, { "epoch": 0.6509349536055767, "grad_norm": 2.087036371231079, "learning_rate": 6.986953040365504e-05, "loss": 2.0064, "step": 27640 }, { "epoch": 0.6511704582921201, "grad_norm": 2.4446024894714355, "learning_rate": 6.982242946634638e-05, "loss": 2.0678, "step": 27650 }, { "epoch": 0.6514059629786633, "grad_norm": 2.256411552429199, "learning_rate": 6.977532852903773e-05, "loss": 1.9454, "step": 27660 }, { "epoch": 0.6516414676652066, "grad_norm": 2.468778610229492, "learning_rate": 6.972822759172908e-05, "loss": 2.1624, "step": 27670 }, { "epoch": 0.6518769723517498, "grad_norm": 2.614762544631958, "learning_rate": 6.968112665442043e-05, "loss": 2.1327, "step": 27680 }, { "epoch": 0.6521124770382931, "grad_norm": 1.6540695428848267, "learning_rate": 6.963402571711177e-05, "loss": 1.7609, "step": 27690 }, { "epoch": 0.6523479817248363, "grad_norm": 1.6981443166732788, "learning_rate": 6.958692477980312e-05, "loss": 1.9458, "step": 27700 }, { "epoch": 0.6525834864113795, "grad_norm": 2.1197662353515625, "learning_rate": 6.953982384249447e-05, "loss": 2.1603, "step": 27710 }, { "epoch": 0.6528189910979229, "grad_norm": 2.753307580947876, "learning_rate": 6.949272290518582e-05, "loss": 2.192, "step": 27720 }, { "epoch": 0.6530544957844661, "grad_norm": 1.7629789113998413, "learning_rate": 6.944562196787717e-05, "loss": 1.9333, "step": 27730 }, { "epoch": 0.6532900004710094, "grad_norm": 2.513230323791504, "learning_rate": 6.93985210305685e-05, "loss": 2.0639, "step": 27740 }, 
{ "epoch": 0.6535255051575526, "grad_norm": 1.9997378587722778, "learning_rate": 6.935142009325986e-05, "loss": 1.8298, "step": 27750 }, { "epoch": 0.6537610098440959, "grad_norm": 2.183000087738037, "learning_rate": 6.93043191559512e-05, "loss": 1.8049, "step": 27760 }, { "epoch": 0.6539965145306391, "grad_norm": 2.316758155822754, "learning_rate": 6.925721821864256e-05, "loss": 2.2343, "step": 27770 }, { "epoch": 0.6542320192171824, "grad_norm": 2.8688292503356934, "learning_rate": 6.921011728133389e-05, "loss": 2.0771, "step": 27780 }, { "epoch": 0.6544675239037256, "grad_norm": 2.846386671066284, "learning_rate": 6.916301634402524e-05, "loss": 1.9481, "step": 27790 }, { "epoch": 0.654703028590269, "grad_norm": 2.241668462753296, "learning_rate": 6.911591540671659e-05, "loss": 2.0466, "step": 27800 }, { "epoch": 0.6549385332768122, "grad_norm": 1.862688422203064, "learning_rate": 6.906881446940796e-05, "loss": 2.0278, "step": 27810 }, { "epoch": 0.6551740379633555, "grad_norm": 2.669524908065796, "learning_rate": 6.902171353209929e-05, "loss": 1.9347, "step": 27820 }, { "epoch": 0.6554095426498987, "grad_norm": 1.9434707164764404, "learning_rate": 6.897461259479064e-05, "loss": 1.9777, "step": 27830 }, { "epoch": 0.655645047336442, "grad_norm": 1.9469919204711914, "learning_rate": 6.8927511657482e-05, "loss": 2.1351, "step": 27840 }, { "epoch": 0.6558805520229852, "grad_norm": 2.0823862552642822, "learning_rate": 6.888041072017334e-05, "loss": 2.1103, "step": 27850 }, { "epoch": 0.6561160567095286, "grad_norm": 2.011646270751953, "learning_rate": 6.883330978286468e-05, "loss": 2.0807, "step": 27860 }, { "epoch": 0.6563515613960718, "grad_norm": 2.085239887237549, "learning_rate": 6.878620884555603e-05, "loss": 2.0131, "step": 27870 }, { "epoch": 0.6565870660826151, "grad_norm": 2.445866346359253, "learning_rate": 6.873910790824738e-05, "loss": 1.9386, "step": 27880 }, { "epoch": 0.6568225707691583, "grad_norm": 2.3337929248809814, "learning_rate": 
6.869200697093873e-05, "loss": 1.9866, "step": 27890 }, { "epoch": 0.6570580754557016, "grad_norm": 2.558706045150757, "learning_rate": 6.864490603363007e-05, "loss": 1.9361, "step": 27900 }, { "epoch": 0.6572935801422448, "grad_norm": 1.7763642072677612, "learning_rate": 6.859780509632142e-05, "loss": 2.147, "step": 27910 }, { "epoch": 0.6575290848287881, "grad_norm": 2.353127956390381, "learning_rate": 6.855070415901277e-05, "loss": 1.972, "step": 27920 }, { "epoch": 0.6577645895153313, "grad_norm": 2.1729345321655273, "learning_rate": 6.850360322170412e-05, "loss": 1.9824, "step": 27930 }, { "epoch": 0.6580000942018747, "grad_norm": 2.3141558170318604, "learning_rate": 6.845650228439547e-05, "loss": 1.8974, "step": 27940 }, { "epoch": 0.6582355988884179, "grad_norm": 2.9359517097473145, "learning_rate": 6.84094013470868e-05, "loss": 2.0506, "step": 27950 }, { "epoch": 0.6584711035749612, "grad_norm": 2.285006046295166, "learning_rate": 6.836230040977816e-05, "loss": 2.1571, "step": 27960 }, { "epoch": 0.6587066082615044, "grad_norm": 2.683715581893921, "learning_rate": 6.83151994724695e-05, "loss": 2.3031, "step": 27970 }, { "epoch": 0.6589421129480477, "grad_norm": 2.042071580886841, "learning_rate": 6.826809853516086e-05, "loss": 1.9662, "step": 27980 }, { "epoch": 0.6591776176345909, "grad_norm": 2.244554042816162, "learning_rate": 6.822099759785219e-05, "loss": 2.1763, "step": 27990 }, { "epoch": 0.6594131223211341, "grad_norm": 2.151461124420166, "learning_rate": 6.817389666054354e-05, "loss": 1.8774, "step": 28000 }, { "epoch": 0.6596486270076775, "grad_norm": 2.07513427734375, "learning_rate": 6.81267957232349e-05, "loss": 1.9599, "step": 28010 }, { "epoch": 0.6598841316942207, "grad_norm": 1.9893085956573486, "learning_rate": 6.807969478592624e-05, "loss": 2.1541, "step": 28020 }, { "epoch": 0.660119636380764, "grad_norm": 3.486293315887451, "learning_rate": 6.80325938486176e-05, "loss": 2.1084, "step": 28030 }, { "epoch": 0.6603551410673072, 
"grad_norm": 2.266904830932617, "learning_rate": 6.798549291130893e-05, "loss": 1.9884, "step": 28040 }, { "epoch": 0.6605906457538505, "grad_norm": 2.0467066764831543, "learning_rate": 6.793839197400028e-05, "loss": 2.172, "step": 28050 }, { "epoch": 0.6608261504403937, "grad_norm": 2.1317572593688965, "learning_rate": 6.789129103669163e-05, "loss": 2.1142, "step": 28060 }, { "epoch": 0.661061655126937, "grad_norm": 2.3432118892669678, "learning_rate": 6.784419009938298e-05, "loss": 1.9512, "step": 28070 }, { "epoch": 0.6612971598134803, "grad_norm": 2.4152426719665527, "learning_rate": 6.779708916207432e-05, "loss": 2.0989, "step": 28080 }, { "epoch": 0.6615326645000236, "grad_norm": 1.8867883682250977, "learning_rate": 6.774998822476568e-05, "loss": 1.9836, "step": 28090 }, { "epoch": 0.6617681691865668, "grad_norm": 2.182126998901367, "learning_rate": 6.770288728745703e-05, "loss": 1.8761, "step": 28100 }, { "epoch": 0.6620036738731101, "grad_norm": 2.5316390991210938, "learning_rate": 6.765578635014838e-05, "loss": 1.9713, "step": 28110 }, { "epoch": 0.6622391785596533, "grad_norm": 2.1727163791656494, "learning_rate": 6.760868541283972e-05, "loss": 1.8554, "step": 28120 }, { "epoch": 0.6624746832461966, "grad_norm": 3.6611173152923584, "learning_rate": 6.756158447553107e-05, "loss": 1.9843, "step": 28130 }, { "epoch": 0.6627101879327398, "grad_norm": 2.4131314754486084, "learning_rate": 6.751448353822242e-05, "loss": 2.0242, "step": 28140 }, { "epoch": 0.6629456926192832, "grad_norm": 1.8160946369171143, "learning_rate": 6.746738260091377e-05, "loss": 2.0622, "step": 28150 }, { "epoch": 0.6631811973058264, "grad_norm": 3.188413381576538, "learning_rate": 6.74202816636051e-05, "loss": 2.2114, "step": 28160 }, { "epoch": 0.6634167019923697, "grad_norm": 2.4607508182525635, "learning_rate": 6.737318072629646e-05, "loss": 2.0314, "step": 28170 }, { "epoch": 0.6636522066789129, "grad_norm": 2.2782578468322754, "learning_rate": 6.732607978898781e-05, "loss": 
1.9546, "step": 28180 }, { "epoch": 0.6638877113654562, "grad_norm": 1.997504472732544, "learning_rate": 6.727897885167916e-05, "loss": 2.0726, "step": 28190 }, { "epoch": 0.6641232160519994, "grad_norm": 2.3978195190429688, "learning_rate": 6.72318779143705e-05, "loss": 2.1611, "step": 28200 }, { "epoch": 0.6643587207385427, "grad_norm": 2.00547456741333, "learning_rate": 6.718477697706184e-05, "loss": 2.0716, "step": 28210 }, { "epoch": 0.664594225425086, "grad_norm": 1.96976637840271, "learning_rate": 6.71376760397532e-05, "loss": 1.9797, "step": 28220 }, { "epoch": 0.6648297301116293, "grad_norm": 2.197422981262207, "learning_rate": 6.709057510244454e-05, "loss": 1.9887, "step": 28230 }, { "epoch": 0.6650652347981725, "grad_norm": 1.326515555381775, "learning_rate": 6.70434741651359e-05, "loss": 1.9457, "step": 28240 }, { "epoch": 0.6653007394847158, "grad_norm": 1.9585696458816528, "learning_rate": 6.699637322782723e-05, "loss": 2.1483, "step": 28250 }, { "epoch": 0.665536244171259, "grad_norm": 2.264357328414917, "learning_rate": 6.694927229051858e-05, "loss": 1.9455, "step": 28260 }, { "epoch": 0.6657717488578023, "grad_norm": 2.1170806884765625, "learning_rate": 6.690217135320993e-05, "loss": 2.1229, "step": 28270 }, { "epoch": 0.6660072535443455, "grad_norm": 2.3567802906036377, "learning_rate": 6.685507041590128e-05, "loss": 2.2026, "step": 28280 }, { "epoch": 0.6662427582308887, "grad_norm": 2.09476637840271, "learning_rate": 6.680796947859262e-05, "loss": 1.9125, "step": 28290 }, { "epoch": 0.6664782629174321, "grad_norm": 1.959568738937378, "learning_rate": 6.676086854128397e-05, "loss": 2.085, "step": 28300 }, { "epoch": 0.6667137676039753, "grad_norm": 3.108013153076172, "learning_rate": 6.671376760397532e-05, "loss": 1.9585, "step": 28310 }, { "epoch": 0.6669492722905186, "grad_norm": 2.3948521614074707, "learning_rate": 6.666666666666667e-05, "loss": 1.8998, "step": 28320 }, { "epoch": 0.6671847769770618, "grad_norm": 1.9991596937179565, 
"learning_rate": 6.6619565729358e-05, "loss": 1.9824, "step": 28330 }, { "epoch": 0.6674202816636051, "grad_norm": 1.759979486465454, "learning_rate": 6.657246479204936e-05, "loss": 1.9744, "step": 28340 }, { "epoch": 0.6676557863501483, "grad_norm": 2.247377634048462, "learning_rate": 6.652536385474071e-05, "loss": 2.1226, "step": 28350 }, { "epoch": 0.6678912910366916, "grad_norm": 3.3564887046813965, "learning_rate": 6.647826291743206e-05, "loss": 2.0988, "step": 28360 }, { "epoch": 0.6681267957232349, "grad_norm": 2.1177890300750732, "learning_rate": 6.643116198012341e-05, "loss": 1.6984, "step": 28370 }, { "epoch": 0.6683623004097782, "grad_norm": 2.21870756149292, "learning_rate": 6.638406104281476e-05, "loss": 2.1458, "step": 28380 }, { "epoch": 0.6685978050963214, "grad_norm": 1.8025680780410767, "learning_rate": 6.633696010550611e-05, "loss": 1.9258, "step": 28390 }, { "epoch": 0.6688333097828647, "grad_norm": 1.9146382808685303, "learning_rate": 6.628985916819746e-05, "loss": 1.879, "step": 28400 }, { "epoch": 0.6690688144694079, "grad_norm": 2.1155905723571777, "learning_rate": 6.62427582308888e-05, "loss": 2.2079, "step": 28410 }, { "epoch": 0.6693043191559512, "grad_norm": 2.081460475921631, "learning_rate": 6.619565729358015e-05, "loss": 2.0226, "step": 28420 }, { "epoch": 0.6695398238424944, "grad_norm": 2.065375328063965, "learning_rate": 6.61485563562715e-05, "loss": 2.0476, "step": 28430 }, { "epoch": 0.6697753285290378, "grad_norm": 1.6890816688537598, "learning_rate": 6.610145541896285e-05, "loss": 1.9391, "step": 28440 }, { "epoch": 0.670010833215581, "grad_norm": 2.4403939247131348, "learning_rate": 6.60543544816542e-05, "loss": 1.8528, "step": 28450 }, { "epoch": 0.6702463379021243, "grad_norm": 2.6802732944488525, "learning_rate": 6.600725354434553e-05, "loss": 2.1087, "step": 28460 }, { "epoch": 0.6704818425886675, "grad_norm": 1.8135290145874023, "learning_rate": 6.596015260703688e-05, "loss": 2.1068, "step": 28470 }, { "epoch": 
0.6707173472752108, "grad_norm": 2.0007054805755615, "learning_rate": 6.591305166972823e-05, "loss": 2.113, "step": 28480 }, { "epoch": 0.670952851961754, "grad_norm": 2.349604368209839, "learning_rate": 6.586595073241958e-05, "loss": 1.7037, "step": 28490 }, { "epoch": 0.6711883566482973, "grad_norm": 2.2065269947052, "learning_rate": 6.581884979511092e-05, "loss": 2.3166, "step": 28500 }, { "epoch": 0.6714238613348406, "grad_norm": 2.0659828186035156, "learning_rate": 6.577174885780227e-05, "loss": 2.0392, "step": 28510 }, { "epoch": 0.6716593660213839, "grad_norm": 2.7772090435028076, "learning_rate": 6.572464792049362e-05, "loss": 1.9277, "step": 28520 }, { "epoch": 0.6718948707079271, "grad_norm": 2.1206893920898438, "learning_rate": 6.567754698318497e-05, "loss": 1.8924, "step": 28530 }, { "epoch": 0.6721303753944704, "grad_norm": 2.0614333152770996, "learning_rate": 6.563044604587632e-05, "loss": 2.1831, "step": 28540 }, { "epoch": 0.6723658800810136, "grad_norm": 1.9979201555252075, "learning_rate": 6.558334510856766e-05, "loss": 1.9522, "step": 28550 }, { "epoch": 0.6726013847675568, "grad_norm": 1.968994379043579, "learning_rate": 6.553624417125901e-05, "loss": 1.9722, "step": 28560 }, { "epoch": 0.6728368894541001, "grad_norm": 1.783308982849121, "learning_rate": 6.548914323395036e-05, "loss": 2.1729, "step": 28570 }, { "epoch": 0.6730723941406433, "grad_norm": 2.494913101196289, "learning_rate": 6.544204229664171e-05, "loss": 2.1268, "step": 28580 }, { "epoch": 0.6733078988271867, "grad_norm": 2.0739006996154785, "learning_rate": 6.539494135933305e-05, "loss": 2.0764, "step": 28590 }, { "epoch": 0.6735434035137299, "grad_norm": 3.6099002361297607, "learning_rate": 6.53478404220244e-05, "loss": 2.0012, "step": 28600 }, { "epoch": 0.6737789082002732, "grad_norm": 2.3188111782073975, "learning_rate": 6.530073948471575e-05, "loss": 2.0252, "step": 28610 }, { "epoch": 0.6740144128868164, "grad_norm": 1.7235386371612549, "learning_rate": 
6.525834864113795e-05, "loss": 1.9303, "step": 28620 }, { "epoch": 0.6742499175733597, "grad_norm": 1.7218267917633057, "learning_rate": 6.521124770382931e-05, "loss": 1.9484, "step": 28630 }, { "epoch": 0.6744854222599029, "grad_norm": 2.311770439147949, "learning_rate": 6.516414676652066e-05, "loss": 2.0987, "step": 28640 }, { "epoch": 0.6747209269464463, "grad_norm": 2.6366100311279297, "learning_rate": 6.511704582921201e-05, "loss": 1.9514, "step": 28650 }, { "epoch": 0.6749564316329895, "grad_norm": 2.66902494430542, "learning_rate": 6.506994489190335e-05, "loss": 1.9368, "step": 28660 }, { "epoch": 0.6751919363195328, "grad_norm": 2.354694366455078, "learning_rate": 6.50228439545947e-05, "loss": 1.9255, "step": 28670 }, { "epoch": 0.675427441006076, "grad_norm": 2.0443005561828613, "learning_rate": 6.497574301728605e-05, "loss": 2.0929, "step": 28680 }, { "epoch": 0.6756629456926193, "grad_norm": 1.8448538780212402, "learning_rate": 6.49286420799774e-05, "loss": 2.0847, "step": 28690 }, { "epoch": 0.6758984503791625, "grad_norm": 2.754966974258423, "learning_rate": 6.488154114266874e-05, "loss": 2.1979, "step": 28700 }, { "epoch": 0.6761339550657058, "grad_norm": 2.3212506771087646, "learning_rate": 6.483444020536009e-05, "loss": 2.062, "step": 28710 }, { "epoch": 0.676369459752249, "grad_norm": 2.090217351913452, "learning_rate": 6.478733926805144e-05, "loss": 1.9911, "step": 28720 }, { "epoch": 0.6766049644387924, "grad_norm": 1.9577314853668213, "learning_rate": 6.474023833074279e-05, "loss": 1.9376, "step": 28730 }, { "epoch": 0.6768404691253356, "grad_norm": 1.8566160202026367, "learning_rate": 6.469313739343414e-05, "loss": 2.0811, "step": 28740 }, { "epoch": 0.6770759738118789, "grad_norm": 2.3416032791137695, "learning_rate": 6.464603645612548e-05, "loss": 2.1295, "step": 28750 }, { "epoch": 0.6773114784984221, "grad_norm": 1.6582307815551758, "learning_rate": 6.459893551881683e-05, "loss": 2.0106, "step": 28760 }, { "epoch": 0.6775469831849654, 
"grad_norm": 2.144157886505127, "learning_rate": 6.455183458150818e-05, "loss": 1.9546, "step": 28770 }, { "epoch": 0.6777824878715086, "grad_norm": 2.9796035289764404, "learning_rate": 6.450473364419953e-05, "loss": 1.9336, "step": 28780 }, { "epoch": 0.678017992558052, "grad_norm": 2.0936977863311768, "learning_rate": 6.445763270689086e-05, "loss": 2.077, "step": 28790 }, { "epoch": 0.6782534972445952, "grad_norm": 1.9568135738372803, "learning_rate": 6.441053176958221e-05, "loss": 2.1443, "step": 28800 }, { "epoch": 0.6784890019311385, "grad_norm": 2.607478618621826, "learning_rate": 6.436343083227356e-05, "loss": 2.139, "step": 28810 }, { "epoch": 0.6787245066176817, "grad_norm": 2.0148494243621826, "learning_rate": 6.431632989496491e-05, "loss": 1.7023, "step": 28820 }, { "epoch": 0.678960011304225, "grad_norm": 2.429159164428711, "learning_rate": 6.426922895765626e-05, "loss": 1.9094, "step": 28830 }, { "epoch": 0.6791955159907682, "grad_norm": 2.1411314010620117, "learning_rate": 6.42221280203476e-05, "loss": 2.1745, "step": 28840 }, { "epoch": 0.6794310206773114, "grad_norm": 2.190335988998413, "learning_rate": 6.417502708303895e-05, "loss": 2.139, "step": 28850 }, { "epoch": 0.6796665253638547, "grad_norm": 2.283425807952881, "learning_rate": 6.41279261457303e-05, "loss": 1.9698, "step": 28860 }, { "epoch": 0.679902030050398, "grad_norm": 2.0648112297058105, "learning_rate": 6.408082520842165e-05, "loss": 2.0706, "step": 28870 }, { "epoch": 0.6801375347369413, "grad_norm": 1.8722620010375977, "learning_rate": 6.403372427111299e-05, "loss": 2.0114, "step": 28880 }, { "epoch": 0.6803730394234845, "grad_norm": 2.4573814868927, "learning_rate": 6.398662333380434e-05, "loss": 1.9978, "step": 28890 }, { "epoch": 0.6806085441100278, "grad_norm": 2.1509108543395996, "learning_rate": 6.393952239649569e-05, "loss": 2.0784, "step": 28900 }, { "epoch": 0.680844048796571, "grad_norm": 2.1195876598358154, "learning_rate": 6.389242145918705e-05, "loss": 2.1025, "step": 
28910 }, { "epoch": 0.6810795534831143, "grad_norm": 2.044261932373047, "learning_rate": 6.384532052187839e-05, "loss": 1.786, "step": 28920 }, { "epoch": 0.6813150581696575, "grad_norm": 3.030416250228882, "learning_rate": 6.379821958456974e-05, "loss": 2.1121, "step": 28930 }, { "epoch": 0.6815505628562009, "grad_norm": 2.920388698577881, "learning_rate": 6.375111864726109e-05, "loss": 2.0764, "step": 28940 }, { "epoch": 0.6817860675427441, "grad_norm": 1.9454491138458252, "learning_rate": 6.370401770995244e-05, "loss": 2.0691, "step": 28950 }, { "epoch": 0.6820215722292874, "grad_norm": 1.834728479385376, "learning_rate": 6.365691677264378e-05, "loss": 1.8553, "step": 28960 }, { "epoch": 0.6822570769158306, "grad_norm": 2.1100876331329346, "learning_rate": 6.360981583533513e-05, "loss": 1.8956, "step": 28970 }, { "epoch": 0.6824925816023739, "grad_norm": 1.9620522260665894, "learning_rate": 6.356271489802648e-05, "loss": 1.7718, "step": 28980 }, { "epoch": 0.6827280862889171, "grad_norm": 2.126603126525879, "learning_rate": 6.351561396071783e-05, "loss": 1.9046, "step": 28990 }, { "epoch": 0.6829635909754604, "grad_norm": 2.21046781539917, "learning_rate": 6.346851302340916e-05, "loss": 2.054, "step": 29000 }, { "epoch": 0.6831990956620037, "grad_norm": 2.2389838695526123, "learning_rate": 6.342141208610051e-05, "loss": 2.0818, "step": 29010 }, { "epoch": 0.683434600348547, "grad_norm": 2.375826358795166, "learning_rate": 6.337431114879186e-05, "loss": 1.9759, "step": 29020 }, { "epoch": 0.6836701050350902, "grad_norm": 1.9584667682647705, "learning_rate": 6.332721021148322e-05, "loss": 2.0202, "step": 29030 }, { "epoch": 0.6839056097216335, "grad_norm": 2.5491538047790527, "learning_rate": 6.328010927417457e-05, "loss": 2.0253, "step": 29040 }, { "epoch": 0.6841411144081767, "grad_norm": 1.8970832824707031, "learning_rate": 6.32330083368659e-05, "loss": 1.842, "step": 29050 }, { "epoch": 0.68437661909472, "grad_norm": 2.02691912651062, "learning_rate": 
6.318590739955725e-05, "loss": 1.788, "step": 29060 }, { "epoch": 0.6846121237812632, "grad_norm": 1.954071283340454, "learning_rate": 6.31388064622486e-05, "loss": 1.9792, "step": 29070 }, { "epoch": 0.6848476284678066, "grad_norm": 2.6439526081085205, "learning_rate": 6.309170552493995e-05, "loss": 1.8697, "step": 29080 }, { "epoch": 0.6850831331543498, "grad_norm": 2.4627387523651123, "learning_rate": 6.304460458763129e-05, "loss": 2.2861, "step": 29090 }, { "epoch": 0.6853186378408931, "grad_norm": 2.732792615890503, "learning_rate": 6.299750365032264e-05, "loss": 1.9502, "step": 29100 }, { "epoch": 0.6855541425274363, "grad_norm": 2.2963614463806152, "learning_rate": 6.295040271301399e-05, "loss": 2.0442, "step": 29110 }, { "epoch": 0.6857896472139795, "grad_norm": 2.23295259475708, "learning_rate": 6.290330177570534e-05, "loss": 2.2432, "step": 29120 }, { "epoch": 0.6860251519005228, "grad_norm": 2.274610996246338, "learning_rate": 6.285620083839669e-05, "loss": 2.1089, "step": 29130 }, { "epoch": 0.686260656587066, "grad_norm": 2.2117106914520264, "learning_rate": 6.280909990108803e-05, "loss": 2.1116, "step": 29140 }, { "epoch": 0.6864961612736094, "grad_norm": 1.9509594440460205, "learning_rate": 6.276199896377938e-05, "loss": 2.033, "step": 29150 }, { "epoch": 0.6867316659601526, "grad_norm": 2.090939521789551, "learning_rate": 6.271489802647073e-05, "loss": 2.1308, "step": 29160 }, { "epoch": 0.6869671706466959, "grad_norm": 1.9330859184265137, "learning_rate": 6.266779708916208e-05, "loss": 2.0138, "step": 29170 }, { "epoch": 0.6872026753332391, "grad_norm": 2.7607204914093018, "learning_rate": 6.262069615185341e-05, "loss": 1.8859, "step": 29180 }, { "epoch": 0.6874381800197824, "grad_norm": 1.8978123664855957, "learning_rate": 6.257359521454478e-05, "loss": 1.9501, "step": 29190 }, { "epoch": 0.6876736847063256, "grad_norm": 1.9100096225738525, "learning_rate": 6.252649427723613e-05, "loss": 2.1775, "step": 29200 }, { "epoch": 0.6879091893928689, 
"grad_norm": 2.1695330142974854, "learning_rate": 6.247939333992748e-05, "loss": 1.8777, "step": 29210 }, { "epoch": 0.6881446940794121, "grad_norm": 2.1711602210998535, "learning_rate": 6.243229240261882e-05, "loss": 1.9542, "step": 29220 }, { "epoch": 0.6883801987659555, "grad_norm": 1.9122905731201172, "learning_rate": 6.238519146531017e-05, "loss": 2.0965, "step": 29230 }, { "epoch": 0.6886157034524987, "grad_norm": 2.8225343227386475, "learning_rate": 6.233809052800152e-05, "loss": 1.8979, "step": 29240 }, { "epoch": 0.688851208139042, "grad_norm": 2.511843204498291, "learning_rate": 6.229098959069287e-05, "loss": 1.8626, "step": 29250 }, { "epoch": 0.6890867128255852, "grad_norm": 2.351067066192627, "learning_rate": 6.22438886533842e-05, "loss": 1.9113, "step": 29260 }, { "epoch": 0.6893222175121285, "grad_norm": 2.117947578430176, "learning_rate": 6.219678771607555e-05, "loss": 2.2033, "step": 29270 }, { "epoch": 0.6895577221986717, "grad_norm": 2.907036542892456, "learning_rate": 6.21496867787669e-05, "loss": 1.9599, "step": 29280 }, { "epoch": 0.689793226885215, "grad_norm": 2.5090277194976807, "learning_rate": 6.210258584145825e-05, "loss": 2.0079, "step": 29290 }, { "epoch": 0.6900287315717583, "grad_norm": 2.303168296813965, "learning_rate": 6.205548490414959e-05, "loss": 2.1558, "step": 29300 }, { "epoch": 0.6902642362583016, "grad_norm": 1.9022759199142456, "learning_rate": 6.200838396684094e-05, "loss": 1.9063, "step": 29310 }, { "epoch": 0.6904997409448448, "grad_norm": 2.1879916191101074, "learning_rate": 6.196128302953229e-05, "loss": 1.8982, "step": 29320 }, { "epoch": 0.6907352456313881, "grad_norm": 2.6008474826812744, "learning_rate": 6.191418209222364e-05, "loss": 2.2362, "step": 29330 }, { "epoch": 0.6909707503179313, "grad_norm": 2.61698317527771, "learning_rate": 6.186708115491499e-05, "loss": 2.0725, "step": 29340 }, { "epoch": 0.6912062550044746, "grad_norm": 2.21352219581604, "learning_rate": 6.181998021760633e-05, "loss": 2.0445, 
"step": 29350 }, { "epoch": 0.6914417596910178, "grad_norm": 2.037073850631714, "learning_rate": 6.177287928029768e-05, "loss": 1.8788, "step": 29360 }, { "epoch": 0.6916772643775612, "grad_norm": 2.1956939697265625, "learning_rate": 6.172577834298903e-05, "loss": 2.0399, "step": 29370 }, { "epoch": 0.6919127690641044, "grad_norm": 2.5270023345947266, "learning_rate": 6.167867740568038e-05, "loss": 2.2344, "step": 29380 }, { "epoch": 0.6921482737506477, "grad_norm": 2.476280689239502, "learning_rate": 6.163157646837172e-05, "loss": 1.9711, "step": 29390 }, { "epoch": 0.6923837784371909, "grad_norm": 1.4749599695205688, "learning_rate": 6.158447553106307e-05, "loss": 2.0054, "step": 29400 }, { "epoch": 0.6926192831237341, "grad_norm": 2.379751443862915, "learning_rate": 6.153737459375442e-05, "loss": 1.9132, "step": 29410 }, { "epoch": 0.6928547878102774, "grad_norm": 2.010408401489258, "learning_rate": 6.149027365644577e-05, "loss": 2.0497, "step": 29420 }, { "epoch": 0.6930902924968206, "grad_norm": 1.9814579486846924, "learning_rate": 6.144317271913712e-05, "loss": 2.0795, "step": 29430 }, { "epoch": 0.693325797183364, "grad_norm": 2.5224850177764893, "learning_rate": 6.139607178182845e-05, "loss": 1.8552, "step": 29440 }, { "epoch": 0.6935613018699072, "grad_norm": 2.3376238346099854, "learning_rate": 6.13489708445198e-05, "loss": 1.7499, "step": 29450 }, { "epoch": 0.6937968065564505, "grad_norm": 2.1768531799316406, "learning_rate": 6.130186990721115e-05, "loss": 2.1749, "step": 29460 }, { "epoch": 0.6940323112429937, "grad_norm": 2.1843111515045166, "learning_rate": 6.12547689699025e-05, "loss": 2.0094, "step": 29470 }, { "epoch": 0.694267815929537, "grad_norm": 2.0782411098480225, "learning_rate": 6.120766803259385e-05, "loss": 2.1589, "step": 29480 }, { "epoch": 0.6945033206160802, "grad_norm": 2.4557056427001953, "learning_rate": 6.11605670952852e-05, "loss": 2.164, "step": 29490 }, { "epoch": 0.6947388253026235, "grad_norm": 2.396660804748535, 
"learning_rate": 6.111346615797655e-05, "loss": 2.012, "step": 29500 }, { "epoch": 0.6949743299891668, "grad_norm": 2.3271138668060303, "learning_rate": 6.10663652206679e-05, "loss": 1.9762, "step": 29510 }, { "epoch": 0.6952098346757101, "grad_norm": 2.339451789855957, "learning_rate": 6.101926428335925e-05, "loss": 1.7365, "step": 29520 }, { "epoch": 0.6954453393622533, "grad_norm": 1.9744147062301636, "learning_rate": 6.097216334605059e-05, "loss": 2.0507, "step": 29530 }, { "epoch": 0.6956808440487966, "grad_norm": 3.988935708999634, "learning_rate": 6.092506240874194e-05, "loss": 1.9501, "step": 29540 }, { "epoch": 0.6959163487353398, "grad_norm": 2.2221195697784424, "learning_rate": 6.0877961471433286e-05, "loss": 2.123, "step": 29550 }, { "epoch": 0.6961518534218831, "grad_norm": 2.0011703968048096, "learning_rate": 6.0830860534124636e-05, "loss": 1.9586, "step": 29560 }, { "epoch": 0.6963873581084263, "grad_norm": 2.5858304500579834, "learning_rate": 6.078375959681598e-05, "loss": 2.2042, "step": 29570 }, { "epoch": 0.6966228627949697, "grad_norm": 2.5795745849609375, "learning_rate": 6.073665865950733e-05, "loss": 2.159, "step": 29580 }, { "epoch": 0.6968583674815129, "grad_norm": 2.2104289531707764, "learning_rate": 6.068955772219867e-05, "loss": 1.865, "step": 29590 }, { "epoch": 0.6970938721680562, "grad_norm": 1.7108197212219238, "learning_rate": 6.064245678489002e-05, "loss": 2.1158, "step": 29600 }, { "epoch": 0.6973293768545994, "grad_norm": 2.610651731491089, "learning_rate": 6.059535584758137e-05, "loss": 2.0603, "step": 29610 }, { "epoch": 0.6975648815411427, "grad_norm": 1.6342675685882568, "learning_rate": 6.054825491027272e-05, "loss": 1.963, "step": 29620 }, { "epoch": 0.6978003862276859, "grad_norm": 2.5833704471588135, "learning_rate": 6.050115397296407e-05, "loss": 2.1255, "step": 29630 }, { "epoch": 0.6980358909142292, "grad_norm": 2.2657909393310547, "learning_rate": 6.045405303565541e-05, "loss": 1.8765, "step": 29640 }, { "epoch": 
0.6982713956007724, "grad_norm": 2.124436378479004, "learning_rate": 6.040695209834676e-05, "loss": 2.2452, "step": 29650 }, { "epoch": 0.6985069002873158, "grad_norm": 2.2420623302459717, "learning_rate": 6.0359851161038104e-05, "loss": 2.1147, "step": 29660 }, { "epoch": 0.698742404973859, "grad_norm": 3.1197521686553955, "learning_rate": 6.0312750223729455e-05, "loss": 2.1481, "step": 29670 }, { "epoch": 0.6989779096604023, "grad_norm": 2.0020577907562256, "learning_rate": 6.02656492864208e-05, "loss": 2.1455, "step": 29680 }, { "epoch": 0.6992134143469455, "grad_norm": 3.46406888961792, "learning_rate": 6.021854834911215e-05, "loss": 1.8329, "step": 29690 }, { "epoch": 0.6994489190334887, "grad_norm": 2.138166904449463, "learning_rate": 6.017144741180349e-05, "loss": 2.2264, "step": 29700 }, { "epoch": 0.699684423720032, "grad_norm": 2.236649751663208, "learning_rate": 6.012434647449484e-05, "loss": 1.9857, "step": 29710 }, { "epoch": 0.6999199284065752, "grad_norm": 2.371490955352783, "learning_rate": 6.0077245537186185e-05, "loss": 2.2641, "step": 29720 }, { "epoch": 0.7001554330931186, "grad_norm": 1.9338253736495972, "learning_rate": 6.0030144599877536e-05, "loss": 1.7444, "step": 29730 }, { "epoch": 0.7003909377796618, "grad_norm": 2.1143953800201416, "learning_rate": 5.9983043662568886e-05, "loss": 2.0956, "step": 29740 }, { "epoch": 0.7006264424662051, "grad_norm": 2.2336223125457764, "learning_rate": 5.993594272526023e-05, "loss": 2.2865, "step": 29750 }, { "epoch": 0.7008619471527483, "grad_norm": 2.3659603595733643, "learning_rate": 5.9888841787951586e-05, "loss": 1.9793, "step": 29760 }, { "epoch": 0.7010974518392916, "grad_norm": 2.853128671646118, "learning_rate": 5.984174085064294e-05, "loss": 2.1234, "step": 29770 }, { "epoch": 0.7013329565258348, "grad_norm": 2.3596596717834473, "learning_rate": 5.979463991333428e-05, "loss": 1.8743, "step": 29780 }, { "epoch": 0.7015684612123781, "grad_norm": 1.9511758089065552, "learning_rate": 
5.974753897602563e-05, "loss": 2.1135, "step": 29790 }, { "epoch": 0.7018039658989214, "grad_norm": 2.2908072471618652, "learning_rate": 5.9700438038716974e-05, "loss": 2.2518, "step": 29800 }, { "epoch": 0.7020394705854647, "grad_norm": 2.339449167251587, "learning_rate": 5.9653337101408324e-05, "loss": 2.2305, "step": 29810 }, { "epoch": 0.7022749752720079, "grad_norm": 2.239489793777466, "learning_rate": 5.9606236164099674e-05, "loss": 2.0766, "step": 29820 }, { "epoch": 0.7025104799585512, "grad_norm": 1.768304467201233, "learning_rate": 5.955913522679102e-05, "loss": 2.1229, "step": 29830 }, { "epoch": 0.7027459846450944, "grad_norm": 1.8999934196472168, "learning_rate": 5.951203428948237e-05, "loss": 1.9637, "step": 29840 }, { "epoch": 0.7029814893316377, "grad_norm": 2.1406350135803223, "learning_rate": 5.946493335217371e-05, "loss": 1.8846, "step": 29850 }, { "epoch": 0.7032169940181809, "grad_norm": 2.1771881580352783, "learning_rate": 5.941783241486506e-05, "loss": 1.9483, "step": 29860 }, { "epoch": 0.7034524987047243, "grad_norm": 1.9195053577423096, "learning_rate": 5.9370731477556405e-05, "loss": 1.8552, "step": 29870 }, { "epoch": 0.7036880033912675, "grad_norm": 2.817622423171997, "learning_rate": 5.9323630540247756e-05, "loss": 2.0414, "step": 29880 }, { "epoch": 0.7039235080778108, "grad_norm": 1.7897940874099731, "learning_rate": 5.92765296029391e-05, "loss": 1.8892, "step": 29890 }, { "epoch": 0.704159012764354, "grad_norm": 2.211215019226074, "learning_rate": 5.922942866563045e-05, "loss": 2.002, "step": 29900 }, { "epoch": 0.7043945174508973, "grad_norm": 2.0046420097351074, "learning_rate": 5.918232772832179e-05, "loss": 1.9973, "step": 29910 }, { "epoch": 0.7046300221374405, "grad_norm": 2.126875638961792, "learning_rate": 5.913522679101314e-05, "loss": 2.044, "step": 29920 }, { "epoch": 0.7048655268239838, "grad_norm": 2.909090995788574, "learning_rate": 5.908812585370449e-05, "loss": 1.7947, "step": 29930 }, { "epoch": 0.705101031510527, 
"grad_norm": 2.0971028804779053, "learning_rate": 5.904102491639584e-05, "loss": 1.9566, "step": 29940 }, { "epoch": 0.7053365361970704, "grad_norm": 2.6434972286224365, "learning_rate": 5.899392397908719e-05, "loss": 2.1791, "step": 29950 }, { "epoch": 0.7055720408836136, "grad_norm": 1.7850927114486694, "learning_rate": 5.894682304177853e-05, "loss": 2.0034, "step": 29960 }, { "epoch": 0.7058075455701568, "grad_norm": 1.7622065544128418, "learning_rate": 5.889972210446988e-05, "loss": 1.9323, "step": 29970 }, { "epoch": 0.7060430502567001, "grad_norm": 2.0412206649780273, "learning_rate": 5.8852621167161224e-05, "loss": 2.0019, "step": 29980 }, { "epoch": 0.7062785549432433, "grad_norm": 1.7954665422439575, "learning_rate": 5.8805520229852574e-05, "loss": 1.9332, "step": 29990 }, { "epoch": 0.7065140596297866, "grad_norm": 4.530249118804932, "learning_rate": 5.875841929254392e-05, "loss": 2.0373, "step": 30000 }, { "epoch": 0.7067495643163298, "grad_norm": 1.6885002851486206, "learning_rate": 5.871131835523527e-05, "loss": 1.9966, "step": 30010 }, { "epoch": 0.7069850690028732, "grad_norm": 2.3572335243225098, "learning_rate": 5.866421741792661e-05, "loss": 1.9012, "step": 30020 }, { "epoch": 0.7072205736894164, "grad_norm": 1.8825509548187256, "learning_rate": 5.861711648061796e-05, "loss": 2.2303, "step": 30030 }, { "epoch": 0.7074560783759597, "grad_norm": 3.2787723541259766, "learning_rate": 5.857001554330932e-05, "loss": 2.2594, "step": 30040 }, { "epoch": 0.7076915830625029, "grad_norm": 2.52473783493042, "learning_rate": 5.852291460600067e-05, "loss": 2.1244, "step": 30050 }, { "epoch": 0.7079270877490462, "grad_norm": 2.1791837215423584, "learning_rate": 5.847581366869201e-05, "loss": 1.973, "step": 30060 }, { "epoch": 0.7081625924355894, "grad_norm": 1.9965521097183228, "learning_rate": 5.842871273138336e-05, "loss": 1.8938, "step": 30070 }, { "epoch": 0.7083980971221328, "grad_norm": 1.438650131225586, "learning_rate": 5.8381611794074706e-05, "loss": 
1.9799, "step": 30080 }, { "epoch": 0.708633601808676, "grad_norm": 1.926436185836792, "learning_rate": 5.8334510856766056e-05, "loss": 2.0451, "step": 30090 }, { "epoch": 0.7088691064952193, "grad_norm": 1.62642240524292, "learning_rate": 5.82874099194574e-05, "loss": 2.0659, "step": 30100 }, { "epoch": 0.7091046111817625, "grad_norm": 2.5094661712646484, "learning_rate": 5.824030898214875e-05, "loss": 1.9782, "step": 30110 }, { "epoch": 0.7093401158683058, "grad_norm": 1.6265511512756348, "learning_rate": 5.81932080448401e-05, "loss": 1.9198, "step": 30120 }, { "epoch": 0.709575620554849, "grad_norm": 2.2201929092407227, "learning_rate": 5.8146107107531444e-05, "loss": 1.962, "step": 30130 }, { "epoch": 0.7098111252413923, "grad_norm": 2.028233051300049, "learning_rate": 5.8099006170222794e-05, "loss": 2.0659, "step": 30140 }, { "epoch": 0.7100466299279355, "grad_norm": 2.96724796295166, "learning_rate": 5.805190523291414e-05, "loss": 2.0536, "step": 30150 }, { "epoch": 0.7102821346144789, "grad_norm": 2.110179901123047, "learning_rate": 5.800480429560549e-05, "loss": 2.0094, "step": 30160 }, { "epoch": 0.7105176393010221, "grad_norm": 2.6137187480926514, "learning_rate": 5.795770335829683e-05, "loss": 1.9766, "step": 30170 }, { "epoch": 0.7107531439875654, "grad_norm": 2.006112813949585, "learning_rate": 5.791060242098818e-05, "loss": 2.0879, "step": 30180 }, { "epoch": 0.7109886486741086, "grad_norm": 2.337247848510742, "learning_rate": 5.7863501483679525e-05, "loss": 2.038, "step": 30190 }, { "epoch": 0.7112241533606519, "grad_norm": 1.9541200399398804, "learning_rate": 5.7816400546370875e-05, "loss": 1.9604, "step": 30200 }, { "epoch": 0.7114596580471951, "grad_norm": 1.8182893991470337, "learning_rate": 5.776929960906222e-05, "loss": 1.8738, "step": 30210 }, { "epoch": 0.7116951627337385, "grad_norm": 2.3723807334899902, "learning_rate": 5.772219867175357e-05, "loss": 2.1882, "step": 30220 }, { "epoch": 0.7119306674202817, "grad_norm": 2.4405813217163086, 
"learning_rate": 5.767509773444492e-05, "loss": 1.9004, "step": 30230 }, { "epoch": 0.712166172106825, "grad_norm": 2.4666452407836914, "learning_rate": 5.762799679713626e-05, "loss": 2.0491, "step": 30240 }, { "epoch": 0.7124016767933682, "grad_norm": 2.4087648391723633, "learning_rate": 5.758089585982761e-05, "loss": 2.1029, "step": 30250 }, { "epoch": 0.7126371814799114, "grad_norm": 2.2330007553100586, "learning_rate": 5.7533794922518956e-05, "loss": 1.9501, "step": 30260 }, { "epoch": 0.7128726861664547, "grad_norm": 2.045724868774414, "learning_rate": 5.748669398521031e-05, "loss": 2.0743, "step": 30270 }, { "epoch": 0.7131081908529979, "grad_norm": 2.37623929977417, "learning_rate": 5.743959304790165e-05, "loss": 2.086, "step": 30280 }, { "epoch": 0.7133436955395412, "grad_norm": 2.594832181930542, "learning_rate": 5.7392492110593e-05, "loss": 2.0116, "step": 30290 }, { "epoch": 0.7135792002260845, "grad_norm": 2.3839049339294434, "learning_rate": 5.7345391173284344e-05, "loss": 2.1416, "step": 30300 }, { "epoch": 0.7138147049126278, "grad_norm": 2.3938348293304443, "learning_rate": 5.7298290235975694e-05, "loss": 2.15, "step": 30310 }, { "epoch": 0.714050209599171, "grad_norm": 1.7177648544311523, "learning_rate": 5.725118929866705e-05, "loss": 1.8613, "step": 30320 }, { "epoch": 0.7142857142857143, "grad_norm": 2.2762975692749023, "learning_rate": 5.72040883613584e-05, "loss": 1.8133, "step": 30330 }, { "epoch": 0.7145212189722575, "grad_norm": 2.3679895401000977, "learning_rate": 5.7156987424049745e-05, "loss": 1.9782, "step": 30340 }, { "epoch": 0.7147567236588008, "grad_norm": 2.099062204360962, "learning_rate": 5.7109886486741095e-05, "loss": 2.065, "step": 30350 }, { "epoch": 0.714992228345344, "grad_norm": 1.8509316444396973, "learning_rate": 5.706278554943244e-05, "loss": 2.0064, "step": 30360 }, { "epoch": 0.7152277330318874, "grad_norm": 2.6055731773376465, "learning_rate": 5.701568461212379e-05, "loss": 2.21, "step": 30370 }, { "epoch": 
0.7154632377184306, "grad_norm": 2.160001754760742, "learning_rate": 5.696858367481513e-05, "loss": 2.0922, "step": 30380 }, { "epoch": 0.7156987424049739, "grad_norm": 2.1145455837249756, "learning_rate": 5.692148273750648e-05, "loss": 2.0778, "step": 30390 }, { "epoch": 0.7159342470915171, "grad_norm": 2.1301040649414062, "learning_rate": 5.6874381800197826e-05, "loss": 1.9435, "step": 30400 }, { "epoch": 0.7161697517780604, "grad_norm": 2.1226649284362793, "learning_rate": 5.6827280862889176e-05, "loss": 2.0785, "step": 30410 }, { "epoch": 0.7164052564646036, "grad_norm": 2.950019359588623, "learning_rate": 5.6780179925580526e-05, "loss": 1.7644, "step": 30420 }, { "epoch": 0.7166407611511469, "grad_norm": 2.4695847034454346, "learning_rate": 5.673307898827187e-05, "loss": 2.2652, "step": 30430 }, { "epoch": 0.7168762658376902, "grad_norm": 2.0165791511535645, "learning_rate": 5.668597805096322e-05, "loss": 1.9458, "step": 30440 }, { "epoch": 0.7171117705242335, "grad_norm": 2.2012386322021484, "learning_rate": 5.6638877113654564e-05, "loss": 1.9355, "step": 30450 }, { "epoch": 0.7173472752107767, "grad_norm": 2.5340282917022705, "learning_rate": 5.6591776176345914e-05, "loss": 2.1552, "step": 30460 }, { "epoch": 0.71758277989732, "grad_norm": 2.569276809692383, "learning_rate": 5.654467523903726e-05, "loss": 1.962, "step": 30470 }, { "epoch": 0.7178182845838632, "grad_norm": 2.128749132156372, "learning_rate": 5.649757430172861e-05, "loss": 2.036, "step": 30480 }, { "epoch": 0.7180537892704065, "grad_norm": 2.065203905105591, "learning_rate": 5.645047336441995e-05, "loss": 2.0853, "step": 30490 }, { "epoch": 0.7182892939569497, "grad_norm": 2.0871028900146484, "learning_rate": 5.64033724271113e-05, "loss": 2.2986, "step": 30500 }, { "epoch": 0.7185247986434931, "grad_norm": 1.4431718587875366, "learning_rate": 5.6356271489802645e-05, "loss": 1.9797, "step": 30510 }, { "epoch": 0.7187603033300363, "grad_norm": 2.06838321685791, "learning_rate": 
5.6309170552493995e-05, "loss": 1.945, "step": 30520 }, { "epoch": 0.7189958080165796, "grad_norm": 1.6888794898986816, "learning_rate": 5.6262069615185345e-05, "loss": 1.9909, "step": 30530 }, { "epoch": 0.7192313127031228, "grad_norm": 2.028390884399414, "learning_rate": 5.621496867787669e-05, "loss": 1.8028, "step": 30540 }, { "epoch": 0.719466817389666, "grad_norm": 2.6491353511810303, "learning_rate": 5.616786774056804e-05, "loss": 2.1648, "step": 30550 }, { "epoch": 0.7197023220762093, "grad_norm": 2.0216126441955566, "learning_rate": 5.612076680325938e-05, "loss": 1.9735, "step": 30560 }, { "epoch": 0.7199378267627525, "grad_norm": 3.1546339988708496, "learning_rate": 5.607366586595073e-05, "loss": 2.1136, "step": 30570 }, { "epoch": 0.7201733314492959, "grad_norm": 2.0467398166656494, "learning_rate": 5.6026564928642076e-05, "loss": 1.8179, "step": 30580 }, { "epoch": 0.7204088361358391, "grad_norm": 1.9955779314041138, "learning_rate": 5.5979463991333426e-05, "loss": 1.9321, "step": 30590 }, { "epoch": 0.7206443408223824, "grad_norm": 2.4097037315368652, "learning_rate": 5.593236305402477e-05, "loss": 2.0949, "step": 30600 }, { "epoch": 0.7208798455089256, "grad_norm": 2.0927698612213135, "learning_rate": 5.5885262116716134e-05, "loss": 1.9115, "step": 30610 }, { "epoch": 0.7211153501954689, "grad_norm": 2.086876630783081, "learning_rate": 5.583816117940748e-05, "loss": 2.3412, "step": 30620 }, { "epoch": 0.7213508548820121, "grad_norm": 1.9654732942581177, "learning_rate": 5.579106024209883e-05, "loss": 2.1572, "step": 30630 }, { "epoch": 0.7215863595685554, "grad_norm": 1.9568740129470825, "learning_rate": 5.574395930479017e-05, "loss": 1.8459, "step": 30640 }, { "epoch": 0.7218218642550986, "grad_norm": 1.8348342180252075, "learning_rate": 5.569685836748152e-05, "loss": 1.8352, "step": 30650 }, { "epoch": 0.722057368941642, "grad_norm": 2.3031091690063477, "learning_rate": 5.5649757430172865e-05, "loss": 1.9543, "step": 30660 }, { "epoch": 
0.7222928736281852, "grad_norm": 2.4159276485443115, "learning_rate": 5.5602656492864215e-05, "loss": 2.1472, "step": 30670 }, { "epoch": 0.7225283783147285, "grad_norm": 2.4604334831237793, "learning_rate": 5.555555555555556e-05, "loss": 2.1093, "step": 30680 }, { "epoch": 0.7227638830012717, "grad_norm": 2.7177553176879883, "learning_rate": 5.550845461824691e-05, "loss": 2.0667, "step": 30690 }, { "epoch": 0.722999387687815, "grad_norm": 2.469963788986206, "learning_rate": 5.546135368093825e-05, "loss": 2.0233, "step": 30700 }, { "epoch": 0.7232348923743582, "grad_norm": 2.972891330718994, "learning_rate": 5.54142527436296e-05, "loss": 1.9273, "step": 30710 }, { "epoch": 0.7234703970609015, "grad_norm": 2.8329389095306396, "learning_rate": 5.536715180632095e-05, "loss": 2.0522, "step": 30720 }, { "epoch": 0.7237059017474448, "grad_norm": 2.5914034843444824, "learning_rate": 5.5320050869012296e-05, "loss": 1.9087, "step": 30730 }, { "epoch": 0.7239414064339881, "grad_norm": 2.375659227371216, "learning_rate": 5.5272949931703646e-05, "loss": 1.9656, "step": 30740 }, { "epoch": 0.7241769111205313, "grad_norm": 2.802253246307373, "learning_rate": 5.522584899439499e-05, "loss": 1.9603, "step": 30750 }, { "epoch": 0.7244124158070746, "grad_norm": 2.0423829555511475, "learning_rate": 5.517874805708634e-05, "loss": 2.0772, "step": 30760 }, { "epoch": 0.7246479204936178, "grad_norm": 2.187680959701538, "learning_rate": 5.513164711977768e-05, "loss": 1.9225, "step": 30770 }, { "epoch": 0.7248834251801611, "grad_norm": 2.442068576812744, "learning_rate": 5.5084546182469034e-05, "loss": 1.9933, "step": 30780 }, { "epoch": 0.7251189298667043, "grad_norm": 2.3445839881896973, "learning_rate": 5.503744524516038e-05, "loss": 2.2684, "step": 30790 }, { "epoch": 0.7253544345532477, "grad_norm": 2.0452582836151123, "learning_rate": 5.499034430785173e-05, "loss": 2.1643, "step": 30800 }, { "epoch": 0.7255899392397909, "grad_norm": 2.025725841522217, "learning_rate": 
5.494324337054307e-05, "loss": 1.9175, "step": 30810 }, { "epoch": 0.7258254439263341, "grad_norm": 2.7299511432647705, "learning_rate": 5.489614243323442e-05, "loss": 2.0061, "step": 30820 }, { "epoch": 0.7260609486128774, "grad_norm": 2.5968010425567627, "learning_rate": 5.484904149592577e-05, "loss": 1.905, "step": 30830 }, { "epoch": 0.7262964532994206, "grad_norm": 2.217876434326172, "learning_rate": 5.4801940558617115e-05, "loss": 2.0114, "step": 30840 }, { "epoch": 0.7265319579859639, "grad_norm": 3.317579984664917, "learning_rate": 5.4754839621308465e-05, "loss": 1.9557, "step": 30850 }, { "epoch": 0.7267674626725071, "grad_norm": 3.0695741176605225, "learning_rate": 5.470773868399981e-05, "loss": 2.0315, "step": 30860 }, { "epoch": 0.7270029673590505, "grad_norm": 2.6726536750793457, "learning_rate": 5.466063774669116e-05, "loss": 1.9777, "step": 30870 }, { "epoch": 0.7272384720455937, "grad_norm": 2.8172085285186768, "learning_rate": 5.46135368093825e-05, "loss": 2.044, "step": 30880 }, { "epoch": 0.727473976732137, "grad_norm": 2.0183653831481934, "learning_rate": 5.456643587207386e-05, "loss": 1.9781, "step": 30890 }, { "epoch": 0.7277094814186802, "grad_norm": 2.0858523845672607, "learning_rate": 5.451933493476521e-05, "loss": 1.7131, "step": 30900 }, { "epoch": 0.7279449861052235, "grad_norm": 2.115558624267578, "learning_rate": 5.447223399745656e-05, "loss": 1.992, "step": 30910 }, { "epoch": 0.7281804907917667, "grad_norm": 1.972657322883606, "learning_rate": 5.44251330601479e-05, "loss": 1.8837, "step": 30920 }, { "epoch": 0.72841599547831, "grad_norm": 1.986677885055542, "learning_rate": 5.437803212283925e-05, "loss": 2.0074, "step": 30930 }, { "epoch": 0.7286515001648532, "grad_norm": 2.1113574504852295, "learning_rate": 5.43309311855306e-05, "loss": 2.0117, "step": 30940 }, { "epoch": 0.7288870048513966, "grad_norm": 2.277940034866333, "learning_rate": 5.428383024822195e-05, "loss": 2.0174, "step": 30950 }, { "epoch": 0.7291225095379398, 
"grad_norm": 2.1765830516815186, "learning_rate": 5.423672931091329e-05, "loss": 2.0942, "step": 30960 }, { "epoch": 0.7293580142244831, "grad_norm": 2.458887815475464, "learning_rate": 5.418962837360464e-05, "loss": 2.0839, "step": 30970 }, { "epoch": 0.7295935189110263, "grad_norm": 2.4584996700286865, "learning_rate": 5.4142527436295984e-05, "loss": 1.8682, "step": 30980 }, { "epoch": 0.7298290235975696, "grad_norm": 2.042698860168457, "learning_rate": 5.4095426498987334e-05, "loss": 1.9744, "step": 30990 }, { "epoch": 0.7300645282841128, "grad_norm": 2.2433009147644043, "learning_rate": 5.404832556167868e-05, "loss": 2.0678, "step": 31000 }, { "epoch": 0.7303000329706562, "grad_norm": 3.455995559692383, "learning_rate": 5.400122462437003e-05, "loss": 2.0532, "step": 31010 }, { "epoch": 0.7305355376571994, "grad_norm": 3.3080856800079346, "learning_rate": 5.395412368706138e-05, "loss": 2.1967, "step": 31020 }, { "epoch": 0.7307710423437427, "grad_norm": 1.755066990852356, "learning_rate": 5.390702274975272e-05, "loss": 1.9065, "step": 31030 }, { "epoch": 0.7310065470302859, "grad_norm": 2.1286559104919434, "learning_rate": 5.385992181244407e-05, "loss": 1.8758, "step": 31040 }, { "epoch": 0.7312420517168292, "grad_norm": 1.8122589588165283, "learning_rate": 5.3812820875135416e-05, "loss": 1.9088, "step": 31050 }, { "epoch": 0.7314775564033724, "grad_norm": 2.3942129611968994, "learning_rate": 5.3765719937826766e-05, "loss": 1.9794, "step": 31060 }, { "epoch": 0.7317130610899157, "grad_norm": 2.5759265422821045, "learning_rate": 5.371861900051811e-05, "loss": 2.1059, "step": 31070 }, { "epoch": 0.731948565776459, "grad_norm": 2.152968406677246, "learning_rate": 5.367151806320946e-05, "loss": 2.1384, "step": 31080 }, { "epoch": 0.7321840704630023, "grad_norm": 1.7909654378890991, "learning_rate": 5.36244171259008e-05, "loss": 2.1023, "step": 31090 }, { "epoch": 0.7324195751495455, "grad_norm": 2.8516340255737305, "learning_rate": 5.357731618859215e-05, "loss": 
2.0101, "step": 31100 }, { "epoch": 0.7326550798360887, "grad_norm": 3.418398857116699, "learning_rate": 5.35302152512835e-05, "loss": 2.0228, "step": 31110 }, { "epoch": 0.732890584522632, "grad_norm": 1.9390182495117188, "learning_rate": 5.348311431397485e-05, "loss": 1.8994, "step": 31120 }, { "epoch": 0.7331260892091752, "grad_norm": 2.929248809814453, "learning_rate": 5.34360133766662e-05, "loss": 2.017, "step": 31130 }, { "epoch": 0.7333615938957185, "grad_norm": 2.084099531173706, "learning_rate": 5.338891243935754e-05, "loss": 2.0448, "step": 31140 }, { "epoch": 0.7335970985822617, "grad_norm": 1.9358183145523071, "learning_rate": 5.334181150204889e-05, "loss": 1.9732, "step": 31150 }, { "epoch": 0.7338326032688051, "grad_norm": 1.9566363096237183, "learning_rate": 5.3294710564740234e-05, "loss": 2.023, "step": 31160 }, { "epoch": 0.7340681079553483, "grad_norm": 2.517813205718994, "learning_rate": 5.324760962743159e-05, "loss": 1.9894, "step": 31170 }, { "epoch": 0.7343036126418916, "grad_norm": 1.871909737586975, "learning_rate": 5.320050869012294e-05, "loss": 2.1048, "step": 31180 }, { "epoch": 0.7345391173284348, "grad_norm": 1.6476243734359741, "learning_rate": 5.3153407752814285e-05, "loss": 1.8816, "step": 31190 }, { "epoch": 0.7347746220149781, "grad_norm": 2.7056660652160645, "learning_rate": 5.3106306815505635e-05, "loss": 1.8774, "step": 31200 }, { "epoch": 0.7350101267015213, "grad_norm": 2.0394725799560547, "learning_rate": 5.3059205878196986e-05, "loss": 1.9307, "step": 31210 }, { "epoch": 0.7352456313880646, "grad_norm": 2.695197105407715, "learning_rate": 5.301210494088833e-05, "loss": 1.842, "step": 31220 }, { "epoch": 0.7354811360746079, "grad_norm": 2.9238481521606445, "learning_rate": 5.296500400357968e-05, "loss": 2.1305, "step": 31230 }, { "epoch": 0.7357166407611512, "grad_norm": 2.0291872024536133, "learning_rate": 5.291790306627102e-05, "loss": 1.9462, "step": 31240 }, { "epoch": 0.7359521454476944, "grad_norm": 2.4080393314361572, 
"learning_rate": 5.287080212896237e-05, "loss": 2.1098, "step": 31250 }, { "epoch": 0.7361876501342377, "grad_norm": 2.3340492248535156, "learning_rate": 5.2823701191653717e-05, "loss": 2.135, "step": 31260 }, { "epoch": 0.7364231548207809, "grad_norm": 2.0928733348846436, "learning_rate": 5.277660025434507e-05, "loss": 2.1051, "step": 31270 }, { "epoch": 0.7366586595073242, "grad_norm": 2.603736162185669, "learning_rate": 5.272949931703641e-05, "loss": 2.2034, "step": 31280 }, { "epoch": 0.7368941641938674, "grad_norm": 3.005927324295044, "learning_rate": 5.268239837972776e-05, "loss": 2.0459, "step": 31290 }, { "epoch": 0.7371296688804108, "grad_norm": 2.0738449096679688, "learning_rate": 5.2635297442419104e-05, "loss": 2.1972, "step": 31300 }, { "epoch": 0.737365173566954, "grad_norm": 2.542039394378662, "learning_rate": 5.2588196505110454e-05, "loss": 1.8309, "step": 31310 }, { "epoch": 0.7376006782534973, "grad_norm": 2.0776071548461914, "learning_rate": 5.2541095567801804e-05, "loss": 2.2318, "step": 31320 }, { "epoch": 0.7378361829400405, "grad_norm": 2.2851767539978027, "learning_rate": 5.249399463049315e-05, "loss": 2.1569, "step": 31330 }, { "epoch": 0.7380716876265838, "grad_norm": 2.047200918197632, "learning_rate": 5.24468936931845e-05, "loss": 1.9189, "step": 31340 }, { "epoch": 0.738307192313127, "grad_norm": 2.037782669067383, "learning_rate": 5.239979275587584e-05, "loss": 2.0439, "step": 31350 }, { "epoch": 0.7385426969996703, "grad_norm": 2.144557476043701, "learning_rate": 5.235269181856719e-05, "loss": 1.9919, "step": 31360 }, { "epoch": 0.7387782016862136, "grad_norm": 1.9935516119003296, "learning_rate": 5.2305590881258535e-05, "loss": 1.9855, "step": 31370 }, { "epoch": 0.7390137063727569, "grad_norm": 2.8330445289611816, "learning_rate": 5.2258489943949886e-05, "loss": 2.1518, "step": 31380 }, { "epoch": 0.7392492110593001, "grad_norm": 2.2874932289123535, "learning_rate": 5.221138900664123e-05, "loss": 2.0948, "step": 31390 }, { "epoch": 
0.7394847157458433, "grad_norm": 2.6140425205230713, "learning_rate": 5.216428806933258e-05, "loss": 2.0383, "step": 31400 }, { "epoch": 0.7397202204323866, "grad_norm": 2.7529566287994385, "learning_rate": 5.211718713202392e-05, "loss": 2.0262, "step": 31410 }, { "epoch": 0.7399557251189298, "grad_norm": 1.8774211406707764, "learning_rate": 5.207008619471527e-05, "loss": 1.7626, "step": 31420 }, { "epoch": 0.7401912298054731, "grad_norm": 2.481081008911133, "learning_rate": 5.202298525740662e-05, "loss": 2.2189, "step": 31430 }, { "epoch": 0.7404267344920163, "grad_norm": 3.3495407104492188, "learning_rate": 5.197588432009797e-05, "loss": 2.0925, "step": 31440 }, { "epoch": 0.7406622391785597, "grad_norm": 2.11091685295105, "learning_rate": 5.1928783382789324e-05, "loss": 2.1724, "step": 31450 }, { "epoch": 0.7408977438651029, "grad_norm": 2.0129551887512207, "learning_rate": 5.1881682445480674e-05, "loss": 2.1202, "step": 31460 }, { "epoch": 0.7411332485516462, "grad_norm": 1.9652073383331299, "learning_rate": 5.183458150817202e-05, "loss": 1.9064, "step": 31470 }, { "epoch": 0.7413687532381894, "grad_norm": 2.4702093601226807, "learning_rate": 5.178748057086337e-05, "loss": 2.042, "step": 31480 }, { "epoch": 0.7416042579247327, "grad_norm": 1.9683281183242798, "learning_rate": 5.174037963355471e-05, "loss": 1.8423, "step": 31490 }, { "epoch": 0.7418397626112759, "grad_norm": 1.8819502592086792, "learning_rate": 5.169327869624606e-05, "loss": 1.9363, "step": 31500 }, { "epoch": 0.7420752672978193, "grad_norm": 2.266874074935913, "learning_rate": 5.164617775893741e-05, "loss": 1.9024, "step": 31510 }, { "epoch": 0.7423107719843625, "grad_norm": 2.113044023513794, "learning_rate": 5.1599076821628755e-05, "loss": 1.9809, "step": 31520 }, { "epoch": 0.7425462766709058, "grad_norm": 2.2748920917510986, "learning_rate": 5.1551975884320105e-05, "loss": 2.0686, "step": 31530 }, { "epoch": 0.742781781357449, "grad_norm": 2.078479051589966, "learning_rate": 
5.150487494701145e-05, "loss": 2.1953, "step": 31540 }, { "epoch": 0.7430172860439923, "grad_norm": 2.49847412109375, "learning_rate": 5.14577740097028e-05, "loss": 2.0343, "step": 31550 }, { "epoch": 0.7432527907305355, "grad_norm": 2.6483688354492188, "learning_rate": 5.141067307239414e-05, "loss": 1.9767, "step": 31560 }, { "epoch": 0.7434882954170788, "grad_norm": 2.081569194793701, "learning_rate": 5.136357213508549e-05, "loss": 2.0236, "step": 31570 }, { "epoch": 0.743723800103622, "grad_norm": 2.007598876953125, "learning_rate": 5.1316471197776836e-05, "loss": 2.079, "step": 31580 }, { "epoch": 0.7439593047901654, "grad_norm": 2.3295485973358154, "learning_rate": 5.1269370260468186e-05, "loss": 1.9723, "step": 31590 }, { "epoch": 0.7441948094767086, "grad_norm": 2.001807689666748, "learning_rate": 5.122226932315953e-05, "loss": 2.0025, "step": 31600 }, { "epoch": 0.7444303141632519, "grad_norm": 2.1212780475616455, "learning_rate": 5.117516838585088e-05, "loss": 2.1097, "step": 31610 }, { "epoch": 0.7446658188497951, "grad_norm": 1.8547629117965698, "learning_rate": 5.112806744854223e-05, "loss": 1.9761, "step": 31620 }, { "epoch": 0.7449013235363384, "grad_norm": 1.8750020265579224, "learning_rate": 5.1080966511233574e-05, "loss": 2.0775, "step": 31630 }, { "epoch": 0.7451368282228816, "grad_norm": 2.390493869781494, "learning_rate": 5.1033865573924924e-05, "loss": 2.0353, "step": 31640 }, { "epoch": 0.745372332909425, "grad_norm": 1.8279821872711182, "learning_rate": 5.098676463661627e-05, "loss": 1.958, "step": 31650 }, { "epoch": 0.7456078375959682, "grad_norm": 2.5792455673217773, "learning_rate": 5.093966369930762e-05, "loss": 2.2457, "step": 31660 }, { "epoch": 0.7458433422825114, "grad_norm": 2.464423894882202, "learning_rate": 5.089256276199896e-05, "loss": 1.8873, "step": 31670 }, { "epoch": 0.7460788469690547, "grad_norm": 1.911726951599121, "learning_rate": 5.084546182469031e-05, "loss": 2.0966, "step": 31680 }, { "epoch": 0.7463143516555979, 
"grad_norm": 2.0924248695373535, "learning_rate": 5.0798360887381655e-05, "loss": 2.1157, "step": 31690 }, { "epoch": 0.7465498563421412, "grad_norm": 2.2013473510742188, "learning_rate": 5.0751259950073005e-05, "loss": 2.05, "step": 31700 }, { "epoch": 0.7467853610286844, "grad_norm": 2.036799430847168, "learning_rate": 5.070415901276435e-05, "loss": 2.1727, "step": 31710 }, { "epoch": 0.7470208657152277, "grad_norm": 1.8770381212234497, "learning_rate": 5.06570580754557e-05, "loss": 2.0502, "step": 31720 }, { "epoch": 0.747256370401771, "grad_norm": 1.9372336864471436, "learning_rate": 5.060995713814705e-05, "loss": 2.1371, "step": 31730 }, { "epoch": 0.7474918750883143, "grad_norm": 2.189121961593628, "learning_rate": 5.0562856200838406e-05, "loss": 2.0499, "step": 31740 }, { "epoch": 0.7477273797748575, "grad_norm": 2.1549293994903564, "learning_rate": 5.051575526352975e-05, "loss": 1.9287, "step": 31750 }, { "epoch": 0.7479628844614008, "grad_norm": 2.0482614040374756, "learning_rate": 5.04686543262211e-05, "loss": 2.0564, "step": 31760 }, { "epoch": 0.748198389147944, "grad_norm": 2.111997604370117, "learning_rate": 5.0421553388912443e-05, "loss": 2.176, "step": 31770 }, { "epoch": 0.7484338938344873, "grad_norm": 2.4631214141845703, "learning_rate": 5.0374452451603794e-05, "loss": 1.9805, "step": 31780 }, { "epoch": 0.7486693985210305, "grad_norm": 3.002066135406494, "learning_rate": 5.032735151429514e-05, "loss": 2.091, "step": 31790 }, { "epoch": 0.7489049032075739, "grad_norm": 2.605787992477417, "learning_rate": 5.028025057698649e-05, "loss": 1.8293, "step": 31800 }, { "epoch": 0.7491404078941171, "grad_norm": 1.7942407131195068, "learning_rate": 5.023314963967784e-05, "loss": 2.0559, "step": 31810 }, { "epoch": 0.7493759125806604, "grad_norm": 2.3381400108337402, "learning_rate": 5.018604870236918e-05, "loss": 2.1534, "step": 31820 }, { "epoch": 0.7496114172672036, "grad_norm": 2.1282408237457275, "learning_rate": 5.013894776506053e-05, "loss": 1.6971, 
"step": 31830 }, { "epoch": 0.7498469219537469, "grad_norm": 2.5377397537231445, "learning_rate": 5.0091846827751875e-05, "loss": 2.1498, "step": 31840 }, { "epoch": 0.7500824266402901, "grad_norm": 2.1873764991760254, "learning_rate": 5.0044745890443225e-05, "loss": 1.9524, "step": 31850 }, { "epoch": 0.7503179313268334, "grad_norm": 1.9060252904891968, "learning_rate": 4.999764495313457e-05, "loss": 1.9569, "step": 31860 }, { "epoch": 0.7505534360133767, "grad_norm": 1.914854645729065, "learning_rate": 4.995054401582592e-05, "loss": 1.9475, "step": 31870 }, { "epoch": 0.75078894069992, "grad_norm": 3.2591161727905273, "learning_rate": 4.990344307851726e-05, "loss": 2.0404, "step": 31880 }, { "epoch": 0.7510244453864632, "grad_norm": 1.9363534450531006, "learning_rate": 4.985634214120861e-05, "loss": 1.9256, "step": 31890 }, { "epoch": 0.7512599500730065, "grad_norm": 2.1461710929870605, "learning_rate": 4.9809241203899956e-05, "loss": 1.9352, "step": 31900 }, { "epoch": 0.7514954547595497, "grad_norm": 2.079094648361206, "learning_rate": 4.9762140266591306e-05, "loss": 1.9824, "step": 31910 }, { "epoch": 0.751730959446093, "grad_norm": 3.0540053844451904, "learning_rate": 4.9715039329282656e-05, "loss": 2.2878, "step": 31920 }, { "epoch": 0.7519664641326362, "grad_norm": 2.7210094928741455, "learning_rate": 4.9667938391974e-05, "loss": 1.8646, "step": 31930 }, { "epoch": 0.7522019688191796, "grad_norm": 2.256350517272949, "learning_rate": 4.962083745466535e-05, "loss": 1.6932, "step": 31940 }, { "epoch": 0.7524374735057228, "grad_norm": 1.946227788925171, "learning_rate": 4.95737365173567e-05, "loss": 2.0174, "step": 31950 }, { "epoch": 0.752672978192266, "grad_norm": 2.2353408336639404, "learning_rate": 4.952663558004805e-05, "loss": 2.0512, "step": 31960 }, { "epoch": 0.7529084828788093, "grad_norm": 2.020702362060547, "learning_rate": 4.9479534642739394e-05, "loss": 2.1851, "step": 31970 }, { "epoch": 0.7531439875653525, "grad_norm": 2.0567731857299805, 
"learning_rate": 4.9432433705430744e-05, "loss": 2.0855, "step": 31980 }, { "epoch": 0.7533794922518958, "grad_norm": 2.680443525314331, "learning_rate": 4.938533276812209e-05, "loss": 2.0761, "step": 31990 }, { "epoch": 0.753614996938439, "grad_norm": 2.444499969482422, "learning_rate": 4.933823183081344e-05, "loss": 1.9018, "step": 32000 }, { "epoch": 0.7538505016249823, "grad_norm": 2.7612671852111816, "learning_rate": 4.929113089350478e-05, "loss": 1.9357, "step": 32010 }, { "epoch": 0.7540860063115256, "grad_norm": 2.018686532974243, "learning_rate": 4.924402995619613e-05, "loss": 1.9346, "step": 32020 }, { "epoch": 0.7543215109980689, "grad_norm": 2.3052027225494385, "learning_rate": 4.9196929018887475e-05, "loss": 1.9173, "step": 32030 }, { "epoch": 0.7545570156846121, "grad_norm": 2.0256094932556152, "learning_rate": 4.9149828081578826e-05, "loss": 1.9119, "step": 32040 }, { "epoch": 0.7547925203711554, "grad_norm": 2.7691421508789062, "learning_rate": 4.910272714427017e-05, "loss": 1.9702, "step": 32050 }, { "epoch": 0.7550280250576986, "grad_norm": 2.235715866088867, "learning_rate": 4.905562620696152e-05, "loss": 1.9636, "step": 32060 }, { "epoch": 0.7552635297442419, "grad_norm": 2.506202459335327, "learning_rate": 4.900852526965287e-05, "loss": 1.9745, "step": 32070 }, { "epoch": 0.7554990344307851, "grad_norm": 2.220933198928833, "learning_rate": 4.896142433234421e-05, "loss": 2.2018, "step": 32080 }, { "epoch": 0.7557345391173285, "grad_norm": 2.0806100368499756, "learning_rate": 4.891432339503556e-05, "loss": 2.0028, "step": 32090 }, { "epoch": 0.7559700438038717, "grad_norm": 2.027374744415283, "learning_rate": 4.8867222457726913e-05, "loss": 2.0422, "step": 32100 }, { "epoch": 0.756205548490415, "grad_norm": 2.054565191268921, "learning_rate": 4.8820121520418264e-05, "loss": 1.9676, "step": 32110 }, { "epoch": 0.7564410531769582, "grad_norm": 2.5844099521636963, "learning_rate": 4.877302058310961e-05, "loss": 2.1372, "step": 32120 }, { "epoch": 
0.7566765578635015, "grad_norm": 2.344665765762329, "learning_rate": 4.872591964580096e-05, "loss": 2.2227, "step": 32130 }, { "epoch": 0.7569120625500447, "grad_norm": 2.02775502204895, "learning_rate": 4.86788187084923e-05, "loss": 1.8766, "step": 32140 }, { "epoch": 0.757147567236588, "grad_norm": 1.9430595636367798, "learning_rate": 4.863171777118365e-05, "loss": 1.9213, "step": 32150 }, { "epoch": 0.7573830719231313, "grad_norm": 1.9600436687469482, "learning_rate": 4.8584616833874995e-05, "loss": 1.7373, "step": 32160 }, { "epoch": 0.7576185766096746, "grad_norm": 2.5143792629241943, "learning_rate": 4.8537515896566345e-05, "loss": 2.1279, "step": 32170 }, { "epoch": 0.7578540812962178, "grad_norm": 1.9432998895645142, "learning_rate": 4.849041495925769e-05, "loss": 2.0553, "step": 32180 }, { "epoch": 0.7580895859827611, "grad_norm": 2.147117853164673, "learning_rate": 4.844331402194904e-05, "loss": 1.9338, "step": 32190 }, { "epoch": 0.7583250906693043, "grad_norm": 1.7949907779693604, "learning_rate": 4.839621308464038e-05, "loss": 2.0411, "step": 32200 }, { "epoch": 0.7585605953558476, "grad_norm": 1.945996880531311, "learning_rate": 4.834911214733173e-05, "loss": 1.9042, "step": 32210 }, { "epoch": 0.7587961000423908, "grad_norm": 3.3498096466064453, "learning_rate": 4.830201121002308e-05, "loss": 2.2681, "step": 32220 }, { "epoch": 0.759031604728934, "grad_norm": 1.85516357421875, "learning_rate": 4.825491027271443e-05, "loss": 1.9829, "step": 32230 }, { "epoch": 0.7592671094154774, "grad_norm": 1.8677126169204712, "learning_rate": 4.8207809335405776e-05, "loss": 1.8494, "step": 32240 }, { "epoch": 0.7595026141020206, "grad_norm": 1.955237865447998, "learning_rate": 4.8160708398097126e-05, "loss": 1.9311, "step": 32250 }, { "epoch": 0.7597381187885639, "grad_norm": 2.534681558609009, "learning_rate": 4.811360746078848e-05, "loss": 2.2401, "step": 32260 }, { "epoch": 0.7599736234751071, "grad_norm": 1.8097875118255615, "learning_rate": 
4.806650652347982e-05, "loss": 1.8716, "step": 32270 }, { "epoch": 0.7602091281616504, "grad_norm": 2.7228007316589355, "learning_rate": 4.801940558617117e-05, "loss": 1.9849, "step": 32280 }, { "epoch": 0.7604446328481936, "grad_norm": 2.104480028152466, "learning_rate": 4.7972304648862514e-05, "loss": 2.1219, "step": 32290 }, { "epoch": 0.760680137534737, "grad_norm": 2.193178176879883, "learning_rate": 4.7925203711553864e-05, "loss": 1.8933, "step": 32300 }, { "epoch": 0.7609156422212802, "grad_norm": 2.3160879611968994, "learning_rate": 4.787810277424521e-05, "loss": 2.0682, "step": 32310 }, { "epoch": 0.7611511469078235, "grad_norm": 1.9633328914642334, "learning_rate": 4.783100183693656e-05, "loss": 1.893, "step": 32320 }, { "epoch": 0.7613866515943667, "grad_norm": 2.395254611968994, "learning_rate": 4.77839008996279e-05, "loss": 1.9148, "step": 32330 }, { "epoch": 0.76162215628091, "grad_norm": 2.234740972518921, "learning_rate": 4.773679996231925e-05, "loss": 1.9468, "step": 32340 }, { "epoch": 0.7618576609674532, "grad_norm": 2.9619855880737305, "learning_rate": 4.7689699025010595e-05, "loss": 1.7942, "step": 32350 }, { "epoch": 0.7620931656539965, "grad_norm": 2.4581329822540283, "learning_rate": 4.7642598087701945e-05, "loss": 1.9489, "step": 32360 }, { "epoch": 0.7623286703405397, "grad_norm": 2.132481098175049, "learning_rate": 4.7595497150393295e-05, "loss": 1.8993, "step": 32370 }, { "epoch": 0.7625641750270831, "grad_norm": 2.9130518436431885, "learning_rate": 4.7548396213084646e-05, "loss": 2.1784, "step": 32380 }, { "epoch": 0.7627996797136263, "grad_norm": 2.35318922996521, "learning_rate": 4.750129527577599e-05, "loss": 2.0111, "step": 32390 }, { "epoch": 0.7630351844001696, "grad_norm": 2.9324629306793213, "learning_rate": 4.745419433846734e-05, "loss": 1.9419, "step": 32400 }, { "epoch": 0.7632706890867128, "grad_norm": 1.9878653287887573, "learning_rate": 4.740709340115869e-05, "loss": 1.9623, "step": 32410 }, { "epoch": 0.7635061937732561, 
"grad_norm": 1.8808915615081787, "learning_rate": 4.735999246385003e-05, "loss": 2.12, "step": 32420 }, { "epoch": 0.7637416984597993, "grad_norm": 1.9779167175292969, "learning_rate": 4.7312891526541383e-05, "loss": 1.775, "step": 32430 }, { "epoch": 0.7639772031463427, "grad_norm": 2.064002752304077, "learning_rate": 4.726579058923273e-05, "loss": 2.0461, "step": 32440 }, { "epoch": 0.7642127078328859, "grad_norm": 1.897849440574646, "learning_rate": 4.721868965192408e-05, "loss": 1.8057, "step": 32450 }, { "epoch": 0.7644482125194292, "grad_norm": 2.096017837524414, "learning_rate": 4.717158871461542e-05, "loss": 1.9373, "step": 32460 }, { "epoch": 0.7646837172059724, "grad_norm": 2.7307066917419434, "learning_rate": 4.712448777730677e-05, "loss": 2.1702, "step": 32470 }, { "epoch": 0.7649192218925157, "grad_norm": 1.8949542045593262, "learning_rate": 4.7077386839998114e-05, "loss": 1.9492, "step": 32480 }, { "epoch": 0.7651547265790589, "grad_norm": 2.4673449993133545, "learning_rate": 4.7030285902689465e-05, "loss": 2.3952, "step": 32490 }, { "epoch": 0.7653902312656022, "grad_norm": 2.2674577236175537, "learning_rate": 4.698318496538081e-05, "loss": 1.9935, "step": 32500 }, { "epoch": 0.7656257359521454, "grad_norm": 2.0791029930114746, "learning_rate": 4.6936084028072165e-05, "loss": 2.0564, "step": 32510 }, { "epoch": 0.7658612406386887, "grad_norm": 2.5843989849090576, "learning_rate": 4.688898309076351e-05, "loss": 1.7758, "step": 32520 }, { "epoch": 0.766096745325232, "grad_norm": 2.141845226287842, "learning_rate": 4.684188215345486e-05, "loss": 1.9268, "step": 32530 }, { "epoch": 0.7663322500117752, "grad_norm": 2.0539655685424805, "learning_rate": 4.67947812161462e-05, "loss": 1.9727, "step": 32540 }, { "epoch": 0.7665677546983185, "grad_norm": 2.356175184249878, "learning_rate": 4.674768027883755e-05, "loss": 2.1268, "step": 32550 }, { "epoch": 0.7668032593848617, "grad_norm": 2.6625282764434814, "learning_rate": 4.67005793415289e-05, "loss": 2.0887, 
"step": 32560 }, { "epoch": 0.767038764071405, "grad_norm": 2.2994375228881836, "learning_rate": 4.6653478404220246e-05, "loss": 1.7722, "step": 32570 }, { "epoch": 0.7672742687579482, "grad_norm": 2.1485538482666016, "learning_rate": 4.6606377466911596e-05, "loss": 1.914, "step": 32580 }, { "epoch": 0.7675097734444916, "grad_norm": 3.24705171585083, "learning_rate": 4.655927652960294e-05, "loss": 1.9643, "step": 32590 }, { "epoch": 0.7677452781310348, "grad_norm": 2.247652292251587, "learning_rate": 4.651217559229429e-05, "loss": 2.2326, "step": 32600 }, { "epoch": 0.7679807828175781, "grad_norm": 2.2194790840148926, "learning_rate": 4.6465074654985634e-05, "loss": 2.0224, "step": 32610 }, { "epoch": 0.7682162875041213, "grad_norm": 2.246136426925659, "learning_rate": 4.6417973717676984e-05, "loss": 2.1937, "step": 32620 }, { "epoch": 0.7684517921906646, "grad_norm": 1.978644847869873, "learning_rate": 4.637087278036833e-05, "loss": 2.0933, "step": 32630 }, { "epoch": 0.7686872968772078, "grad_norm": 2.822878360748291, "learning_rate": 4.632377184305968e-05, "loss": 2.1852, "step": 32640 }, { "epoch": 0.7689228015637511, "grad_norm": 1.890211820602417, "learning_rate": 4.627667090575102e-05, "loss": 1.9076, "step": 32650 }, { "epoch": 0.7691583062502944, "grad_norm": 2.3078582286834717, "learning_rate": 4.622956996844238e-05, "loss": 1.9756, "step": 32660 }, { "epoch": 0.7693938109368377, "grad_norm": 2.1552579402923584, "learning_rate": 4.618246903113372e-05, "loss": 2.0046, "step": 32670 }, { "epoch": 0.7696293156233809, "grad_norm": 2.208951711654663, "learning_rate": 4.613536809382507e-05, "loss": 2.1688, "step": 32680 }, { "epoch": 0.7698648203099242, "grad_norm": 2.6358840465545654, "learning_rate": 4.6088267156516415e-05, "loss": 2.0845, "step": 32690 }, { "epoch": 0.7701003249964674, "grad_norm": 2.166287422180176, "learning_rate": 4.6041166219207765e-05, "loss": 2.0353, "step": 32700 }, { "epoch": 0.7703358296830107, "grad_norm": 2.0117709636688232, 
"learning_rate": 4.5994065281899116e-05, "loss": 2.0734, "step": 32710 }, { "epoch": 0.7705713343695539, "grad_norm": 2.068957805633545, "learning_rate": 4.594696434459046e-05, "loss": 2.0584, "step": 32720 }, { "epoch": 0.7708068390560973, "grad_norm": 2.884422540664673, "learning_rate": 4.589986340728181e-05, "loss": 2.0943, "step": 32730 }, { "epoch": 0.7710423437426405, "grad_norm": 2.284824848175049, "learning_rate": 4.585276246997315e-05, "loss": 2.0243, "step": 32740 }, { "epoch": 0.7712778484291838, "grad_norm": 2.4009203910827637, "learning_rate": 4.58056615326645e-05, "loss": 2.0731, "step": 32750 }, { "epoch": 0.771513353115727, "grad_norm": 2.728984832763672, "learning_rate": 4.5758560595355847e-05, "loss": 2.1744, "step": 32760 }, { "epoch": 0.7717488578022703, "grad_norm": 2.1261510848999023, "learning_rate": 4.57114596580472e-05, "loss": 2.0703, "step": 32770 }, { "epoch": 0.7719843624888135, "grad_norm": 2.4731087684631348, "learning_rate": 4.566435872073854e-05, "loss": 1.8765, "step": 32780 }, { "epoch": 0.7722198671753568, "grad_norm": 2.0476648807525635, "learning_rate": 4.561725778342989e-05, "loss": 2.0803, "step": 32790 }, { "epoch": 0.7724553718619, "grad_norm": 2.126819372177124, "learning_rate": 4.557015684612124e-05, "loss": 1.9244, "step": 32800 }, { "epoch": 0.7726908765484433, "grad_norm": 1.6341440677642822, "learning_rate": 4.552305590881259e-05, "loss": 2.0313, "step": 32810 }, { "epoch": 0.7729263812349866, "grad_norm": 2.055776596069336, "learning_rate": 4.5475954971503935e-05, "loss": 2.0049, "step": 32820 }, { "epoch": 0.7731618859215298, "grad_norm": 2.976670265197754, "learning_rate": 4.5428854034195285e-05, "loss": 2.3065, "step": 32830 }, { "epoch": 0.7733973906080731, "grad_norm": 2.6509125232696533, "learning_rate": 4.538175309688663e-05, "loss": 2.1162, "step": 32840 }, { "epoch": 0.7736328952946163, "grad_norm": 2.617938756942749, "learning_rate": 4.533465215957798e-05, "loss": 2.0919, "step": 32850 }, { "epoch": 
0.7738683999811596, "grad_norm": 2.4530789852142334, "learning_rate": 4.528755122226933e-05, "loss": 2.009, "step": 32860 }, { "epoch": 0.7741039046677028, "grad_norm": 2.889514207839966, "learning_rate": 4.524045028496067e-05, "loss": 1.8075, "step": 32870 }, { "epoch": 0.7743394093542462, "grad_norm": 2.127507448196411, "learning_rate": 4.519334934765202e-05, "loss": 2.0197, "step": 32880 }, { "epoch": 0.7745749140407894, "grad_norm": 2.079528570175171, "learning_rate": 4.5146248410343366e-05, "loss": 2.0391, "step": 32890 }, { "epoch": 0.7748104187273327, "grad_norm": 2.2359161376953125, "learning_rate": 4.5099147473034716e-05, "loss": 2.0698, "step": 32900 }, { "epoch": 0.7750459234138759, "grad_norm": 1.9062904119491577, "learning_rate": 4.505204653572606e-05, "loss": 2.1326, "step": 32910 }, { "epoch": 0.7752814281004192, "grad_norm": 2.303953170776367, "learning_rate": 4.500494559841741e-05, "loss": 2.0753, "step": 32920 }, { "epoch": 0.7755169327869624, "grad_norm": 1.8101952075958252, "learning_rate": 4.495784466110875e-05, "loss": 1.8815, "step": 32930 }, { "epoch": 0.7757524374735058, "grad_norm": 3.982452630996704, "learning_rate": 4.491074372380011e-05, "loss": 1.9328, "step": 32940 }, { "epoch": 0.775987942160049, "grad_norm": 2.1649701595306396, "learning_rate": 4.4863642786491454e-05, "loss": 1.8076, "step": 32950 }, { "epoch": 0.7762234468465923, "grad_norm": 1.8240478038787842, "learning_rate": 4.4816541849182804e-05, "loss": 2.0128, "step": 32960 }, { "epoch": 0.7764589515331355, "grad_norm": 1.8030163049697876, "learning_rate": 4.476944091187415e-05, "loss": 2.0234, "step": 32970 }, { "epoch": 0.7766944562196788, "grad_norm": 1.9468859434127808, "learning_rate": 4.47223399745655e-05, "loss": 1.9977, "step": 32980 }, { "epoch": 0.776929960906222, "grad_norm": 2.0538527965545654, "learning_rate": 4.467523903725684e-05, "loss": 2.141, "step": 32990 }, { "epoch": 0.7771654655927653, "grad_norm": 2.1472928524017334, "learning_rate": 
4.462813809994819e-05, "loss": 2.1504, "step": 33000 }, { "epoch": 0.7774009702793085, "grad_norm": 2.2194812297821045, "learning_rate": 4.458103716263954e-05, "loss": 2.0039, "step": 33010 }, { "epoch": 0.7776364749658519, "grad_norm": 4.218422889709473, "learning_rate": 4.4533936225330885e-05, "loss": 1.922, "step": 33020 }, { "epoch": 0.7778719796523951, "grad_norm": 2.516357421875, "learning_rate": 4.4486835288022235e-05, "loss": 2.0226, "step": 33030 }, { "epoch": 0.7781074843389384, "grad_norm": 2.656029224395752, "learning_rate": 4.443973435071358e-05, "loss": 1.9103, "step": 33040 }, { "epoch": 0.7783429890254816, "grad_norm": 2.5791871547698975, "learning_rate": 4.439263341340493e-05, "loss": 1.9794, "step": 33050 }, { "epoch": 0.7785784937120249, "grad_norm": 2.0614676475524902, "learning_rate": 4.434553247609627e-05, "loss": 1.8569, "step": 33060 }, { "epoch": 0.7788139983985681, "grad_norm": 3.1220650672912598, "learning_rate": 4.429843153878762e-05, "loss": 2.0549, "step": 33070 }, { "epoch": 0.7790495030851113, "grad_norm": null, "learning_rate": 4.425133060147897e-05, "loss": 1.9899, "step": 33080 }, { "epoch": 0.7792850077716547, "grad_norm": 2.013601541519165, "learning_rate": 4.4208939757901184e-05, "loss": 1.9919, "step": 33090 }, { "epoch": 0.7795205124581979, "grad_norm": 2.3044540882110596, "learning_rate": 4.4161838820592534e-05, "loss": 2.0695, "step": 33100 }, { "epoch": 0.7797560171447412, "grad_norm": 2.1326963901519775, "learning_rate": 4.411473788328388e-05, "loss": 2.0311, "step": 33110 }, { "epoch": 0.7799915218312844, "grad_norm": 2.112426280975342, "learning_rate": 4.406763694597523e-05, "loss": 1.8339, "step": 33120 }, { "epoch": 0.7802270265178277, "grad_norm": 1.8452937602996826, "learning_rate": 4.402053600866657e-05, "loss": 2.0248, "step": 33130 }, { "epoch": 0.7804625312043709, "grad_norm": 2.265781879425049, "learning_rate": 4.397343507135792e-05, "loss": 1.9383, "step": 33140 }, { "epoch": 0.7806980358909142, "grad_norm": 
2.641981601715088, "learning_rate": 4.3926334134049265e-05, "loss": 2.059, "step": 33150 }, { "epoch": 0.7809335405774575, "grad_norm": 2.4268856048583984, "learning_rate": 4.3879233196740616e-05, "loss": 2.2207, "step": 33160 }, { "epoch": 0.7811690452640008, "grad_norm": 2.2304317951202393, "learning_rate": 4.3832132259431966e-05, "loss": 2.1449, "step": 33170 }, { "epoch": 0.781404549950544, "grad_norm": 2.062983274459839, "learning_rate": 4.378503132212331e-05, "loss": 1.8112, "step": 33180 }, { "epoch": 0.7816400546370873, "grad_norm": 1.8598015308380127, "learning_rate": 4.373793038481466e-05, "loss": 1.8849, "step": 33190 }, { "epoch": 0.7818755593236305, "grad_norm": 1.722225546836853, "learning_rate": 4.369082944750601e-05, "loss": 2.0335, "step": 33200 }, { "epoch": 0.7821110640101738, "grad_norm": 4.0551958084106445, "learning_rate": 4.364372851019736e-05, "loss": 2.0743, "step": 33210 }, { "epoch": 0.782346568696717, "grad_norm": 1.9605753421783447, "learning_rate": 4.360133766661957e-05, "loss": 2.0431, "step": 33220 }, { "epoch": 0.7825820733832604, "grad_norm": 2.312108278274536, "learning_rate": 4.3554236729310915e-05, "loss": 1.9877, "step": 33230 }, { "epoch": 0.7828175780698036, "grad_norm": 2.241905927658081, "learning_rate": 4.3507135792002265e-05, "loss": 2.0147, "step": 33240 }, { "epoch": 0.7830530827563469, "grad_norm": 2.4801273345947266, "learning_rate": 4.346003485469361e-05, "loss": 1.9757, "step": 33250 }, { "epoch": 0.7832885874428901, "grad_norm": 2.7878518104553223, "learning_rate": 4.341293391738496e-05, "loss": 2.0236, "step": 33260 }, { "epoch": 0.7835240921294334, "grad_norm": 1.9572044610977173, "learning_rate": 4.33658329800763e-05, "loss": 2.1393, "step": 33270 }, { "epoch": 0.7837595968159766, "grad_norm": 1.8226878643035889, "learning_rate": 4.331873204276765e-05, "loss": 1.9322, "step": 33280 }, { "epoch": 0.7839951015025199, "grad_norm": 1.8301420211791992, "learning_rate": 4.3271631105458996e-05, "loss": 2.0656, "step": 
33290 }, { "epoch": 0.7842306061890632, "grad_norm": 3.304522752761841, "learning_rate": 4.3224530168150346e-05, "loss": 1.7974, "step": 33300 }, { "epoch": 0.7844661108756065, "grad_norm": 2.201037645339966, "learning_rate": 4.3177429230841696e-05, "loss": 1.9841, "step": 33310 }, { "epoch": 0.7847016155621497, "grad_norm": 1.9371416568756104, "learning_rate": 4.3130328293533047e-05, "loss": 2.1516, "step": 33320 }, { "epoch": 0.784937120248693, "grad_norm": 2.4234302043914795, "learning_rate": 4.308322735622439e-05, "loss": 2.0129, "step": 33330 }, { "epoch": 0.7851726249352362, "grad_norm": 1.8257781267166138, "learning_rate": 4.303612641891574e-05, "loss": 2.0591, "step": 33340 }, { "epoch": 0.7854081296217795, "grad_norm": 2.209580659866333, "learning_rate": 4.2989025481607084e-05, "loss": 2.033, "step": 33350 }, { "epoch": 0.7856436343083227, "grad_norm": 2.0317904949188232, "learning_rate": 4.2941924544298434e-05, "loss": 2.0874, "step": 33360 }, { "epoch": 0.785879138994866, "grad_norm": 2.2737340927124023, "learning_rate": 4.2894823606989784e-05, "loss": 2.0869, "step": 33370 }, { "epoch": 0.7861146436814093, "grad_norm": 1.857039213180542, "learning_rate": 4.284772266968113e-05, "loss": 2.1403, "step": 33380 }, { "epoch": 0.7863501483679525, "grad_norm": 2.16298508644104, "learning_rate": 4.280062173237248e-05, "loss": 1.8499, "step": 33390 }, { "epoch": 0.7865856530544958, "grad_norm": 2.2282299995422363, "learning_rate": 4.275352079506382e-05, "loss": 2.1735, "step": 33400 }, { "epoch": 0.786821157741039, "grad_norm": 1.993444800376892, "learning_rate": 4.270641985775517e-05, "loss": 2.1354, "step": 33410 }, { "epoch": 0.7870566624275823, "grad_norm": 2.0155577659606934, "learning_rate": 4.2659318920446515e-05, "loss": 1.7992, "step": 33420 }, { "epoch": 0.7872921671141255, "grad_norm": 2.0637929439544678, "learning_rate": 4.2612217983137865e-05, "loss": 1.9629, "step": 33430 }, { "epoch": 0.7875276718006688, "grad_norm": 2.51766300201416, 
"learning_rate": 4.256511704582921e-05, "loss": 1.8442, "step": 33440 }, { "epoch": 0.7877631764872121, "grad_norm": 2.253438949584961, "learning_rate": 4.2518016108520566e-05, "loss": 1.9771, "step": 33450 }, { "epoch": 0.7879986811737554, "grad_norm": 2.1923258304595947, "learning_rate": 4.247091517121191e-05, "loss": 2.2046, "step": 33460 }, { "epoch": 0.7882341858602986, "grad_norm": 1.9330971240997314, "learning_rate": 4.242381423390326e-05, "loss": 1.9438, "step": 33470 }, { "epoch": 0.7884696905468419, "grad_norm": 1.7735408544540405, "learning_rate": 4.23767132965946e-05, "loss": 2.0522, "step": 33480 }, { "epoch": 0.7887051952333851, "grad_norm": 2.54807448387146, "learning_rate": 4.232961235928595e-05, "loss": 1.9661, "step": 33490 }, { "epoch": 0.7889406999199284, "grad_norm": 1.8328182697296143, "learning_rate": 4.22825114219773e-05, "loss": 2.1688, "step": 33500 }, { "epoch": 0.7891762046064716, "grad_norm": 2.4814724922180176, "learning_rate": 4.223541048466865e-05, "loss": 2.03, "step": 33510 }, { "epoch": 0.789411709293015, "grad_norm": 2.5519399642944336, "learning_rate": 4.218830954736e-05, "loss": 2.1314, "step": 33520 }, { "epoch": 0.7896472139795582, "grad_norm": 2.523111343383789, "learning_rate": 4.214120861005134e-05, "loss": 1.9807, "step": 33530 }, { "epoch": 0.7898827186661015, "grad_norm": 2.3048107624053955, "learning_rate": 4.209410767274269e-05, "loss": 2.0443, "step": 33540 }, { "epoch": 0.7901182233526447, "grad_norm": 2.162158966064453, "learning_rate": 4.2047006735434034e-05, "loss": 2.0237, "step": 33550 }, { "epoch": 0.790353728039188, "grad_norm": 2.4160468578338623, "learning_rate": 4.1999905798125385e-05, "loss": 2.1968, "step": 33560 }, { "epoch": 0.7905892327257312, "grad_norm": 1.7282012701034546, "learning_rate": 4.195280486081673e-05, "loss": 1.9573, "step": 33570 }, { "epoch": 0.7908247374122745, "grad_norm": 2.083235025405884, "learning_rate": 4.190570392350808e-05, "loss": 2.0517, "step": 33580 }, { "epoch": 
0.7910602420988178, "grad_norm": 2.5902810096740723, "learning_rate": 4.185860298619943e-05, "loss": 2.072, "step": 33590 }, { "epoch": 0.7912957467853611, "grad_norm": 2.1250321865081787, "learning_rate": 4.181150204889078e-05, "loss": 2.1724, "step": 33600 }, { "epoch": 0.7915312514719043, "grad_norm": 2.0615923404693604, "learning_rate": 4.176440111158212e-05, "loss": 2.0236, "step": 33610 }, { "epoch": 0.7917667561584476, "grad_norm": 2.1386961936950684, "learning_rate": 4.171730017427347e-05, "loss": 2.1057, "step": 33620 }, { "epoch": 0.7920022608449908, "grad_norm": 2.5183324813842773, "learning_rate": 4.1670199236964816e-05, "loss": 2.0803, "step": 33630 }, { "epoch": 0.7922377655315341, "grad_norm": 2.7656891345977783, "learning_rate": 4.1623098299656166e-05, "loss": 2.0875, "step": 33640 }, { "epoch": 0.7924732702180773, "grad_norm": 1.7094897031784058, "learning_rate": 4.157599736234751e-05, "loss": 2.0225, "step": 33650 }, { "epoch": 0.7927087749046206, "grad_norm": 2.0408098697662354, "learning_rate": 4.152889642503886e-05, "loss": 1.9889, "step": 33660 }, { "epoch": 0.7929442795911639, "grad_norm": 2.6148180961608887, "learning_rate": 4.148179548773021e-05, "loss": 2.3067, "step": 33670 }, { "epoch": 0.7931797842777071, "grad_norm": 1.6949024200439453, "learning_rate": 4.1434694550421554e-05, "loss": 2.2897, "step": 33680 }, { "epoch": 0.7934152889642504, "grad_norm": 2.3153419494628906, "learning_rate": 4.1387593613112904e-05, "loss": 1.8178, "step": 33690 }, { "epoch": 0.7936507936507936, "grad_norm": 2.5159363746643066, "learning_rate": 4.134049267580425e-05, "loss": 2.291, "step": 33700 }, { "epoch": 0.7938862983373369, "grad_norm": 1.6296318769454956, "learning_rate": 4.12933917384956e-05, "loss": 1.9528, "step": 33710 }, { "epoch": 0.7941218030238801, "grad_norm": 2.256610870361328, "learning_rate": 4.124629080118694e-05, "loss": 2.1584, "step": 33720 }, { "epoch": 0.7943573077104235, "grad_norm": 1.8672707080841064, "learning_rate": 
4.11991898638783e-05, "loss": 2.0653, "step": 33730 }, { "epoch": 0.7945928123969667, "grad_norm": 3.621340751647949, "learning_rate": 4.115208892656964e-05, "loss": 2.2101, "step": 33740 }, { "epoch": 0.79482831708351, "grad_norm": 2.5111987590789795, "learning_rate": 4.110498798926099e-05, "loss": 2.0825, "step": 33750 }, { "epoch": 0.7950638217700532, "grad_norm": 2.316218614578247, "learning_rate": 4.1057887051952335e-05, "loss": 1.979, "step": 33760 }, { "epoch": 0.7952993264565965, "grad_norm": 3.524585247039795, "learning_rate": 4.1010786114643686e-05, "loss": 1.9178, "step": 33770 }, { "epoch": 0.7955348311431397, "grad_norm": 2.1990773677825928, "learning_rate": 4.096368517733503e-05, "loss": 2.0901, "step": 33780 }, { "epoch": 0.795770335829683, "grad_norm": 3.369415760040283, "learning_rate": 4.091658424002638e-05, "loss": 1.7894, "step": 33790 }, { "epoch": 0.7960058405162262, "grad_norm": 1.9027162790298462, "learning_rate": 4.086948330271772e-05, "loss": 2.0495, "step": 33800 }, { "epoch": 0.7962413452027696, "grad_norm": 2.3677480220794678, "learning_rate": 4.082238236540907e-05, "loss": 2.124, "step": 33810 }, { "epoch": 0.7964768498893128, "grad_norm": 2.0192174911499023, "learning_rate": 4.077528142810042e-05, "loss": 1.9715, "step": 33820 }, { "epoch": 0.7967123545758561, "grad_norm": 2.7535741329193115, "learning_rate": 4.072818049079177e-05, "loss": 1.9513, "step": 33830 }, { "epoch": 0.7969478592623993, "grad_norm": 1.9463825225830078, "learning_rate": 4.068107955348312e-05, "loss": 1.8706, "step": 33840 }, { "epoch": 0.7971833639489426, "grad_norm": 2.1753220558166504, "learning_rate": 4.063397861617446e-05, "loss": 2.2774, "step": 33850 }, { "epoch": 0.7974188686354858, "grad_norm": 1.5415219068527222, "learning_rate": 4.058687767886581e-05, "loss": 2.0952, "step": 33860 }, { "epoch": 0.7976543733220292, "grad_norm": 2.0215823650360107, "learning_rate": 4.053977674155716e-05, "loss": 1.9228, "step": 33870 }, { "epoch": 0.7978898780085724, 
"grad_norm": 1.6013972759246826, "learning_rate": 4.049267580424851e-05, "loss": 2.037, "step": 33880 }, { "epoch": 0.7981253826951157, "grad_norm": 2.624075174331665, "learning_rate": 4.0445574866939855e-05, "loss": 1.9571, "step": 33890 }, { "epoch": 0.7983608873816589, "grad_norm": 2.599543571472168, "learning_rate": 4.0398473929631205e-05, "loss": 2.0544, "step": 33900 }, { "epoch": 0.7985963920682022, "grad_norm": 1.950036883354187, "learning_rate": 4.035137299232255e-05, "loss": 2.0248, "step": 33910 }, { "epoch": 0.7988318967547454, "grad_norm": 2.2384231090545654, "learning_rate": 4.03042720550139e-05, "loss": 2.0692, "step": 33920 }, { "epoch": 0.7990674014412886, "grad_norm": 1.6910631656646729, "learning_rate": 4.025717111770524e-05, "loss": 1.9093, "step": 33930 }, { "epoch": 0.799302906127832, "grad_norm": 2.5212926864624023, "learning_rate": 4.021007018039659e-05, "loss": 1.9333, "step": 33940 }, { "epoch": 0.7995384108143752, "grad_norm": 2.185635566711426, "learning_rate": 4.0162969243087936e-05, "loss": 2.2586, "step": 33950 }, { "epoch": 0.7997739155009185, "grad_norm": 3.235745429992676, "learning_rate": 4.0115868305779286e-05, "loss": 2.0323, "step": 33960 }, { "epoch": 0.8000094201874617, "grad_norm": 1.8148341178894043, "learning_rate": 4.0068767368470636e-05, "loss": 2.091, "step": 33970 }, { "epoch": 0.800244924874005, "grad_norm": 2.3433852195739746, "learning_rate": 4.002166643116198e-05, "loss": 1.7543, "step": 33980 }, { "epoch": 0.8004804295605482, "grad_norm": 1.957887053489685, "learning_rate": 3.997456549385333e-05, "loss": 1.9187, "step": 33990 }, { "epoch": 0.8007159342470915, "grad_norm": 2.367794990539551, "learning_rate": 3.9927464556544673e-05, "loss": 2.0184, "step": 34000 }, { "epoch": 0.8009514389336347, "grad_norm": 2.2443687915802, "learning_rate": 3.9880363619236024e-05, "loss": 1.9743, "step": 34010 }, { "epoch": 0.8011869436201781, "grad_norm": 1.9915101528167725, "learning_rate": 3.9833262681927374e-05, "loss": 1.9113, 
"step": 34020 }, { "epoch": 0.8014224483067213, "grad_norm": 2.523921489715576, "learning_rate": 3.9786161744618724e-05, "loss": 1.9004, "step": 34030 }, { "epoch": 0.8016579529932646, "grad_norm": 2.2113091945648193, "learning_rate": 3.973906080731007e-05, "loss": 2.2037, "step": 34040 }, { "epoch": 0.8018934576798078, "grad_norm": 1.9317169189453125, "learning_rate": 3.969195987000142e-05, "loss": 2.1463, "step": 34050 }, { "epoch": 0.8021289623663511, "grad_norm": 1.804018259048462, "learning_rate": 3.964485893269276e-05, "loss": 2.1225, "step": 34060 }, { "epoch": 0.8023644670528943, "grad_norm": 1.9248290061950684, "learning_rate": 3.959775799538411e-05, "loss": 2.0875, "step": 34070 }, { "epoch": 0.8025999717394376, "grad_norm": 2.0950820446014404, "learning_rate": 3.9550657058075455e-05, "loss": 2.1594, "step": 34080 }, { "epoch": 0.8028354764259809, "grad_norm": 2.13773775100708, "learning_rate": 3.9503556120766805e-05, "loss": 1.6516, "step": 34090 }, { "epoch": 0.8030709811125242, "grad_norm": 1.8024755716323853, "learning_rate": 3.945645518345815e-05, "loss": 1.9598, "step": 34100 }, { "epoch": 0.8033064857990674, "grad_norm": 2.8858230113983154, "learning_rate": 3.94093542461495e-05, "loss": 1.8878, "step": 34110 }, { "epoch": 0.8035419904856107, "grad_norm": 2.173196315765381, "learning_rate": 3.936225330884085e-05, "loss": 1.8901, "step": 34120 }, { "epoch": 0.8037774951721539, "grad_norm": 2.5591368675231934, "learning_rate": 3.931515237153219e-05, "loss": 1.82, "step": 34130 }, { "epoch": 0.8040129998586972, "grad_norm": 2.587899923324585, "learning_rate": 3.926805143422354e-05, "loss": 2.0842, "step": 34140 }, { "epoch": 0.8042485045452404, "grad_norm": 1.9660160541534424, "learning_rate": 3.9220950496914886e-05, "loss": 1.9926, "step": 34150 }, { "epoch": 0.8044840092317838, "grad_norm": 2.291707754135132, "learning_rate": 3.9173849559606243e-05, "loss": 1.9393, "step": 34160 }, { "epoch": 0.804719513918327, "grad_norm": 2.452038288116455, 
"learning_rate": 3.912674862229759e-05, "loss": 2.0658, "step": 34170 }, { "epoch": 0.8049550186048703, "grad_norm": 1.9962801933288574, "learning_rate": 3.907964768498894e-05, "loss": 2.0453, "step": 34180 }, { "epoch": 0.8051905232914135, "grad_norm": 2.2015650272369385, "learning_rate": 3.903254674768028e-05, "loss": 2.1626, "step": 34190 }, { "epoch": 0.8054260279779568, "grad_norm": 2.1929237842559814, "learning_rate": 3.898544581037163e-05, "loss": 2.1904, "step": 34200 }, { "epoch": 0.8056615326645, "grad_norm": 2.3259692192077637, "learning_rate": 3.8938344873062974e-05, "loss": 1.9968, "step": 34210 }, { "epoch": 0.8058970373510432, "grad_norm": 2.7438056468963623, "learning_rate": 3.8891243935754325e-05, "loss": 2.2442, "step": 34220 }, { "epoch": 0.8061325420375866, "grad_norm": 2.609450340270996, "learning_rate": 3.884414299844567e-05, "loss": 1.9798, "step": 34230 }, { "epoch": 0.8063680467241298, "grad_norm": 2.7057836055755615, "learning_rate": 3.879704206113702e-05, "loss": 1.6244, "step": 34240 }, { "epoch": 0.8066035514106731, "grad_norm": 2.6504015922546387, "learning_rate": 3.874994112382836e-05, "loss": 2.1608, "step": 34250 }, { "epoch": 0.8068390560972163, "grad_norm": 2.614208936691284, "learning_rate": 3.870284018651971e-05, "loss": 2.085, "step": 34260 }, { "epoch": 0.8070745607837596, "grad_norm": 2.1055519580841064, "learning_rate": 3.865573924921106e-05, "loss": 2.0212, "step": 34270 }, { "epoch": 0.8073100654703028, "grad_norm": 2.314775228500366, "learning_rate": 3.8608638311902406e-05, "loss": 2.0582, "step": 34280 }, { "epoch": 0.8075455701568461, "grad_norm": 2.320479393005371, "learning_rate": 3.8561537374593756e-05, "loss": 1.8997, "step": 34290 }, { "epoch": 0.8077810748433893, "grad_norm": 2.615222692489624, "learning_rate": 3.8514436437285106e-05, "loss": 2.1215, "step": 34300 }, { "epoch": 0.8080165795299327, "grad_norm": 1.9585306644439697, "learning_rate": 3.8467335499976456e-05, "loss": 1.8919, "step": 34310 }, { "epoch": 
0.8082520842164759, "grad_norm": 1.8004975318908691, "learning_rate": 3.84202345626678e-05, "loss": 2.1203, "step": 34320 }, { "epoch": 0.8084875889030192, "grad_norm": 1.973084568977356, "learning_rate": 3.837313362535915e-05, "loss": 2.0844, "step": 34330 }, { "epoch": 0.8087230935895624, "grad_norm": 2.2893195152282715, "learning_rate": 3.8326032688050494e-05, "loss": 1.9385, "step": 34340 }, { "epoch": 0.8089585982761057, "grad_norm": 2.5790085792541504, "learning_rate": 3.8278931750741844e-05, "loss": 1.9747, "step": 34350 }, { "epoch": 0.8091941029626489, "grad_norm": 2.1595449447631836, "learning_rate": 3.823183081343319e-05, "loss": 2.1628, "step": 34360 }, { "epoch": 0.8094296076491923, "grad_norm": 2.5433499813079834, "learning_rate": 3.818472987612454e-05, "loss": 1.9291, "step": 34370 }, { "epoch": 0.8096651123357355, "grad_norm": 2.732644557952881, "learning_rate": 3.813762893881588e-05, "loss": 2.1055, "step": 34380 }, { "epoch": 0.8099006170222788, "grad_norm": 1.8708646297454834, "learning_rate": 3.809052800150723e-05, "loss": 1.9993, "step": 34390 }, { "epoch": 0.810136121708822, "grad_norm": 1.994516134262085, "learning_rate": 3.8043427064198575e-05, "loss": 2.0301, "step": 34400 }, { "epoch": 0.8103716263953653, "grad_norm": 2.354677677154541, "learning_rate": 3.7996326126889925e-05, "loss": 2.0536, "step": 34410 }, { "epoch": 0.8106071310819085, "grad_norm": 1.9902782440185547, "learning_rate": 3.7949225189581275e-05, "loss": 2.0342, "step": 34420 }, { "epoch": 0.8108426357684518, "grad_norm": 2.1519672870635986, "learning_rate": 3.790212425227262e-05, "loss": 2.0471, "step": 34430 }, { "epoch": 0.811078140454995, "grad_norm": 2.525325059890747, "learning_rate": 3.785502331496397e-05, "loss": 1.9082, "step": 34440 }, { "epoch": 0.8113136451415384, "grad_norm": 1.9717092514038086, "learning_rate": 3.780792237765532e-05, "loss": 2.0308, "step": 34450 }, { "epoch": 0.8115491498280816, "grad_norm": 2.371563196182251, "learning_rate": 
3.776082144034667e-05, "loss": 2.2107, "step": 34460 }, { "epoch": 0.8117846545146249, "grad_norm": 2.0133941173553467, "learning_rate": 3.771372050303801e-05, "loss": 2.2433, "step": 34470 }, { "epoch": 0.8120201592011681, "grad_norm": 2.075589418411255, "learning_rate": 3.766661956572936e-05, "loss": 2.0989, "step": 34480 }, { "epoch": 0.8122556638877113, "grad_norm": 2.919072389602661, "learning_rate": 3.761951862842071e-05, "loss": 2.1238, "step": 34490 }, { "epoch": 0.8124911685742546, "grad_norm": 2.5039098262786865, "learning_rate": 3.757241769111206e-05, "loss": 2.1452, "step": 34500 }, { "epoch": 0.8127266732607978, "grad_norm": 2.3447000980377197, "learning_rate": 3.75253167538034e-05, "loss": 2.1173, "step": 34510 }, { "epoch": 0.8129621779473412, "grad_norm": 2.079946279525757, "learning_rate": 3.747821581649475e-05, "loss": 1.7781, "step": 34520 }, { "epoch": 0.8131976826338844, "grad_norm": 2.0972554683685303, "learning_rate": 3.7431114879186094e-05, "loss": 1.8383, "step": 34530 }, { "epoch": 0.8134331873204277, "grad_norm": 1.8633981943130493, "learning_rate": 3.7384013941877444e-05, "loss": 1.967, "step": 34540 }, { "epoch": 0.8136686920069709, "grad_norm": 1.938206434249878, "learning_rate": 3.733691300456879e-05, "loss": 2.0227, "step": 34550 }, { "epoch": 0.8139041966935142, "grad_norm": 2.096994400024414, "learning_rate": 3.728981206726014e-05, "loss": 2.109, "step": 34560 }, { "epoch": 0.8141397013800574, "grad_norm": 2.675692319869995, "learning_rate": 3.724271112995149e-05, "loss": 2.0283, "step": 34570 }, { "epoch": 0.8143752060666007, "grad_norm": 2.3348910808563232, "learning_rate": 3.719561019264284e-05, "loss": 2.0057, "step": 34580 }, { "epoch": 0.814610710753144, "grad_norm": 1.5937950611114502, "learning_rate": 3.714850925533418e-05, "loss": 1.9629, "step": 34590 }, { "epoch": 0.8148462154396873, "grad_norm": 2.59680438041687, "learning_rate": 3.710140831802553e-05, "loss": 1.8433, "step": 34600 }, { "epoch": 0.8150817201262305, 
"grad_norm": 2.0707480907440186, "learning_rate": 3.705430738071688e-05, "loss": 1.9767, "step": 34610 }, { "epoch": 0.8153172248127738, "grad_norm": 2.1386687755584717, "learning_rate": 3.7007206443408226e-05, "loss": 1.9353, "step": 34620 }, { "epoch": 0.815552729499317, "grad_norm": 2.207897663116455, "learning_rate": 3.6960105506099576e-05, "loss": 2.0739, "step": 34630 }, { "epoch": 0.8157882341858603, "grad_norm": 1.8906501531600952, "learning_rate": 3.691300456879092e-05, "loss": 2.0127, "step": 34640 }, { "epoch": 0.8160237388724035, "grad_norm": 2.1605560779571533, "learning_rate": 3.686590363148227e-05, "loss": 1.8724, "step": 34650 }, { "epoch": 0.8162592435589469, "grad_norm": 2.182072401046753, "learning_rate": 3.681880269417361e-05, "loss": 1.968, "step": 34660 }, { "epoch": 0.8164947482454901, "grad_norm": 2.064140796661377, "learning_rate": 3.6771701756864964e-05, "loss": 2.0005, "step": 34670 }, { "epoch": 0.8167302529320334, "grad_norm": 2.037813186645508, "learning_rate": 3.672460081955631e-05, "loss": 2.0115, "step": 34680 }, { "epoch": 0.8169657576185766, "grad_norm": 2.205673933029175, "learning_rate": 3.667749988224766e-05, "loss": 1.8754, "step": 34690 }, { "epoch": 0.8172012623051199, "grad_norm": 2.1787984371185303, "learning_rate": 3.6630398944939e-05, "loss": 2.0636, "step": 34700 }, { "epoch": 0.8174367669916631, "grad_norm": 2.24696946144104, "learning_rate": 3.658329800763035e-05, "loss": 2.071, "step": 34710 }, { "epoch": 0.8176722716782064, "grad_norm": 1.690644383430481, "learning_rate": 3.65361970703217e-05, "loss": 1.9747, "step": 34720 }, { "epoch": 0.8179077763647497, "grad_norm": 2.699493646621704, "learning_rate": 3.648909613301305e-05, "loss": 2.1167, "step": 34730 }, { "epoch": 0.818143281051293, "grad_norm": 2.260894536972046, "learning_rate": 3.6441995195704395e-05, "loss": 1.8883, "step": 34740 }, { "epoch": 0.8183787857378362, "grad_norm": 1.964855670928955, "learning_rate": 3.6394894258395745e-05, "loss": 2.0409, 
"step": 34750 }, { "epoch": 0.8186142904243795, "grad_norm": 2.360180616378784, "learning_rate": 3.6347793321087095e-05, "loss": 1.9799, "step": 34760 }, { "epoch": 0.8188497951109227, "grad_norm": 2.9807870388031006, "learning_rate": 3.630069238377844e-05, "loss": 2.2196, "step": 34770 }, { "epoch": 0.8190852997974659, "grad_norm": 2.496344566345215, "learning_rate": 3.625359144646979e-05, "loss": 1.8034, "step": 34780 }, { "epoch": 0.8193208044840092, "grad_norm": 2.7705297470092773, "learning_rate": 3.620649050916113e-05, "loss": 1.8999, "step": 34790 }, { "epoch": 0.8195563091705524, "grad_norm": 2.4351303577423096, "learning_rate": 3.615938957185248e-05, "loss": 1.8937, "step": 34800 }, { "epoch": 0.8197918138570958, "grad_norm": 2.3407814502716064, "learning_rate": 3.6112288634543826e-05, "loss": 2.2182, "step": 34810 }, { "epoch": 0.820027318543639, "grad_norm": 1.9275591373443604, "learning_rate": 3.606518769723518e-05, "loss": 2.0223, "step": 34820 }, { "epoch": 0.8202628232301823, "grad_norm": 2.0065784454345703, "learning_rate": 3.601808675992652e-05, "loss": 2.2715, "step": 34830 }, { "epoch": 0.8204983279167255, "grad_norm": 2.351534366607666, "learning_rate": 3.597098582261787e-05, "loss": 2.1069, "step": 34840 }, { "epoch": 0.8207338326032688, "grad_norm": 2.254944086074829, "learning_rate": 3.5923884885309214e-05, "loss": 2.1348, "step": 34850 }, { "epoch": 0.820969337289812, "grad_norm": 4.640080451965332, "learning_rate": 3.587678394800057e-05, "loss": 1.981, "step": 34860 }, { "epoch": 0.8212048419763553, "grad_norm": 1.7675470113754272, "learning_rate": 3.5829683010691914e-05, "loss": 1.9438, "step": 34870 }, { "epoch": 0.8214403466628986, "grad_norm": 1.8417401313781738, "learning_rate": 3.5782582073383265e-05, "loss": 1.9817, "step": 34880 }, { "epoch": 0.8216758513494419, "grad_norm": 2.11193585395813, "learning_rate": 3.573548113607461e-05, "loss": 2.0047, "step": 34890 }, { "epoch": 0.8219113560359851, "grad_norm": 1.9333069324493408, 
"learning_rate": 3.568838019876596e-05, "loss": 1.9611, "step": 34900 }, { "epoch": 0.8221468607225284, "grad_norm": 1.8805556297302246, "learning_rate": 3.564127926145731e-05, "loss": 1.9072, "step": 34910 }, { "epoch": 0.8223823654090716, "grad_norm": 2.1735150814056396, "learning_rate": 3.559417832414865e-05, "loss": 1.8782, "step": 34920 }, { "epoch": 0.8226178700956149, "grad_norm": 2.4724745750427246, "learning_rate": 3.554707738684e-05, "loss": 2.0225, "step": 34930 }, { "epoch": 0.8228533747821581, "grad_norm": 2.609894037246704, "learning_rate": 3.5499976449531346e-05, "loss": 2.1728, "step": 34940 }, { "epoch": 0.8230888794687015, "grad_norm": 2.194990634918213, "learning_rate": 3.5452875512222696e-05, "loss": 1.8967, "step": 34950 }, { "epoch": 0.8233243841552447, "grad_norm": 1.8386231660842896, "learning_rate": 3.540577457491404e-05, "loss": 2.0285, "step": 34960 }, { "epoch": 0.823559888841788, "grad_norm": 2.252108573913574, "learning_rate": 3.535867363760539e-05, "loss": 2.0548, "step": 34970 }, { "epoch": 0.8237953935283312, "grad_norm": 1.995373249053955, "learning_rate": 3.531157270029673e-05, "loss": 2.0336, "step": 34980 }, { "epoch": 0.8240308982148745, "grad_norm": 2.212472438812256, "learning_rate": 3.526447176298808e-05, "loss": 2.0237, "step": 34990 }, { "epoch": 0.8242664029014177, "grad_norm": 2.2565536499023438, "learning_rate": 3.5217370825679434e-05, "loss": 2.2211, "step": 35000 }, { "epoch": 0.824501907587961, "grad_norm": 2.493603229522705, "learning_rate": 3.5170269888370784e-05, "loss": 2.1334, "step": 35010 }, { "epoch": 0.8247374122745043, "grad_norm": 2.159635305404663, "learning_rate": 3.512316895106213e-05, "loss": 1.9526, "step": 35020 }, { "epoch": 0.8249729169610476, "grad_norm": 1.947871208190918, "learning_rate": 3.507606801375348e-05, "loss": 1.9906, "step": 35030 }, { "epoch": 0.8252084216475908, "grad_norm": 2.8672406673431396, "learning_rate": 3.502896707644482e-05, "loss": 2.2269, "step": 35040 }, { "epoch": 
0.8254439263341341, "grad_norm": 2.212104082107544, "learning_rate": 3.498186613913617e-05, "loss": 1.9107, "step": 35050 }, { "epoch": 0.8256794310206773, "grad_norm": 2.2118680477142334, "learning_rate": 3.493476520182752e-05, "loss": 2.1372, "step": 35060 }, { "epoch": 0.8259149357072205, "grad_norm": 2.289013385772705, "learning_rate": 3.4887664264518865e-05, "loss": 1.9752, "step": 35070 }, { "epoch": 0.8261504403937638, "grad_norm": 2.4700567722320557, "learning_rate": 3.4840563327210215e-05, "loss": 2.0911, "step": 35080 }, { "epoch": 0.826385945080307, "grad_norm": 1.9941130876541138, "learning_rate": 3.479346238990156e-05, "loss": 1.8645, "step": 35090 }, { "epoch": 0.8266214497668504, "grad_norm": 2.4892849922180176, "learning_rate": 3.474636145259291e-05, "loss": 1.8108, "step": 35100 }, { "epoch": 0.8268569544533936, "grad_norm": 1.7279706001281738, "learning_rate": 3.469926051528425e-05, "loss": 1.8917, "step": 35110 }, { "epoch": 0.8270924591399369, "grad_norm": 1.975010871887207, "learning_rate": 3.46521595779756e-05, "loss": 1.9208, "step": 35120 }, { "epoch": 0.8273279638264801, "grad_norm": 2.1341896057128906, "learning_rate": 3.4605058640666946e-05, "loss": 2.0969, "step": 35130 }, { "epoch": 0.8275634685130234, "grad_norm": 2.393791437149048, "learning_rate": 3.4557957703358296e-05, "loss": 1.9602, "step": 35140 }, { "epoch": 0.8277989731995666, "grad_norm": 2.1387171745300293, "learning_rate": 3.4510856766049647e-05, "loss": 2.0583, "step": 35150 }, { "epoch": 0.82803447788611, "grad_norm": 1.9722431898117065, "learning_rate": 3.4463755828741e-05, "loss": 2.0649, "step": 35160 }, { "epoch": 0.8282699825726532, "grad_norm": 2.32572340965271, "learning_rate": 3.441665489143234e-05, "loss": 2.1126, "step": 35170 }, { "epoch": 0.8285054872591965, "grad_norm": 2.5280470848083496, "learning_rate": 3.436955395412369e-05, "loss": 2.0072, "step": 35180 }, { "epoch": 0.8287409919457397, "grad_norm": 2.0874745845794678, "learning_rate": 
3.4322453016815034e-05, "loss": 2.0661, "step": 35190 }, { "epoch": 0.828976496632283, "grad_norm": 3.3035924434661865, "learning_rate": 3.4275352079506384e-05, "loss": 1.9418, "step": 35200 }, { "epoch": 0.8292120013188262, "grad_norm": 2.1365528106689453, "learning_rate": 3.4228251142197735e-05, "loss": 1.8834, "step": 35210 }, { "epoch": 0.8294475060053695, "grad_norm": 1.7832911014556885, "learning_rate": 3.418115020488908e-05, "loss": 1.9537, "step": 35220 }, { "epoch": 0.8296830106919127, "grad_norm": 2.0132415294647217, "learning_rate": 3.413404926758043e-05, "loss": 2.0083, "step": 35230 }, { "epoch": 0.8299185153784561, "grad_norm": 2.6595003604888916, "learning_rate": 3.408694833027177e-05, "loss": 2.0959, "step": 35240 }, { "epoch": 0.8301540200649993, "grad_norm": 1.921191692352295, "learning_rate": 3.403984739296312e-05, "loss": 1.9271, "step": 35250 }, { "epoch": 0.8303895247515426, "grad_norm": 2.7424440383911133, "learning_rate": 3.3992746455654465e-05, "loss": 2.0904, "step": 35260 }, { "epoch": 0.8306250294380858, "grad_norm": 5.584036827087402, "learning_rate": 3.3945645518345816e-05, "loss": 2.0697, "step": 35270 }, { "epoch": 0.8308605341246291, "grad_norm": 2.842258930206299, "learning_rate": 3.389854458103716e-05, "loss": 1.9869, "step": 35280 }, { "epoch": 0.8310960388111723, "grad_norm": 1.9061213731765747, "learning_rate": 3.3851443643728516e-05, "loss": 2.3207, "step": 35290 }, { "epoch": 0.8313315434977157, "grad_norm": 2.775942087173462, "learning_rate": 3.380434270641986e-05, "loss": 1.9147, "step": 35300 }, { "epoch": 0.8315670481842589, "grad_norm": 3.2270853519439697, "learning_rate": 3.375724176911121e-05, "loss": 2.175, "step": 35310 }, { "epoch": 0.8318025528708022, "grad_norm": 1.8016290664672852, "learning_rate": 3.371014083180255e-05, "loss": 1.8835, "step": 35320 }, { "epoch": 0.8320380575573454, "grad_norm": 2.776650905609131, "learning_rate": 3.3663039894493904e-05, "loss": 1.7118, "step": 35330 }, { "epoch": 
0.8322735622438886, "grad_norm": 2.003655433654785, "learning_rate": 3.361593895718525e-05, "loss": 1.9875, "step": 35340 }, { "epoch": 0.8325090669304319, "grad_norm": 2.3793911933898926, "learning_rate": 3.35688380198766e-05, "loss": 2.1075, "step": 35350 }, { "epoch": 0.8327445716169751, "grad_norm": 2.2091314792633057, "learning_rate": 3.352173708256795e-05, "loss": 2.068, "step": 35360 }, { "epoch": 0.8329800763035184, "grad_norm": 2.356450319290161, "learning_rate": 3.347463614525929e-05, "loss": 2.0795, "step": 35370 }, { "epoch": 0.8332155809900617, "grad_norm": 1.9805959463119507, "learning_rate": 3.342753520795064e-05, "loss": 2.0017, "step": 35380 }, { "epoch": 0.833451085676605, "grad_norm": 1.8965378999710083, "learning_rate": 3.3380434270641985e-05, "loss": 2.0095, "step": 35390 }, { "epoch": 0.8336865903631482, "grad_norm": 2.3308956623077393, "learning_rate": 3.3333333333333335e-05, "loss": 1.6953, "step": 35400 }, { "epoch": 0.8339220950496915, "grad_norm": 1.7506887912750244, "learning_rate": 3.328623239602468e-05, "loss": 2.011, "step": 35410 }, { "epoch": 0.8341575997362347, "grad_norm": 1.6992536783218384, "learning_rate": 3.323913145871603e-05, "loss": 1.9904, "step": 35420 }, { "epoch": 0.834393104422778, "grad_norm": 2.503995418548584, "learning_rate": 3.319203052140738e-05, "loss": 2.0786, "step": 35430 }, { "epoch": 0.8346286091093212, "grad_norm": 1.9842063188552856, "learning_rate": 3.314492958409873e-05, "loss": 1.9527, "step": 35440 }, { "epoch": 0.8348641137958646, "grad_norm": 1.7640032768249512, "learning_rate": 3.309782864679007e-05, "loss": 1.8065, "step": 35450 }, { "epoch": 0.8350996184824078, "grad_norm": 2.2963879108428955, "learning_rate": 3.305072770948142e-05, "loss": 1.8249, "step": 35460 }, { "epoch": 0.8353351231689511, "grad_norm": 1.9893380403518677, "learning_rate": 3.3003626772172766e-05, "loss": 2.1082, "step": 35470 }, { "epoch": 0.8355706278554943, "grad_norm": 1.7458503246307373, "learning_rate": 
3.2956525834864117e-05, "loss": 1.9985, "step": 35480 }, { "epoch": 0.8358061325420376, "grad_norm": 1.997781753540039, "learning_rate": 3.290942489755546e-05, "loss": 2.1124, "step": 35490 }, { "epoch": 0.8360416372285808, "grad_norm": 3.002406358718872, "learning_rate": 3.286232396024681e-05, "loss": 2.0895, "step": 35500 }, { "epoch": 0.8362771419151241, "grad_norm": 1.5130270719528198, "learning_rate": 3.281522302293816e-05, "loss": 1.8941, "step": 35510 }, { "epoch": 0.8365126466016674, "grad_norm": 1.7578104734420776, "learning_rate": 3.2768122085629504e-05, "loss": 2.0347, "step": 35520 }, { "epoch": 0.8367481512882107, "grad_norm": 2.7366182804107666, "learning_rate": 3.2721021148320854e-05, "loss": 1.8944, "step": 35530 }, { "epoch": 0.8369836559747539, "grad_norm": 2.484748601913452, "learning_rate": 3.26739202110122e-05, "loss": 1.9773, "step": 35540 }, { "epoch": 0.8372191606612972, "grad_norm": 2.0619559288024902, "learning_rate": 3.262681927370355e-05, "loss": 2.0534, "step": 35550 }, { "epoch": 0.8374546653478404, "grad_norm": 2.169790744781494, "learning_rate": 3.257971833639489e-05, "loss": 1.9748, "step": 35560 }, { "epoch": 0.8376901700343837, "grad_norm": 2.2871201038360596, "learning_rate": 3.253261739908625e-05, "loss": 2.1623, "step": 35570 }, { "epoch": 0.8379256747209269, "grad_norm": 2.192305326461792, "learning_rate": 3.248551646177759e-05, "loss": 1.9842, "step": 35580 }, { "epoch": 0.8381611794074703, "grad_norm": 1.6627674102783203, "learning_rate": 3.243841552446894e-05, "loss": 2.0446, "step": 35590 }, { "epoch": 0.8383966840940135, "grad_norm": 1.8032068014144897, "learning_rate": 3.2391314587160286e-05, "loss": 1.9092, "step": 35600 }, { "epoch": 0.8386321887805568, "grad_norm": 2.1932010650634766, "learning_rate": 3.2344213649851636e-05, "loss": 2.0882, "step": 35610 }, { "epoch": 0.8388676934671, "grad_norm": 2.2689177989959717, "learning_rate": 3.229711271254298e-05, "loss": 2.0097, "step": 35620 }, { "epoch": 
0.8391031981536432, "grad_norm": 1.843926191329956, "learning_rate": 3.225001177523433e-05, "loss": 2.1152, "step": 35630 }, { "epoch": 0.8393387028401865, "grad_norm": 1.8309720754623413, "learning_rate": 3.220291083792567e-05, "loss": 1.7824, "step": 35640 }, { "epoch": 0.8395742075267297, "grad_norm": 1.8428617715835571, "learning_rate": 3.215580990061702e-05, "loss": 2.0781, "step": 35650 }, { "epoch": 0.839809712213273, "grad_norm": 3.1680748462677, "learning_rate": 3.2108708963308374e-05, "loss": 2.0001, "step": 35660 }, { "epoch": 0.8400452168998163, "grad_norm": 2.44405198097229, "learning_rate": 3.206160802599972e-05, "loss": 1.9798, "step": 35670 }, { "epoch": 0.8402807215863596, "grad_norm": 2.168524980545044, "learning_rate": 3.201450708869107e-05, "loss": 1.7701, "step": 35680 }, { "epoch": 0.8405162262729028, "grad_norm": 2.2438528537750244, "learning_rate": 3.196740615138241e-05, "loss": 2.1856, "step": 35690 }, { "epoch": 0.8407517309594461, "grad_norm": 3.037527084350586, "learning_rate": 3.192030521407376e-05, "loss": 2.1172, "step": 35700 }, { "epoch": 0.8409872356459893, "grad_norm": 2.3657798767089844, "learning_rate": 3.187320427676511e-05, "loss": 2.2592, "step": 35710 }, { "epoch": 0.8412227403325326, "grad_norm": 2.6857681274414062, "learning_rate": 3.182610333945646e-05, "loss": 1.8174, "step": 35720 }, { "epoch": 0.8414582450190758, "grad_norm": 2.249049186706543, "learning_rate": 3.1779002402147805e-05, "loss": 2.0437, "step": 35730 }, { "epoch": 0.8416937497056192, "grad_norm": 2.1826348304748535, "learning_rate": 3.1731901464839155e-05, "loss": 2.1848, "step": 35740 }, { "epoch": 0.8419292543921624, "grad_norm": 2.04245924949646, "learning_rate": 3.16848005275305e-05, "loss": 1.8733, "step": 35750 }, { "epoch": 0.8421647590787057, "grad_norm": 2.0764975547790527, "learning_rate": 3.163769959022185e-05, "loss": 1.9855, "step": 35760 }, { "epoch": 0.8424002637652489, "grad_norm": 2.581373453140259, "learning_rate": 3.159059865291319e-05, 
"loss": 1.7349, "step": 35770 }, { "epoch": 0.8426357684517922, "grad_norm": 2.6888554096221924, "learning_rate": 3.154349771560454e-05, "loss": 2.0537, "step": 35780 }, { "epoch": 0.8428712731383354, "grad_norm": 1.8182003498077393, "learning_rate": 3.1496396778295886e-05, "loss": 2.1486, "step": 35790 }, { "epoch": 0.8431067778248788, "grad_norm": 2.2902069091796875, "learning_rate": 3.1449295840987236e-05, "loss": 2.056, "step": 35800 }, { "epoch": 0.843342282511422, "grad_norm": 1.7687901258468628, "learning_rate": 3.1402194903678587e-05, "loss": 2.1473, "step": 35810 }, { "epoch": 0.8435777871979653, "grad_norm": 1.9152394533157349, "learning_rate": 3.135509396636993e-05, "loss": 2.008, "step": 35820 }, { "epoch": 0.8438132918845085, "grad_norm": 2.308910846710205, "learning_rate": 3.130799302906128e-05, "loss": 1.9928, "step": 35830 }, { "epoch": 0.8440487965710518, "grad_norm": 1.848122477531433, "learning_rate": 3.1260892091752624e-05, "loss": 2.1119, "step": 35840 }, { "epoch": 0.844284301257595, "grad_norm": 2.760970115661621, "learning_rate": 3.121379115444398e-05, "loss": 1.9572, "step": 35850 }, { "epoch": 0.8445198059441383, "grad_norm": 2.340911865234375, "learning_rate": 3.1166690217135324e-05, "loss": 2.0947, "step": 35860 }, { "epoch": 0.8447553106306815, "grad_norm": 1.8320900201797485, "learning_rate": 3.1119589279826674e-05, "loss": 1.8869, "step": 35870 }, { "epoch": 0.8449908153172249, "grad_norm": 2.2665231227874756, "learning_rate": 3.107248834251802e-05, "loss": 2.0459, "step": 35880 }, { "epoch": 0.8452263200037681, "grad_norm": 2.3234121799468994, "learning_rate": 3.102538740520937e-05, "loss": 1.8317, "step": 35890 }, { "epoch": 0.8454618246903114, "grad_norm": 2.1993918418884277, "learning_rate": 3.097828646790071e-05, "loss": 2.1804, "step": 35900 }, { "epoch": 0.8456973293768546, "grad_norm": 2.1090517044067383, "learning_rate": 3.093118553059206e-05, "loss": 1.921, "step": 35910 }, { "epoch": 0.8459328340633978, "grad_norm": 
2.0984067916870117, "learning_rate": 3.0884084593283405e-05, "loss": 1.9881, "step": 35920 }, { "epoch": 0.8461683387499411, "grad_norm": 1.9138286113739014, "learning_rate": 3.0836983655974756e-05, "loss": 1.965, "step": 35930 }, { "epoch": 0.8464038434364843, "grad_norm": 2.4115593433380127, "learning_rate": 3.07898827186661e-05, "loss": 2.0701, "step": 35940 }, { "epoch": 0.8466393481230277, "grad_norm": 2.430145025253296, "learning_rate": 3.074278178135745e-05, "loss": 1.8851, "step": 35950 }, { "epoch": 0.8468748528095709, "grad_norm": 2.130075216293335, "learning_rate": 3.06956808440488e-05, "loss": 2.0154, "step": 35960 }, { "epoch": 0.8471103574961142, "grad_norm": 2.5573925971984863, "learning_rate": 3.064857990674014e-05, "loss": 2.2392, "step": 35970 }, { "epoch": 0.8473458621826574, "grad_norm": 1.901427984237671, "learning_rate": 3.060147896943149e-05, "loss": 1.7748, "step": 35980 }, { "epoch": 0.8475813668692007, "grad_norm": 2.2052485942840576, "learning_rate": 3.0554378032122843e-05, "loss": 1.8585, "step": 35990 }, { "epoch": 0.8478168715557439, "grad_norm": 2.0382041931152344, "learning_rate": 3.050727709481419e-05, "loss": 1.9512, "step": 36000 }, { "epoch": 0.8480523762422872, "grad_norm": 2.326741933822632, "learning_rate": 3.0460176157505537e-05, "loss": 2.1158, "step": 36010 }, { "epoch": 0.8482878809288305, "grad_norm": 2.1949450969696045, "learning_rate": 3.0413075220196884e-05, "loss": 1.7401, "step": 36020 }, { "epoch": 0.8485233856153738, "grad_norm": 1.9501028060913086, "learning_rate": 3.036597428288823e-05, "loss": 2.1736, "step": 36030 }, { "epoch": 0.848758890301917, "grad_norm": 1.9012770652770996, "learning_rate": 3.0318873345579578e-05, "loss": 1.9965, "step": 36040 }, { "epoch": 0.8489943949884603, "grad_norm": 2.4797487258911133, "learning_rate": 3.0271772408270928e-05, "loss": 2.0385, "step": 36050 }, { "epoch": 0.8492298996750035, "grad_norm": 2.465705394744873, "learning_rate": 3.0224671470962275e-05, "loss": 1.9674, 
"step": 36060 }, { "epoch": 0.8494654043615468, "grad_norm": 3.0579581260681152, "learning_rate": 3.0177570533653622e-05, "loss": 1.9021, "step": 36070 }, { "epoch": 0.84970090904809, "grad_norm": 1.4716150760650635, "learning_rate": 3.013046959634497e-05, "loss": 1.9811, "step": 36080 }, { "epoch": 0.8499364137346334, "grad_norm": 2.094615936279297, "learning_rate": 3.0083368659036315e-05, "loss": 2.1144, "step": 36090 }, { "epoch": 0.8501719184211766, "grad_norm": 2.2217042446136475, "learning_rate": 3.0036267721727662e-05, "loss": 2.1761, "step": 36100 }, { "epoch": 0.8504074231077199, "grad_norm": 2.1508212089538574, "learning_rate": 2.998916678441901e-05, "loss": 2.0179, "step": 36110 }, { "epoch": 0.8506429277942631, "grad_norm": 2.1316213607788086, "learning_rate": 2.9942065847110356e-05, "loss": 2.0658, "step": 36120 }, { "epoch": 0.8508784324808064, "grad_norm": 2.383114814758301, "learning_rate": 2.9894964909801703e-05, "loss": 1.9316, "step": 36130 }, { "epoch": 0.8511139371673496, "grad_norm": 2.28301739692688, "learning_rate": 2.9847863972493056e-05, "loss": 2.1035, "step": 36140 }, { "epoch": 0.8513494418538929, "grad_norm": 2.0573742389678955, "learning_rate": 2.9800763035184403e-05, "loss": 2.0037, "step": 36150 }, { "epoch": 0.8515849465404361, "grad_norm": 2.18030047416687, "learning_rate": 2.975366209787575e-05, "loss": 1.925, "step": 36160 }, { "epoch": 0.8518204512269795, "grad_norm": 2.2723090648651123, "learning_rate": 2.9706561160567097e-05, "loss": 2.1188, "step": 36170 }, { "epoch": 0.8520559559135227, "grad_norm": 2.1768040657043457, "learning_rate": 2.9659460223258444e-05, "loss": 1.9652, "step": 36180 }, { "epoch": 0.8522914606000659, "grad_norm": 2.63767671585083, "learning_rate": 2.961235928594979e-05, "loss": 2.0274, "step": 36190 }, { "epoch": 0.8525269652866092, "grad_norm": 2.380066394805908, "learning_rate": 2.956525834864114e-05, "loss": 1.9089, "step": 36200 }, { "epoch": 0.8527624699731524, "grad_norm": 1.9944638013839722, 
"learning_rate": 2.9518157411332488e-05, "loss": 1.815, "step": 36210 }, { "epoch": 0.8529979746596957, "grad_norm": 1.8857179880142212, "learning_rate": 2.9471056474023835e-05, "loss": 1.8514, "step": 36220 }, { "epoch": 0.8532334793462389, "grad_norm": 1.8483797311782837, "learning_rate": 2.942395553671518e-05, "loss": 1.9566, "step": 36230 }, { "epoch": 0.8534689840327823, "grad_norm": 2.12707781791687, "learning_rate": 2.937685459940653e-05, "loss": 2.0217, "step": 36240 }, { "epoch": 0.8537044887193255, "grad_norm": 2.1420063972473145, "learning_rate": 2.9329753662097875e-05, "loss": 2.2063, "step": 36250 }, { "epoch": 0.8539399934058688, "grad_norm": 2.08191180229187, "learning_rate": 2.9282652724789222e-05, "loss": 1.9304, "step": 36260 }, { "epoch": 0.854175498092412, "grad_norm": 1.8427835702896118, "learning_rate": 2.923555178748057e-05, "loss": 1.7183, "step": 36270 }, { "epoch": 0.8544110027789553, "grad_norm": 2.656949996948242, "learning_rate": 2.9188450850171923e-05, "loss": 2.0153, "step": 36280 }, { "epoch": 0.8546465074654985, "grad_norm": 1.9904245138168335, "learning_rate": 2.914134991286327e-05, "loss": 1.9786, "step": 36290 }, { "epoch": 0.8548820121520418, "grad_norm": 2.09130859375, "learning_rate": 2.9094248975554616e-05, "loss": 1.9968, "step": 36300 }, { "epoch": 0.8551175168385851, "grad_norm": 2.567244529724121, "learning_rate": 2.9047148038245963e-05, "loss": 1.9054, "step": 36310 }, { "epoch": 0.8553530215251284, "grad_norm": 2.4783241748809814, "learning_rate": 2.900004710093731e-05, "loss": 1.9424, "step": 36320 }, { "epoch": 0.8555885262116716, "grad_norm": 2.3222262859344482, "learning_rate": 2.8952946163628657e-05, "loss": 2.0085, "step": 36330 }, { "epoch": 0.8558240308982149, "grad_norm": 2.0765700340270996, "learning_rate": 2.8905845226320004e-05, "loss": 1.9701, "step": 36340 }, { "epoch": 0.8560595355847581, "grad_norm": 2.526127815246582, "learning_rate": 2.8858744289011354e-05, "loss": 2.0037, "step": 36350 }, { "epoch": 
0.8562950402713014, "grad_norm": 2.3003344535827637, "learning_rate": 2.88116433517027e-05, "loss": 2.0852, "step": 36360 }, { "epoch": 0.8565305449578446, "grad_norm": 2.28483247756958, "learning_rate": 2.8764542414394048e-05, "loss": 2.1826, "step": 36370 }, { "epoch": 0.856766049644388, "grad_norm": 2.5736210346221924, "learning_rate": 2.8717441477085395e-05, "loss": 1.9093, "step": 36380 }, { "epoch": 0.8570015543309312, "grad_norm": 2.280210018157959, "learning_rate": 2.867034053977674e-05, "loss": 2.1804, "step": 36390 }, { "epoch": 0.8572370590174745, "grad_norm": 2.255749225616455, "learning_rate": 2.8623239602468088e-05, "loss": 2.0126, "step": 36400 }, { "epoch": 0.8574725637040177, "grad_norm": 2.0187699794769287, "learning_rate": 2.8576138665159435e-05, "loss": 2.2085, "step": 36410 }, { "epoch": 0.857708068390561, "grad_norm": 3.545119285583496, "learning_rate": 2.852903772785079e-05, "loss": 2.1997, "step": 36420 }, { "epoch": 0.8579435730771042, "grad_norm": 2.619332790374756, "learning_rate": 2.8481936790542136e-05, "loss": 2.0218, "step": 36430 }, { "epoch": 0.8581790777636475, "grad_norm": 2.5285348892211914, "learning_rate": 2.8434835853233483e-05, "loss": 1.9183, "step": 36440 }, { "epoch": 0.8584145824501908, "grad_norm": 3.5454587936401367, "learning_rate": 2.838773491592483e-05, "loss": 2.0172, "step": 36450 }, { "epoch": 0.8586500871367341, "grad_norm": 1.9408241510391235, "learning_rate": 2.8340633978616176e-05, "loss": 2.2235, "step": 36460 }, { "epoch": 0.8588855918232773, "grad_norm": 2.6820948123931885, "learning_rate": 2.8293533041307523e-05, "loss": 2.005, "step": 36470 }, { "epoch": 0.8591210965098205, "grad_norm": 1.9332228899002075, "learning_rate": 2.824643210399887e-05, "loss": 1.9547, "step": 36480 }, { "epoch": 0.8593566011963638, "grad_norm": 1.865426778793335, "learning_rate": 2.8199331166690217e-05, "loss": 1.8152, "step": 36490 }, { "epoch": 0.859592105882907, "grad_norm": 1.7702504396438599, "learning_rate": 
2.8152230229381567e-05, "loss": 2.0508, "step": 36500 }, { "epoch": 0.8598276105694503, "grad_norm": 1.7681245803833008, "learning_rate": 2.8105129292072914e-05, "loss": 1.9025, "step": 36510 }, { "epoch": 0.8600631152559935, "grad_norm": 1.7548736333847046, "learning_rate": 2.805802835476426e-05, "loss": 1.7362, "step": 36520 }, { "epoch": 0.8602986199425369, "grad_norm": 2.2081425189971924, "learning_rate": 2.8010927417455608e-05, "loss": 2.0412, "step": 36530 }, { "epoch": 0.8605341246290801, "grad_norm": 1.8118314743041992, "learning_rate": 2.7963826480146954e-05, "loss": 1.9552, "step": 36540 }, { "epoch": 0.8607696293156234, "grad_norm": 2.4875271320343018, "learning_rate": 2.79167255428383e-05, "loss": 2.0385, "step": 36550 }, { "epoch": 0.8610051340021666, "grad_norm": 2.542076349258423, "learning_rate": 2.7869624605529655e-05, "loss": 2.1191, "step": 36560 }, { "epoch": 0.8612406386887099, "grad_norm": 2.1214327812194824, "learning_rate": 2.7822523668221002e-05, "loss": 2.0365, "step": 36570 }, { "epoch": 0.8614761433752531, "grad_norm": 4.163021564483643, "learning_rate": 2.777542273091235e-05, "loss": 2.0507, "step": 36580 }, { "epoch": 0.8617116480617965, "grad_norm": 2.633694648742676, "learning_rate": 2.7728321793603696e-05, "loss": 1.9404, "step": 36590 }, { "epoch": 0.8619471527483397, "grad_norm": 2.0306925773620605, "learning_rate": 2.7681220856295042e-05, "loss": 1.8616, "step": 36600 }, { "epoch": 0.862182657434883, "grad_norm": 2.5609326362609863, "learning_rate": 2.763411991898639e-05, "loss": 2.0449, "step": 36610 }, { "epoch": 0.8624181621214262, "grad_norm": 3.2793045043945312, "learning_rate": 2.7587018981677736e-05, "loss": 1.8033, "step": 36620 }, { "epoch": 0.8626536668079695, "grad_norm": 2.455267906188965, "learning_rate": 2.7539918044369083e-05, "loss": 2.0809, "step": 36630 }, { "epoch": 0.8628891714945127, "grad_norm": 2.3325164318084717, "learning_rate": 2.749281710706043e-05, "loss": 2.2098, "step": 36640 }, { "epoch": 
0.863124676181056, "grad_norm": 2.911341667175293, "learning_rate": 2.744571616975178e-05, "loss": 1.9998, "step": 36650 }, { "epoch": 0.8633601808675992, "grad_norm": 1.7594044208526611, "learning_rate": 2.7398615232443127e-05, "loss": 1.9491, "step": 36660 }, { "epoch": 0.8635956855541426, "grad_norm": 1.8261598348617554, "learning_rate": 2.7351514295134474e-05, "loss": 1.9535, "step": 36670 }, { "epoch": 0.8638311902406858, "grad_norm": 2.3168840408325195, "learning_rate": 2.730441335782582e-05, "loss": 2.0609, "step": 36680 }, { "epoch": 0.8640666949272291, "grad_norm": null, "learning_rate": 2.7257312420517167e-05, "loss": 2.0912, "step": 36690 }, { "epoch": 0.8643021996137723, "grad_norm": 2.733128786087036, "learning_rate": 2.7214921576939385e-05, "loss": 1.8064, "step": 36700 }, { "epoch": 0.8645377043003156, "grad_norm": 2.325484275817871, "learning_rate": 2.7167820639630732e-05, "loss": 2.0417, "step": 36710 }, { "epoch": 0.8647732089868588, "grad_norm": 2.0244216918945312, "learning_rate": 2.712071970232208e-05, "loss": 1.9551, "step": 36720 }, { "epoch": 0.8650087136734022, "grad_norm": 3.063344955444336, "learning_rate": 2.7073618765013426e-05, "loss": 2.0306, "step": 36730 }, { "epoch": 0.8652442183599454, "grad_norm": 2.5917723178863525, "learning_rate": 2.7026517827704773e-05, "loss": 1.9062, "step": 36740 }, { "epoch": 0.8654797230464887, "grad_norm": 1.9023863077163696, "learning_rate": 2.697941689039612e-05, "loss": 2.0226, "step": 36750 }, { "epoch": 0.8657152277330319, "grad_norm": 2.4680817127227783, "learning_rate": 2.6932315953087467e-05, "loss": 2.0635, "step": 36760 }, { "epoch": 0.8659507324195751, "grad_norm": 1.7801569700241089, "learning_rate": 2.6885215015778813e-05, "loss": 1.7529, "step": 36770 }, { "epoch": 0.8661862371061184, "grad_norm": 2.0391175746917725, "learning_rate": 2.683811407847016e-05, "loss": 2.1877, "step": 36780 }, { "epoch": 0.8664217417926616, "grad_norm": 1.8722394704818726, "learning_rate": 
2.6791013141161507e-05, "loss": 2.0154, "step": 36790 }, { "epoch": 0.866657246479205, "grad_norm": 1.8816248178482056, "learning_rate": 2.6743912203852854e-05, "loss": 1.9464, "step": 36800 }, { "epoch": 0.8668927511657482, "grad_norm": 2.1142876148223877, "learning_rate": 2.6696811266544208e-05, "loss": 2.108, "step": 36810 }, { "epoch": 0.8671282558522915, "grad_norm": 2.861016273498535, "learning_rate": 2.6649710329235554e-05, "loss": 1.998, "step": 36820 }, { "epoch": 0.8673637605388347, "grad_norm": 2.2118663787841797, "learning_rate": 2.66026093919269e-05, "loss": 2.0853, "step": 36830 }, { "epoch": 0.867599265225378, "grad_norm": 2.2175252437591553, "learning_rate": 2.6555508454618248e-05, "loss": 1.8626, "step": 36840 }, { "epoch": 0.8678347699119212, "grad_norm": 1.7482407093048096, "learning_rate": 2.65084075173096e-05, "loss": 2.0537, "step": 36850 }, { "epoch": 0.8680702745984645, "grad_norm": 1.8986228704452515, "learning_rate": 2.6461306580000945e-05, "loss": 2.0676, "step": 36860 }, { "epoch": 0.8683057792850077, "grad_norm": 3.3696067333221436, "learning_rate": 2.6414205642692292e-05, "loss": 2.1246, "step": 36870 }, { "epoch": 0.8685412839715511, "grad_norm": 2.3474841117858887, "learning_rate": 2.636710470538364e-05, "loss": 2.0152, "step": 36880 }, { "epoch": 0.8687767886580943, "grad_norm": 2.315103769302368, "learning_rate": 2.6320003768074986e-05, "loss": 2.1758, "step": 36890 }, { "epoch": 0.8690122933446376, "grad_norm": 1.727292776107788, "learning_rate": 2.6272902830766333e-05, "loss": 2.0967, "step": 36900 }, { "epoch": 0.8692477980311808, "grad_norm": 2.4542670249938965, "learning_rate": 2.622580189345768e-05, "loss": 1.9784, "step": 36910 }, { "epoch": 0.8694833027177241, "grad_norm": 2.198284149169922, "learning_rate": 2.6178700956149026e-05, "loss": 1.8952, "step": 36920 }, { "epoch": 0.8697188074042673, "grad_norm": 3.0475144386291504, "learning_rate": 2.6131600018840373e-05, "loss": 1.8998, "step": 36930 }, { "epoch": 
0.8699543120908106, "grad_norm": 1.985602855682373, "learning_rate": 2.608449908153172e-05, "loss": 1.948, "step": 36940 }, { "epoch": 0.8701898167773539, "grad_norm": 2.757429838180542, "learning_rate": 2.6037398144223067e-05, "loss": 1.9312, "step": 36950 }, { "epoch": 0.8704253214638972, "grad_norm": 2.614267349243164, "learning_rate": 2.599029720691442e-05, "loss": 2.0093, "step": 36960 }, { "epoch": 0.8706608261504404, "grad_norm": 1.770949363708496, "learning_rate": 2.5943196269605767e-05, "loss": 1.9344, "step": 36970 }, { "epoch": 0.8708963308369837, "grad_norm": 2.9338138103485107, "learning_rate": 2.5896095332297114e-05, "loss": 1.9617, "step": 36980 }, { "epoch": 0.8711318355235269, "grad_norm": 2.102163076400757, "learning_rate": 2.584899439498846e-05, "loss": 2.0294, "step": 36990 }, { "epoch": 0.8713673402100702, "grad_norm": 1.8639206886291504, "learning_rate": 2.580189345767981e-05, "loss": 1.9084, "step": 37000 }, { "epoch": 0.8716028448966134, "grad_norm": 2.295996904373169, "learning_rate": 2.5754792520371158e-05, "loss": 2.081, "step": 37010 }, { "epoch": 0.8718383495831568, "grad_norm": 2.4227380752563477, "learning_rate": 2.5707691583062505e-05, "loss": 2.0755, "step": 37020 }, { "epoch": 0.8720738542697, "grad_norm": 2.3559842109680176, "learning_rate": 2.5660590645753852e-05, "loss": 1.8186, "step": 37030 }, { "epoch": 0.8723093589562432, "grad_norm": 2.341525077819824, "learning_rate": 2.56134897084452e-05, "loss": 2.0243, "step": 37040 }, { "epoch": 0.8725448636427865, "grad_norm": 3.58907413482666, "learning_rate": 2.5566388771136546e-05, "loss": 2.1113, "step": 37050 }, { "epoch": 0.8727803683293297, "grad_norm": 1.8478801250457764, "learning_rate": 2.5519287833827893e-05, "loss": 1.8545, "step": 37060 }, { "epoch": 0.873015873015873, "grad_norm": 2.5865366458892822, "learning_rate": 2.547218689651924e-05, "loss": 2.1964, "step": 37070 }, { "epoch": 0.8732513777024162, "grad_norm": 2.055079460144043, "learning_rate": 
2.5425085959210586e-05, "loss": 1.9852, "step": 37080 }, { "epoch": 0.8734868823889596, "grad_norm": 1.9942408800125122, "learning_rate": 2.5377985021901933e-05, "loss": 2.0406, "step": 37090 }, { "epoch": 0.8737223870755028, "grad_norm": 2.4995577335357666, "learning_rate": 2.5330884084593287e-05, "loss": 2.0003, "step": 37100 }, { "epoch": 0.8739578917620461, "grad_norm": 2.066697597503662, "learning_rate": 2.5283783147284634e-05, "loss": 1.9404, "step": 37110 }, { "epoch": 0.8741933964485893, "grad_norm": 2.102442741394043, "learning_rate": 2.523668220997598e-05, "loss": 1.9775, "step": 37120 }, { "epoch": 0.8744289011351326, "grad_norm": 2.435946464538574, "learning_rate": 2.5189581272667327e-05, "loss": 2.0967, "step": 37130 }, { "epoch": 0.8746644058216758, "grad_norm": 1.9978781938552856, "learning_rate": 2.5142480335358674e-05, "loss": 2.0675, "step": 37140 }, { "epoch": 0.8748999105082191, "grad_norm": 2.7619597911834717, "learning_rate": 2.5095379398050024e-05, "loss": 2.0668, "step": 37150 }, { "epoch": 0.8751354151947623, "grad_norm": 1.664589285850525, "learning_rate": 2.504827846074137e-05, "loss": 2.1728, "step": 37160 }, { "epoch": 0.8753709198813057, "grad_norm": 2.1075856685638428, "learning_rate": 2.5001177523432718e-05, "loss": 1.7206, "step": 37170 }, { "epoch": 0.8756064245678489, "grad_norm": 2.175349473953247, "learning_rate": 2.4954076586124065e-05, "loss": 1.9852, "step": 37180 }, { "epoch": 0.8758419292543922, "grad_norm": 2.1654629707336426, "learning_rate": 2.4906975648815412e-05, "loss": 1.9823, "step": 37190 }, { "epoch": 0.8760774339409354, "grad_norm": 2.405961036682129, "learning_rate": 2.4859874711506762e-05, "loss": 1.8534, "step": 37200 }, { "epoch": 0.8763129386274787, "grad_norm": 1.9819753170013428, "learning_rate": 2.481277377419811e-05, "loss": 2.1591, "step": 37210 }, { "epoch": 0.8765484433140219, "grad_norm": 2.0572049617767334, "learning_rate": 2.4765672836889456e-05, "loss": 1.9411, "step": 37220 }, { "epoch": 
0.8767839480005652, "grad_norm": 2.2193071842193604, "learning_rate": 2.4718571899580803e-05, "loss": 1.9532, "step": 37230 }, { "epoch": 0.8770194526871085, "grad_norm": 2.0980918407440186, "learning_rate": 2.467147096227215e-05, "loss": 1.9071, "step": 37240 }, { "epoch": 0.8772549573736518, "grad_norm": 2.7470831871032715, "learning_rate": 2.4624370024963496e-05, "loss": 2.072, "step": 37250 }, { "epoch": 0.877490462060195, "grad_norm": 1.7608436346054077, "learning_rate": 2.4577269087654843e-05, "loss": 2.0747, "step": 37260 }, { "epoch": 0.8777259667467383, "grad_norm": 1.77220618724823, "learning_rate": 2.4530168150346193e-05, "loss": 1.7371, "step": 37270 }, { "epoch": 0.8779614714332815, "grad_norm": 2.003990411758423, "learning_rate": 2.448306721303754e-05, "loss": 2.0556, "step": 37280 }, { "epoch": 0.8781969761198248, "grad_norm": 2.2261626720428467, "learning_rate": 2.4435966275728887e-05, "loss": 2.1423, "step": 37290 }, { "epoch": 0.878432480806368, "grad_norm": 2.8093926906585693, "learning_rate": 2.4388865338420237e-05, "loss": 2.0423, "step": 37300 }, { "epoch": 0.8786679854929114, "grad_norm": 2.5685336589813232, "learning_rate": 2.4341764401111584e-05, "loss": 2.134, "step": 37310 }, { "epoch": 0.8789034901794546, "grad_norm": 2.4057536125183105, "learning_rate": 2.429466346380293e-05, "loss": 2.2208, "step": 37320 }, { "epoch": 0.8791389948659978, "grad_norm": 1.4305607080459595, "learning_rate": 2.4247562526494278e-05, "loss": 2.0589, "step": 37330 }, { "epoch": 0.8793744995525411, "grad_norm": 2.37786865234375, "learning_rate": 2.4200461589185628e-05, "loss": 1.9452, "step": 37340 }, { "epoch": 0.8796100042390843, "grad_norm": 1.7371529340744019, "learning_rate": 2.4153360651876975e-05, "loss": 1.9545, "step": 37350 }, { "epoch": 0.8798455089256276, "grad_norm": 2.432138681411743, "learning_rate": 2.4106259714568322e-05, "loss": 1.9643, "step": 37360 }, { "epoch": 0.8800810136121708, "grad_norm": 2.241659641265869, "learning_rate": 
2.405915877725967e-05, "loss": 1.946, "step": 37370 }, { "epoch": 0.8803165182987142, "grad_norm": 2.5859363079071045, "learning_rate": 2.4012057839951016e-05, "loss": 2.0878, "step": 37380 }, { "epoch": 0.8805520229852574, "grad_norm": 2.5846035480499268, "learning_rate": 2.3964956902642363e-05, "loss": 1.9119, "step": 37390 }, { "epoch": 0.8807875276718007, "grad_norm": 3.664036512374878, "learning_rate": 2.391785596533371e-05, "loss": 1.9178, "step": 37400 }, { "epoch": 0.8810230323583439, "grad_norm": 2.499345064163208, "learning_rate": 2.3870755028025056e-05, "loss": 2.0054, "step": 37410 }, { "epoch": 0.8812585370448872, "grad_norm": 1.7014585733413696, "learning_rate": 2.3823654090716406e-05, "loss": 2.0352, "step": 37420 }, { "epoch": 0.8814940417314304, "grad_norm": 2.9908242225646973, "learning_rate": 2.3776553153407753e-05, "loss": 1.9084, "step": 37430 }, { "epoch": 0.8817295464179737, "grad_norm": 2.379347324371338, "learning_rate": 2.37294522160991e-05, "loss": 1.9052, "step": 37440 }, { "epoch": 0.881965051104517, "grad_norm": 1.8825994729995728, "learning_rate": 2.368235127879045e-05, "loss": 2.0933, "step": 37450 }, { "epoch": 0.8822005557910603, "grad_norm": 2.112344980239868, "learning_rate": 2.3635250341481797e-05, "loss": 1.945, "step": 37460 }, { "epoch": 0.8824360604776035, "grad_norm": 2.0043137073516846, "learning_rate": 2.3588149404173144e-05, "loss": 2.0133, "step": 37470 }, { "epoch": 0.8826715651641468, "grad_norm": 2.380242347717285, "learning_rate": 2.354104846686449e-05, "loss": 2.0031, "step": 37480 }, { "epoch": 0.88290706985069, "grad_norm": 2.028883457183838, "learning_rate": 2.349394752955584e-05, "loss": 1.8129, "step": 37490 }, { "epoch": 0.8831425745372333, "grad_norm": 2.6352202892303467, "learning_rate": 2.3446846592247188e-05, "loss": 2.1235, "step": 37500 }, { "epoch": 0.8833780792237765, "grad_norm": 2.017411470413208, "learning_rate": 2.3399745654938535e-05, "loss": 1.8616, "step": 37510 }, { "epoch": 
0.8836135839103199, "grad_norm": 2.081916093826294, "learning_rate": 2.3352644717629882e-05, "loss": 1.8437, "step": 37520 }, { "epoch": 0.8838490885968631, "grad_norm": 2.4663143157958984, "learning_rate": 2.330554378032123e-05, "loss": 1.7767, "step": 37530 }, { "epoch": 0.8840845932834064, "grad_norm": 2.4353020191192627, "learning_rate": 2.3258442843012576e-05, "loss": 2.2355, "step": 37540 }, { "epoch": 0.8843200979699496, "grad_norm": 2.1264777183532715, "learning_rate": 2.3211341905703922e-05, "loss": 2.0394, "step": 37550 }, { "epoch": 0.8845556026564929, "grad_norm": 2.2275872230529785, "learning_rate": 2.3164240968395273e-05, "loss": 2.2761, "step": 37560 }, { "epoch": 0.8847911073430361, "grad_norm": 4.1986284255981445, "learning_rate": 2.311714003108662e-05, "loss": 1.9435, "step": 37570 }, { "epoch": 0.8850266120295794, "grad_norm": 2.330110549926758, "learning_rate": 2.3070039093777966e-05, "loss": 1.7825, "step": 37580 }, { "epoch": 0.8852621167161226, "grad_norm": 2.903902769088745, "learning_rate": 2.3022938156469313e-05, "loss": 2.0841, "step": 37590 }, { "epoch": 0.8854976214026659, "grad_norm": 2.5278985500335693, "learning_rate": 2.2975837219160663e-05, "loss": 2.2633, "step": 37600 }, { "epoch": 0.8857331260892092, "grad_norm": 2.081477165222168, "learning_rate": 2.292873628185201e-05, "loss": 2.0897, "step": 37610 }, { "epoch": 0.8859686307757524, "grad_norm": 1.9763598442077637, "learning_rate": 2.2881635344543357e-05, "loss": 1.9917, "step": 37620 }, { "epoch": 0.8862041354622957, "grad_norm": 2.136435031890869, "learning_rate": 2.2834534407234707e-05, "loss": 1.8923, "step": 37630 }, { "epoch": 0.8864396401488389, "grad_norm": 2.021329402923584, "learning_rate": 2.2787433469926054e-05, "loss": 2.1512, "step": 37640 }, { "epoch": 0.8866751448353822, "grad_norm": 2.108199119567871, "learning_rate": 2.27403325326174e-05, "loss": 2.1143, "step": 37650 }, { "epoch": 0.8869106495219254, "grad_norm": 3.078599691390991, "learning_rate": 
2.2693231595308748e-05, "loss": 2.0842, "step": 37660 }, { "epoch": 0.8871461542084688, "grad_norm": 2.235524892807007, "learning_rate": 2.2646130658000095e-05, "loss": 1.945, "step": 37670 }, { "epoch": 0.887381658895012, "grad_norm": 2.1809022426605225, "learning_rate": 2.259902972069144e-05, "loss": 1.9934, "step": 37680 }, { "epoch": 0.8876171635815553, "grad_norm": 2.0931341648101807, "learning_rate": 2.255192878338279e-05, "loss": 2.0573, "step": 37690 }, { "epoch": 0.8878526682680985, "grad_norm": 2.221452236175537, "learning_rate": 2.250482784607414e-05, "loss": 1.8676, "step": 37700 }, { "epoch": 0.8880881729546418, "grad_norm": 2.1494803428649902, "learning_rate": 2.2457726908765486e-05, "loss": 2.0305, "step": 37710 }, { "epoch": 0.888323677641185, "grad_norm": 2.0528335571289062, "learning_rate": 2.2410625971456832e-05, "loss": 1.8773, "step": 37720 }, { "epoch": 0.8885591823277283, "grad_norm": 2.3743772506713867, "learning_rate": 2.236352503414818e-05, "loss": 2.1906, "step": 37730 }, { "epoch": 0.8887946870142716, "grad_norm": 2.228240966796875, "learning_rate": 2.2316424096839526e-05, "loss": 1.9563, "step": 37740 }, { "epoch": 0.8890301917008149, "grad_norm": 2.1875791549682617, "learning_rate": 2.2269323159530876e-05, "loss": 1.8761, "step": 37750 }, { "epoch": 0.8892656963873581, "grad_norm": 3.2131400108337402, "learning_rate": 2.2222222222222223e-05, "loss": 1.999, "step": 37760 }, { "epoch": 0.8895012010739014, "grad_norm": 2.3823819160461426, "learning_rate": 2.2175121284913574e-05, "loss": 1.9455, "step": 37770 }, { "epoch": 0.8897367057604446, "grad_norm": 2.280933141708374, "learning_rate": 2.212802034760492e-05, "loss": 1.9539, "step": 37780 }, { "epoch": 0.8899722104469879, "grad_norm": 2.0002353191375732, "learning_rate": 2.2080919410296267e-05, "loss": 1.891, "step": 37790 }, { "epoch": 0.8902077151335311, "grad_norm": 1.8115776777267456, "learning_rate": 2.2033818472987614e-05, "loss": 2.0144, "step": 37800 }, { "epoch": 
0.8904432198200745, "grad_norm": 2.2819020748138428, "learning_rate": 2.198671753567896e-05, "loss": 1.8285, "step": 37810 }, { "epoch": 0.8906787245066177, "grad_norm": 2.1518003940582275, "learning_rate": 2.1939616598370308e-05, "loss": 1.9067, "step": 37820 }, { "epoch": 0.890914229193161, "grad_norm": 2.508162498474121, "learning_rate": 2.1892515661061655e-05, "loss": 2.061, "step": 37830 }, { "epoch": 0.8911497338797042, "grad_norm": 2.1272640228271484, "learning_rate": 2.1845414723753005e-05, "loss": 1.9815, "step": 37840 }, { "epoch": 0.8913852385662475, "grad_norm": 3.0076818466186523, "learning_rate": 2.1798313786444352e-05, "loss": 2.0412, "step": 37850 }, { "epoch": 0.8916207432527907, "grad_norm": 2.511852264404297, "learning_rate": 2.17512128491357e-05, "loss": 2.0381, "step": 37860 }, { "epoch": 0.891856247939334, "grad_norm": 2.035937547683716, "learning_rate": 2.1704111911827045e-05, "loss": 1.9446, "step": 37870 }, { "epoch": 0.8920917526258773, "grad_norm": 1.9787641763687134, "learning_rate": 2.1657010974518392e-05, "loss": 1.9567, "step": 37880 }, { "epoch": 0.8923272573124205, "grad_norm": 2.1247246265411377, "learning_rate": 2.160991003720974e-05, "loss": 2.0504, "step": 37890 }, { "epoch": 0.8925627619989638, "grad_norm": 2.798614740371704, "learning_rate": 2.156280909990109e-05, "loss": 1.9327, "step": 37900 }, { "epoch": 0.892798266685507, "grad_norm": 2.846179246902466, "learning_rate": 2.1515708162592436e-05, "loss": 2.185, "step": 37910 }, { "epoch": 0.8930337713720503, "grad_norm": 2.097317934036255, "learning_rate": 2.1468607225283787e-05, "loss": 1.8594, "step": 37920 }, { "epoch": 0.8932692760585935, "grad_norm": 1.8242942094802856, "learning_rate": 2.1421506287975133e-05, "loss": 2.1531, "step": 37930 }, { "epoch": 0.8935047807451368, "grad_norm": 2.1491048336029053, "learning_rate": 2.137440535066648e-05, "loss": 1.8671, "step": 37940 }, { "epoch": 0.89374028543168, "grad_norm": 2.0003557205200195, "learning_rate": 
2.1327304413357827e-05, "loss": 2.0186, "step": 37950 }, { "epoch": 0.8939757901182234, "grad_norm": 2.0959033966064453, "learning_rate": 2.1280203476049174e-05, "loss": 2.1963, "step": 37960 }, { "epoch": 0.8942112948047666, "grad_norm": 1.9621938467025757, "learning_rate": 2.123310253874052e-05, "loss": 2.3335, "step": 37970 }, { "epoch": 0.8944467994913099, "grad_norm": 2.0647966861724854, "learning_rate": 2.118600160143187e-05, "loss": 2.1695, "step": 37980 }, { "epoch": 0.8946823041778531, "grad_norm": 1.7961957454681396, "learning_rate": 2.1138900664123218e-05, "loss": 1.8972, "step": 37990 }, { "epoch": 0.8949178088643964, "grad_norm": 2.169931411743164, "learning_rate": 2.1091799726814565e-05, "loss": 2.2119, "step": 38000 }, { "epoch": 0.8951533135509396, "grad_norm": 2.0266969203948975, "learning_rate": 2.104469878950591e-05, "loss": 1.8381, "step": 38010 }, { "epoch": 0.895388818237483, "grad_norm": 1.9402709007263184, "learning_rate": 2.099759785219726e-05, "loss": 2.0565, "step": 38020 }, { "epoch": 0.8956243229240262, "grad_norm": 1.9579286575317383, "learning_rate": 2.0950496914888605e-05, "loss": 1.8636, "step": 38030 }, { "epoch": 0.8958598276105695, "grad_norm": 2.2410125732421875, "learning_rate": 2.0903395977579952e-05, "loss": 2.0196, "step": 38040 }, { "epoch": 0.8960953322971127, "grad_norm": 2.3416740894317627, "learning_rate": 2.0856295040271302e-05, "loss": 1.8177, "step": 38050 }, { "epoch": 0.896330836983656, "grad_norm": 1.8137468099594116, "learning_rate": 2.080919410296265e-05, "loss": 1.9998, "step": 38060 }, { "epoch": 0.8965663416701992, "grad_norm": 2.1465566158294678, "learning_rate": 2.0762093165654e-05, "loss": 1.9154, "step": 38070 }, { "epoch": 0.8968018463567425, "grad_norm": 2.6183629035949707, "learning_rate": 2.0714992228345346e-05, "loss": 2.0203, "step": 38080 }, { "epoch": 0.8970373510432857, "grad_norm": 3.3519785404205322, "learning_rate": 2.0667891291036693e-05, "loss": 2.0246, "step": 38090 }, { "epoch": 
0.8972728557298291, "grad_norm": 2.2263829708099365, "learning_rate": 2.062079035372804e-05, "loss": 2.158, "step": 38100 }, { "epoch": 0.8975083604163723, "grad_norm": 2.257312536239624, "learning_rate": 2.0573689416419387e-05, "loss": 1.8202, "step": 38110 }, { "epoch": 0.8977438651029156, "grad_norm": 3.83290696144104, "learning_rate": 2.0526588479110737e-05, "loss": 2.1171, "step": 38120 }, { "epoch": 0.8979793697894588, "grad_norm": 1.9540990591049194, "learning_rate": 2.0479487541802084e-05, "loss": 2.0101, "step": 38130 }, { "epoch": 0.8982148744760021, "grad_norm": 2.0866284370422363, "learning_rate": 2.043238660449343e-05, "loss": 1.8839, "step": 38140 }, { "epoch": 0.8984503791625453, "grad_norm": 1.6635223627090454, "learning_rate": 2.0385285667184778e-05, "loss": 2.076, "step": 38150 }, { "epoch": 0.8986858838490887, "grad_norm": 2.1118903160095215, "learning_rate": 2.0338184729876125e-05, "loss": 2.0658, "step": 38160 }, { "epoch": 0.8989213885356319, "grad_norm": 1.8942980766296387, "learning_rate": 2.029108379256747e-05, "loss": 2.0003, "step": 38170 }, { "epoch": 0.8991568932221751, "grad_norm": 2.1378395557403564, "learning_rate": 2.024398285525882e-05, "loss": 2.0265, "step": 38180 }, { "epoch": 0.8993923979087184, "grad_norm": 2.5827856063842773, "learning_rate": 2.019688191795017e-05, "loss": 1.9765, "step": 38190 }, { "epoch": 0.8996279025952616, "grad_norm": 2.509906530380249, "learning_rate": 2.0149780980641515e-05, "loss": 1.8868, "step": 38200 }, { "epoch": 0.8998634072818049, "grad_norm": 2.7832489013671875, "learning_rate": 2.0102680043332862e-05, "loss": 1.9318, "step": 38210 }, { "epoch": 0.9000989119683481, "grad_norm": 2.0925934314727783, "learning_rate": 2.0055579106024213e-05, "loss": 1.7913, "step": 38220 }, { "epoch": 0.9003344166548914, "grad_norm": 2.3135039806365967, "learning_rate": 2.000847816871556e-05, "loss": 2.1221, "step": 38230 }, { "epoch": 0.9005699213414347, "grad_norm": 2.136977195739746, "learning_rate": 
1.9961377231406906e-05, "loss": 2.0687, "step": 38240 }, { "epoch": 0.900805426027978, "grad_norm": 2.6171984672546387, "learning_rate": 1.9914276294098253e-05, "loss": 1.9261, "step": 38250 }, { "epoch": 0.9010409307145212, "grad_norm": 3.6002085208892822, "learning_rate": 1.9867175356789603e-05, "loss": 2.0192, "step": 38260 }, { "epoch": 0.9012764354010645, "grad_norm": 1.5538113117218018, "learning_rate": 1.982007441948095e-05, "loss": 2.016, "step": 38270 }, { "epoch": 0.9015119400876077, "grad_norm": 2.0241286754608154, "learning_rate": 1.9772973482172297e-05, "loss": 1.8286, "step": 38280 }, { "epoch": 0.901747444774151, "grad_norm": 1.9949616193771362, "learning_rate": 1.9725872544863644e-05, "loss": 1.8788, "step": 38290 }, { "epoch": 0.9019829494606942, "grad_norm": 2.359792947769165, "learning_rate": 1.967877160755499e-05, "loss": 1.8023, "step": 38300 }, { "epoch": 0.9022184541472376, "grad_norm": 2.121299982070923, "learning_rate": 1.9631670670246338e-05, "loss": 2.0192, "step": 38310 }, { "epoch": 0.9024539588337808, "grad_norm": 2.227768659591675, "learning_rate": 1.9584569732937684e-05, "loss": 2.0309, "step": 38320 }, { "epoch": 0.9026894635203241, "grad_norm": 2.0290210247039795, "learning_rate": 1.9537468795629035e-05, "loss": 2.1192, "step": 38330 }, { "epoch": 0.9029249682068673, "grad_norm": 2.3042876720428467, "learning_rate": 1.949036785832038e-05, "loss": 2.1374, "step": 38340 }, { "epoch": 0.9031604728934106, "grad_norm": 2.5515758991241455, "learning_rate": 1.944326692101173e-05, "loss": 2.2315, "step": 38350 }, { "epoch": 0.9033959775799538, "grad_norm": 2.387439012527466, "learning_rate": 1.9396165983703075e-05, "loss": 2.1361, "step": 38360 }, { "epoch": 0.9036314822664971, "grad_norm": 1.8408795595169067, "learning_rate": 1.9349065046394426e-05, "loss": 1.9147, "step": 38370 }, { "epoch": 0.9038669869530404, "grad_norm": 1.412001132965088, "learning_rate": 1.9301964109085772e-05, "loss": 1.9571, "step": 38380 }, { "epoch": 
0.9041024916395837, "grad_norm": 1.8655277490615845, "learning_rate": 1.925486317177712e-05, "loss": 1.9687, "step": 38390 }, { "epoch": 0.9043379963261269, "grad_norm": 2.4135653972625732, "learning_rate": 1.9207762234468466e-05, "loss": 2.0018, "step": 38400 }, { "epoch": 0.9045735010126702, "grad_norm": 2.3086419105529785, "learning_rate": 1.9160661297159816e-05, "loss": 2.178, "step": 38410 }, { "epoch": 0.9048090056992134, "grad_norm": 1.9893290996551514, "learning_rate": 1.9113560359851163e-05, "loss": 2.228, "step": 38420 }, { "epoch": 0.9050445103857567, "grad_norm": 2.6201305389404297, "learning_rate": 1.906645942254251e-05, "loss": 2.059, "step": 38430 }, { "epoch": 0.9052800150722999, "grad_norm": 2.3512847423553467, "learning_rate": 1.9019358485233857e-05, "loss": 2.0451, "step": 38440 }, { "epoch": 0.9055155197588431, "grad_norm": 2.233941078186035, "learning_rate": 1.8972257547925204e-05, "loss": 1.9015, "step": 38450 }, { "epoch": 0.9057510244453865, "grad_norm": 3.108917713165283, "learning_rate": 1.892515661061655e-05, "loss": 1.979, "step": 38460 }, { "epoch": 0.9059865291319297, "grad_norm": 2.284076452255249, "learning_rate": 1.8878055673307898e-05, "loss": 1.9659, "step": 38470 }, { "epoch": 0.906222033818473, "grad_norm": 2.1886470317840576, "learning_rate": 1.8830954735999248e-05, "loss": 2.0916, "step": 38480 }, { "epoch": 0.9064575385050162, "grad_norm": 1.983109712600708, "learning_rate": 1.8783853798690595e-05, "loss": 2.0744, "step": 38490 }, { "epoch": 0.9066930431915595, "grad_norm": 1.651294231414795, "learning_rate": 1.873675286138194e-05, "loss": 1.8051, "step": 38500 }, { "epoch": 0.9069285478781027, "grad_norm": 2.303051710128784, "learning_rate": 1.868965192407329e-05, "loss": 1.7336, "step": 38510 }, { "epoch": 0.907164052564646, "grad_norm": 1.448872447013855, "learning_rate": 1.864255098676464e-05, "loss": 1.9543, "step": 38520 }, { "epoch": 0.9073995572511893, "grad_norm": 2.634509563446045, "learning_rate": 
1.8595450049455985e-05, "loss": 2.0457, "step": 38530 }, { "epoch": 0.9076350619377326, "grad_norm": 2.3734359741210938, "learning_rate": 1.8548349112147332e-05, "loss": 2.0179, "step": 38540 }, { "epoch": 0.9078705666242758, "grad_norm": 2.052739381790161, "learning_rate": 1.8501248174838683e-05, "loss": 2.1796, "step": 38550 }, { "epoch": 0.9081060713108191, "grad_norm": 2.4247121810913086, "learning_rate": 1.845414723753003e-05, "loss": 2.0285, "step": 38560 }, { "epoch": 0.9083415759973623, "grad_norm": 2.3661036491394043, "learning_rate": 1.8407046300221376e-05, "loss": 2.1629, "step": 38570 }, { "epoch": 0.9085770806839056, "grad_norm": 1.7307307720184326, "learning_rate": 1.8359945362912723e-05, "loss": 2.0182, "step": 38580 }, { "epoch": 0.9088125853704488, "grad_norm": 2.318981885910034, "learning_rate": 1.831284442560407e-05, "loss": 2.031, "step": 38590 }, { "epoch": 0.9090480900569922, "grad_norm": 2.0078811645507812, "learning_rate": 1.8265743488295417e-05, "loss": 1.8833, "step": 38600 }, { "epoch": 0.9092835947435354, "grad_norm": 1.9469083547592163, "learning_rate": 1.8218642550986764e-05, "loss": 2.0904, "step": 38610 }, { "epoch": 0.9095190994300787, "grad_norm": 2.0202481746673584, "learning_rate": 1.8171541613678114e-05, "loss": 1.8596, "step": 38620 }, { "epoch": 0.9097546041166219, "grad_norm": 2.1429693698883057, "learning_rate": 1.812444067636946e-05, "loss": 1.774, "step": 38630 }, { "epoch": 0.9099901088031652, "grad_norm": 2.0817933082580566, "learning_rate": 1.8077339739060808e-05, "loss": 2.0136, "step": 38640 }, { "epoch": 0.9102256134897084, "grad_norm": 2.4810056686401367, "learning_rate": 1.8030238801752154e-05, "loss": 2.0795, "step": 38650 }, { "epoch": 0.9104611181762517, "grad_norm": 2.75999116897583, "learning_rate": 1.79831378644435e-05, "loss": 2.0032, "step": 38660 }, { "epoch": 0.910696622862795, "grad_norm": 2.1488144397735596, "learning_rate": 1.793603692713485e-05, "loss": 2.3905, "step": 38670 }, { "epoch": 
0.9109321275493383, "grad_norm": 2.028859853744507, "learning_rate": 1.78889359898262e-05, "loss": 2.0345, "step": 38680 }, { "epoch": 0.9111676322358815, "grad_norm": 1.8787860870361328, "learning_rate": 1.784183505251755e-05, "loss": 1.9495, "step": 38690 }, { "epoch": 0.9114031369224248, "grad_norm": 2.0410072803497314, "learning_rate": 1.7794734115208896e-05, "loss": 2.0186, "step": 38700 }, { "epoch": 0.911638641608968, "grad_norm": 2.9783804416656494, "learning_rate": 1.7747633177900242e-05, "loss": 1.8578, "step": 38710 }, { "epoch": 0.9118741462955113, "grad_norm": 2.104757785797119, "learning_rate": 1.770053224059159e-05, "loss": 1.8201, "step": 38720 }, { "epoch": 0.9121096509820545, "grad_norm": 1.9207528829574585, "learning_rate": 1.7653431303282936e-05, "loss": 1.8914, "step": 38730 }, { "epoch": 0.9123451556685978, "grad_norm": 2.920927047729492, "learning_rate": 1.7606330365974283e-05, "loss": 1.9848, "step": 38740 }, { "epoch": 0.9125806603551411, "grad_norm": 2.510042190551758, "learning_rate": 1.755922942866563e-05, "loss": 2.1576, "step": 38750 }, { "epoch": 0.9128161650416843, "grad_norm": 2.3450679779052734, "learning_rate": 1.751212849135698e-05, "loss": 2.2116, "step": 38760 }, { "epoch": 0.9130516697282276, "grad_norm": 1.95484459400177, "learning_rate": 1.7465027554048327e-05, "loss": 1.9605, "step": 38770 }, { "epoch": 0.9132871744147708, "grad_norm": 2.005983352661133, "learning_rate": 1.7417926616739674e-05, "loss": 2.0368, "step": 38780 }, { "epoch": 0.9135226791013141, "grad_norm": 2.1589179039001465, "learning_rate": 1.737082567943102e-05, "loss": 1.906, "step": 38790 }, { "epoch": 0.9137581837878573, "grad_norm": 1.929579257965088, "learning_rate": 1.7323724742122367e-05, "loss": 2.0555, "step": 38800 }, { "epoch": 0.9139936884744007, "grad_norm": 2.2730865478515625, "learning_rate": 1.7276623804813714e-05, "loss": 2.1307, "step": 38810 }, { "epoch": 0.9142291931609439, "grad_norm": 1.4058763980865479, "learning_rate": 
1.7229522867505065e-05, "loss": 1.6729, "step": 38820 }, { "epoch": 0.9144646978474872, "grad_norm": 2.0500447750091553, "learning_rate": 1.718242193019641e-05, "loss": 1.8054, "step": 38830 }, { "epoch": 0.9147002025340304, "grad_norm": 2.374552011489868, "learning_rate": 1.713532099288776e-05, "loss": 1.8979, "step": 38840 }, { "epoch": 0.9149357072205737, "grad_norm": 2.021498441696167, "learning_rate": 1.708822005557911e-05, "loss": 2.1172, "step": 38850 }, { "epoch": 0.9151712119071169, "grad_norm": 2.2136051654815674, "learning_rate": 1.7041119118270455e-05, "loss": 2.0001, "step": 38860 }, { "epoch": 0.9154067165936602, "grad_norm": 2.6992013454437256, "learning_rate": 1.6994018180961802e-05, "loss": 1.8245, "step": 38870 }, { "epoch": 0.9156422212802034, "grad_norm": 2.2743868827819824, "learning_rate": 1.694691724365315e-05, "loss": 1.8511, "step": 38880 }, { "epoch": 0.9158777259667468, "grad_norm": 2.4155523777008057, "learning_rate": 1.6899816306344496e-05, "loss": 1.9802, "step": 38890 }, { "epoch": 0.91611323065329, "grad_norm": 1.777964472770691, "learning_rate": 1.6852715369035846e-05, "loss": 2.0244, "step": 38900 }, { "epoch": 0.9163487353398333, "grad_norm": 2.0305655002593994, "learning_rate": 1.6805614431727193e-05, "loss": 1.9335, "step": 38910 }, { "epoch": 0.9165842400263765, "grad_norm": 2.5534350872039795, "learning_rate": 1.675851349441854e-05, "loss": 2.0425, "step": 38920 }, { "epoch": 0.9168197447129198, "grad_norm": 1.9499086141586304, "learning_rate": 1.6711412557109887e-05, "loss": 2.0643, "step": 38930 }, { "epoch": 0.917055249399463, "grad_norm": 2.4477667808532715, "learning_rate": 1.6664311619801234e-05, "loss": 1.8278, "step": 38940 }, { "epoch": 0.9172907540860064, "grad_norm": 2.7401411533355713, "learning_rate": 1.661721068249258e-05, "loss": 2.1896, "step": 38950 }, { "epoch": 0.9175262587725496, "grad_norm": 2.0226340293884277, "learning_rate": 1.6570109745183927e-05, "loss": 1.9066, "step": 38960 }, { "epoch": 
0.9177617634590929, "grad_norm": 2.22379469871521, "learning_rate": 1.6523008807875278e-05, "loss": 2.0999, "step": 38970 }, { "epoch": 0.9179972681456361, "grad_norm": 2.1687543392181396, "learning_rate": 1.6475907870566624e-05, "loss": 2.0633, "step": 38980 }, { "epoch": 0.9182327728321794, "grad_norm": 3.160351514816284, "learning_rate": 1.6428806933257975e-05, "loss": 2.1236, "step": 38990 }, { "epoch": 0.9184682775187226, "grad_norm": 2.306687355041504, "learning_rate": 1.638170599594932e-05, "loss": 2.1413, "step": 39000 }, { "epoch": 0.9187037822052659, "grad_norm": 2.567837953567505, "learning_rate": 1.633460505864067e-05, "loss": 1.8291, "step": 39010 }, { "epoch": 0.9189392868918091, "grad_norm": 2.327446937561035, "learning_rate": 1.6287504121332015e-05, "loss": 2.0234, "step": 39020 }, { "epoch": 0.9191747915783524, "grad_norm": 2.052375555038452, "learning_rate": 1.6240403184023362e-05, "loss": 1.9689, "step": 39030 }, { "epoch": 0.9194102962648957, "grad_norm": 1.6674343347549438, "learning_rate": 1.6193302246714712e-05, "loss": 1.9624, "step": 39040 }, { "epoch": 0.9196458009514389, "grad_norm": 2.403968572616577, "learning_rate": 1.614620130940606e-05, "loss": 2.0186, "step": 39050 }, { "epoch": 0.9198813056379822, "grad_norm": 2.198310613632202, "learning_rate": 1.6099100372097406e-05, "loss": 1.9116, "step": 39060 }, { "epoch": 0.9201168103245254, "grad_norm": 1.7685959339141846, "learning_rate": 1.6051999434788753e-05, "loss": 1.7279, "step": 39070 }, { "epoch": 0.9203523150110687, "grad_norm": 2.2193026542663574, "learning_rate": 1.60048984974801e-05, "loss": 2.064, "step": 39080 }, { "epoch": 0.9205878196976119, "grad_norm": 2.313854455947876, "learning_rate": 1.5957797560171447e-05, "loss": 1.9564, "step": 39090 }, { "epoch": 0.9208233243841553, "grad_norm": 3.075056314468384, "learning_rate": 1.5910696622862793e-05, "loss": 1.8532, "step": 39100 }, { "epoch": 0.9210588290706985, "grad_norm": 2.181581974029541, "learning_rate": 
1.5863595685554144e-05, "loss": 2.0414, "step": 39110 }, { "epoch": 0.9212943337572418, "grad_norm": 1.9923847913742065, "learning_rate": 1.581649474824549e-05, "loss": 1.8529, "step": 39120 }, { "epoch": 0.921529838443785, "grad_norm": 2.1557722091674805, "learning_rate": 1.5769393810936837e-05, "loss": 1.8607, "step": 39130 }, { "epoch": 0.9217653431303283, "grad_norm": 1.7339023351669312, "learning_rate": 1.5722292873628188e-05, "loss": 1.9141, "step": 39140 }, { "epoch": 0.9220008478168715, "grad_norm": 1.983236312866211, "learning_rate": 1.5675191936319535e-05, "loss": 2.1342, "step": 39150 }, { "epoch": 0.9222363525034148, "grad_norm": 2.0628926753997803, "learning_rate": 1.562809099901088e-05, "loss": 2.0345, "step": 39160 }, { "epoch": 0.922471857189958, "grad_norm": 2.1119203567504883, "learning_rate": 1.5580990061702228e-05, "loss": 2.033, "step": 39170 }, { "epoch": 0.9227073618765014, "grad_norm": 2.4918603897094727, "learning_rate": 1.553388912439358e-05, "loss": 1.9512, "step": 39180 }, { "epoch": 0.9229428665630446, "grad_norm": 1.8734686374664307, "learning_rate": 1.5486788187084925e-05, "loss": 1.8077, "step": 39190 }, { "epoch": 0.9231783712495879, "grad_norm": 1.4692710638046265, "learning_rate": 1.5439687249776272e-05, "loss": 2.012, "step": 39200 }, { "epoch": 0.9234138759361311, "grad_norm": 1.7556036710739136, "learning_rate": 1.539258631246762e-05, "loss": 2.0538, "step": 39210 }, { "epoch": 0.9236493806226744, "grad_norm": 1.9275639057159424, "learning_rate": 1.5345485375158966e-05, "loss": 2.2427, "step": 39220 }, { "epoch": 0.9238848853092176, "grad_norm": 2.047739267349243, "learning_rate": 1.5298384437850313e-05, "loss": 1.9106, "step": 39230 }, { "epoch": 0.924120389995761, "grad_norm": 2.382171630859375, "learning_rate": 1.525128350054166e-05, "loss": 2.0102, "step": 39240 }, { "epoch": 0.9243558946823042, "grad_norm": 1.8619623184204102, "learning_rate": 1.520418256323301e-05, "loss": 2.1269, "step": 39250 }, { "epoch": 
0.9245913993688475, "grad_norm": 2.0923447608947754, "learning_rate": 1.5157081625924357e-05, "loss": 2.0977, "step": 39260 }, { "epoch": 0.9248269040553907, "grad_norm": 1.8551141023635864, "learning_rate": 1.5109980688615705e-05, "loss": 1.8932, "step": 39270 }, { "epoch": 0.925062408741934, "grad_norm": 2.1643242835998535, "learning_rate": 1.5062879751307052e-05, "loss": 2.0233, "step": 39280 }, { "epoch": 0.9252979134284772, "grad_norm": 1.629560947418213, "learning_rate": 1.5015778813998399e-05, "loss": 1.8223, "step": 39290 }, { "epoch": 0.9255334181150204, "grad_norm": 1.8448771238327026, "learning_rate": 1.4968677876689746e-05, "loss": 2.0332, "step": 39300 }, { "epoch": 0.9257689228015638, "grad_norm": 2.0944831371307373, "learning_rate": 1.4921576939381093e-05, "loss": 1.8888, "step": 39310 }, { "epoch": 0.926004427488107, "grad_norm": 2.4087986946105957, "learning_rate": 1.4874476002072443e-05, "loss": 2.0431, "step": 39320 }, { "epoch": 0.9262399321746503, "grad_norm": 2.948134422302246, "learning_rate": 1.482737506476379e-05, "loss": 1.7793, "step": 39330 }, { "epoch": 0.9264754368611935, "grad_norm": 1.8383265733718872, "learning_rate": 1.4780274127455138e-05, "loss": 2.0095, "step": 39340 }, { "epoch": 0.9267109415477368, "grad_norm": 1.9835010766983032, "learning_rate": 1.4733173190146485e-05, "loss": 2.0252, "step": 39350 }, { "epoch": 0.92694644623428, "grad_norm": 2.6888811588287354, "learning_rate": 1.4686072252837832e-05, "loss": 2.1416, "step": 39360 }, { "epoch": 0.9271819509208233, "grad_norm": 2.017975330352783, "learning_rate": 1.4638971315529179e-05, "loss": 1.879, "step": 39370 }, { "epoch": 0.9274174556073665, "grad_norm": 2.2227847576141357, "learning_rate": 1.4591870378220526e-05, "loss": 2.1047, "step": 39380 }, { "epoch": 0.9276529602939099, "grad_norm": 1.885798454284668, "learning_rate": 1.4544769440911876e-05, "loss": 1.9589, "step": 39390 }, { "epoch": 0.9278884649804531, "grad_norm": 1.9954543113708496, "learning_rate": 
1.4497668503603223e-05, "loss": 2.0047, "step": 39400 }, { "epoch": 0.9281239696669964, "grad_norm": 2.5150036811828613, "learning_rate": 1.445056756629457e-05, "loss": 2.0924, "step": 39410 }, { "epoch": 0.9283594743535396, "grad_norm": 2.4229278564453125, "learning_rate": 1.4403466628985918e-05, "loss": 2.015, "step": 39420 }, { "epoch": 0.9285949790400829, "grad_norm": 2.048858404159546, "learning_rate": 1.4356365691677265e-05, "loss": 1.9736, "step": 39430 }, { "epoch": 0.9288304837266261, "grad_norm": 2.147585868835449, "learning_rate": 1.4309264754368612e-05, "loss": 1.9895, "step": 39440 }, { "epoch": 0.9290659884131695, "grad_norm": 1.9288424253463745, "learning_rate": 1.4262163817059959e-05, "loss": 2.0262, "step": 39450 }, { "epoch": 0.9293014930997127, "grad_norm": 1.9802799224853516, "learning_rate": 1.4215062879751306e-05, "loss": 1.9161, "step": 39460 }, { "epoch": 0.929536997786256, "grad_norm": 2.0962471961975098, "learning_rate": 1.4167961942442656e-05, "loss": 1.9732, "step": 39470 }, { "epoch": 0.9297725024727992, "grad_norm": 2.103632688522339, "learning_rate": 1.4120861005134003e-05, "loss": 1.8515, "step": 39480 }, { "epoch": 0.9300080071593425, "grad_norm": 2.154771327972412, "learning_rate": 1.4073760067825351e-05, "loss": 1.7922, "step": 39490 }, { "epoch": 0.9302435118458857, "grad_norm": 2.3538451194763184, "learning_rate": 1.4026659130516698e-05, "loss": 2.0359, "step": 39500 }, { "epoch": 0.930479016532429, "grad_norm": 2.0333411693573, "learning_rate": 1.3979558193208045e-05, "loss": 2.1012, "step": 39510 }, { "epoch": 0.9307145212189722, "grad_norm": 2.59187388420105, "learning_rate": 1.3932457255899392e-05, "loss": 2.2647, "step": 39520 }, { "epoch": 0.9309500259055156, "grad_norm": 2.3052356243133545, "learning_rate": 1.3885356318590739e-05, "loss": 2.0736, "step": 39530 }, { "epoch": 0.9311855305920588, "grad_norm": 2.131145477294922, "learning_rate": 1.3838255381282089e-05, "loss": 2.0845, "step": 39540 }, { "epoch": 
0.9314210352786021, "grad_norm": 2.3522706031799316, "learning_rate": 1.3791154443973436e-05, "loss": 1.9605, "step": 39550 }, { "epoch": 0.9316565399651453, "grad_norm": 2.067479372024536, "learning_rate": 1.3744053506664783e-05, "loss": 2.138, "step": 39560 }, { "epoch": 0.9318920446516886, "grad_norm": 1.7099672555923462, "learning_rate": 1.3696952569356131e-05, "loss": 1.7891, "step": 39570 }, { "epoch": 0.9321275493382318, "grad_norm": 2.0473127365112305, "learning_rate": 1.3649851632047478e-05, "loss": 2.1291, "step": 39580 }, { "epoch": 0.932363054024775, "grad_norm": 2.2088100910186768, "learning_rate": 1.3602750694738825e-05, "loss": 1.9411, "step": 39590 }, { "epoch": 0.9325985587113184, "grad_norm": 2.0423855781555176, "learning_rate": 1.3555649757430172e-05, "loss": 2.0322, "step": 39600 }, { "epoch": 0.9328340633978616, "grad_norm": 3.2174267768859863, "learning_rate": 1.3508548820121522e-05, "loss": 2.0514, "step": 39610 }, { "epoch": 0.9330695680844049, "grad_norm": 1.992333173751831, "learning_rate": 1.3461447882812869e-05, "loss": 1.7315, "step": 39620 }, { "epoch": 0.9333050727709481, "grad_norm": 1.7157886028289795, "learning_rate": 1.3414346945504216e-05, "loss": 2.0937, "step": 39630 }, { "epoch": 0.9335405774574914, "grad_norm": 2.721675157546997, "learning_rate": 1.3367246008195564e-05, "loss": 2.0178, "step": 39640 }, { "epoch": 0.9337760821440346, "grad_norm": 2.151686429977417, "learning_rate": 1.3320145070886911e-05, "loss": 2.0767, "step": 39650 }, { "epoch": 0.934011586830578, "grad_norm": 2.0822360515594482, "learning_rate": 1.3273044133578258e-05, "loss": 1.8964, "step": 39660 }, { "epoch": 0.9342470915171212, "grad_norm": 2.0849602222442627, "learning_rate": 1.3225943196269605e-05, "loss": 2.074, "step": 39670 }, { "epoch": 0.9344825962036645, "grad_norm": 1.9029971361160278, "learning_rate": 1.3178842258960955e-05, "loss": 1.7693, "step": 39680 }, { "epoch": 0.9347181008902077, "grad_norm": 1.9549709558486938, "learning_rate": 
1.3131741321652302e-05, "loss": 1.866, "step": 39690 }, { "epoch": 0.934953605576751, "grad_norm": 3.2078988552093506, "learning_rate": 1.3084640384343649e-05, "loss": 2.0639, "step": 39700 }, { "epoch": 0.9351891102632942, "grad_norm": 2.0642852783203125, "learning_rate": 1.3037539447034996e-05, "loss": 2.2178, "step": 39710 }, { "epoch": 0.9354246149498375, "grad_norm": 2.6901581287384033, "learning_rate": 1.2990438509726344e-05, "loss": 1.9731, "step": 39720 }, { "epoch": 0.9356601196363807, "grad_norm": 2.5783872604370117, "learning_rate": 1.2943337572417691e-05, "loss": 2.009, "step": 39730 }, { "epoch": 0.9358956243229241, "grad_norm": 2.1882944107055664, "learning_rate": 1.2896236635109038e-05, "loss": 1.7685, "step": 39740 }, { "epoch": 0.9361311290094673, "grad_norm": 2.3130943775177, "learning_rate": 1.2849135697800388e-05, "loss": 1.8615, "step": 39750 }, { "epoch": 0.9363666336960106, "grad_norm": 2.8880362510681152, "learning_rate": 1.2802034760491735e-05, "loss": 1.6732, "step": 39760 }, { "epoch": 0.9366021383825538, "grad_norm": 2.318843364715576, "learning_rate": 1.2754933823183082e-05, "loss": 1.7798, "step": 39770 }, { "epoch": 0.9368376430690971, "grad_norm": 2.4206154346466064, "learning_rate": 1.2707832885874429e-05, "loss": 1.8153, "step": 39780 }, { "epoch": 0.9370731477556403, "grad_norm": 2.1725382804870605, "learning_rate": 1.2660731948565777e-05, "loss": 1.8012, "step": 39790 }, { "epoch": 0.9373086524421836, "grad_norm": 2.6982181072235107, "learning_rate": 1.2613631011257124e-05, "loss": 1.9887, "step": 39800 }, { "epoch": 0.9375441571287269, "grad_norm": 2.1166913509368896, "learning_rate": 1.2566530073948471e-05, "loss": 1.793, "step": 39810 }, { "epoch": 0.9377796618152702, "grad_norm": 2.248126745223999, "learning_rate": 1.2519429136639821e-05, "loss": 1.9725, "step": 39820 }, { "epoch": 0.9380151665018134, "grad_norm": 2.303269386291504, "learning_rate": 1.2472328199331166e-05, "loss": 1.9097, "step": 39830 }, { "epoch": 
0.9382506711883567, "grad_norm": 2.438401460647583, "learning_rate": 1.2425227262022515e-05, "loss": 1.8483, "step": 39840 }, { "epoch": 0.9384861758748999, "grad_norm": 1.9419487714767456, "learning_rate": 1.2378126324713862e-05, "loss": 1.9396, "step": 39850 }, { "epoch": 0.9387216805614432, "grad_norm": 2.5072529315948486, "learning_rate": 1.2331025387405209e-05, "loss": 1.9367, "step": 39860 }, { "epoch": 0.9389571852479864, "grad_norm": 2.39945387840271, "learning_rate": 1.2283924450096557e-05, "loss": 1.8372, "step": 39870 }, { "epoch": 0.9391926899345296, "grad_norm": 2.786134958267212, "learning_rate": 1.2236823512787906e-05, "loss": 1.9416, "step": 39880 }, { "epoch": 0.939428194621073, "grad_norm": 2.2066538333892822, "learning_rate": 1.2189722575479253e-05, "loss": 2.0354, "step": 39890 }, { "epoch": 0.9396636993076162, "grad_norm": 2.544435739517212, "learning_rate": 1.21426216381706e-05, "loss": 2.005, "step": 39900 }, { "epoch": 0.9398992039941595, "grad_norm": 2.4211769104003906, "learning_rate": 1.2095520700861948e-05, "loss": 1.986, "step": 39910 }, { "epoch": 0.9401347086807027, "grad_norm": 1.885127305984497, "learning_rate": 1.2048419763553295e-05, "loss": 2.0807, "step": 39920 }, { "epoch": 0.940370213367246, "grad_norm": 2.2707126140594482, "learning_rate": 1.2001318826244642e-05, "loss": 2.1215, "step": 39930 }, { "epoch": 0.9406057180537892, "grad_norm": 2.53247332572937, "learning_rate": 1.195421788893599e-05, "loss": 2.2294, "step": 39940 }, { "epoch": 0.9408412227403326, "grad_norm": 2.236628532409668, "learning_rate": 1.1907116951627339e-05, "loss": 2.0286, "step": 39950 }, { "epoch": 0.9410767274268758, "grad_norm": 2.0977280139923096, "learning_rate": 1.1860016014318686e-05, "loss": 1.9303, "step": 39960 }, { "epoch": 0.9413122321134191, "grad_norm": 1.9017722606658936, "learning_rate": 1.1812915077010033e-05, "loss": 1.9777, "step": 39970 }, { "epoch": 0.9415477367999623, "grad_norm": 2.0843467712402344, "learning_rate": 
1.1765814139701381e-05, "loss": 2.1042, "step": 39980 }, { "epoch": 0.9417832414865056, "grad_norm": 2.946749687194824, "learning_rate": 1.1718713202392728e-05, "loss": 2.0549, "step": 39990 }, { "epoch": 0.9420187461730488, "grad_norm": 2.1347475051879883, "learning_rate": 1.1671612265084075e-05, "loss": 1.8955, "step": 40000 }, { "epoch": 0.9422542508595921, "grad_norm": 3.1052968502044678, "learning_rate": 1.1624511327775423e-05, "loss": 2.1096, "step": 40010 }, { "epoch": 0.9424897555461353, "grad_norm": 2.082920551300049, "learning_rate": 1.157741039046677e-05, "loss": 1.8824, "step": 40020 }, { "epoch": 0.9427252602326787, "grad_norm": 2.2279791831970215, "learning_rate": 1.1530309453158119e-05, "loss": 2.0551, "step": 40030 }, { "epoch": 0.9429607649192219, "grad_norm": 2.04917311668396, "learning_rate": 1.1483208515849466e-05, "loss": 1.9149, "step": 40040 }, { "epoch": 0.9431962696057652, "grad_norm": 1.9186104536056519, "learning_rate": 1.1436107578540814e-05, "loss": 1.6671, "step": 40050 }, { "epoch": 0.9434317742923084, "grad_norm": 2.3989248275756836, "learning_rate": 1.1389006641232161e-05, "loss": 2.007, "step": 40060 }, { "epoch": 0.9436672789788517, "grad_norm": 2.3239734172821045, "learning_rate": 1.1341905703923508e-05, "loss": 2.064, "step": 40070 }, { "epoch": 0.9439027836653949, "grad_norm": 1.8902684450149536, "learning_rate": 1.1294804766614857e-05, "loss": 2.0352, "step": 40080 }, { "epoch": 0.9441382883519382, "grad_norm": 6.378442764282227, "learning_rate": 1.1247703829306203e-05, "loss": 1.9671, "step": 40090 }, { "epoch": 0.9443737930384815, "grad_norm": 1.7690014839172363, "learning_rate": 1.1200602891997552e-05, "loss": 1.741, "step": 40100 }, { "epoch": 0.9446092977250248, "grad_norm": 2.239579677581787, "learning_rate": 1.1153501954688899e-05, "loss": 1.8264, "step": 40110 }, { "epoch": 0.944844802411568, "grad_norm": 2.8370306491851807, "learning_rate": 1.1106401017380247e-05, "loss": 1.9176, "step": 40120 }, { "epoch": 
0.9450803070981113, "grad_norm": 2.293163537979126, "learning_rate": 1.1059300080071594e-05, "loss": 1.9672, "step": 40130 }, { "epoch": 0.9453158117846545, "grad_norm": 1.929100751876831, "learning_rate": 1.1012199142762941e-05, "loss": 1.9704, "step": 40140 }, { "epoch": 0.9455513164711977, "grad_norm": 2.4982588291168213, "learning_rate": 1.096509820545429e-05, "loss": 1.925, "step": 40150 }, { "epoch": 0.945786821157741, "grad_norm": 1.8431040048599243, "learning_rate": 1.0917997268145636e-05, "loss": 2.0691, "step": 40160 }, { "epoch": 0.9460223258442843, "grad_norm": 1.8826316595077515, "learning_rate": 1.0870896330836983e-05, "loss": 2.2448, "step": 40170 }, { "epoch": 0.9462578305308276, "grad_norm": 1.895750880241394, "learning_rate": 1.0823795393528332e-05, "loss": 2.0793, "step": 40180 }, { "epoch": 0.9464933352173708, "grad_norm": 2.6968462467193604, "learning_rate": 1.077669445621968e-05, "loss": 2.0888, "step": 40190 }, { "epoch": 0.9467288399039141, "grad_norm": 1.8779919147491455, "learning_rate": 1.0729593518911027e-05, "loss": 1.7711, "step": 40200 }, { "epoch": 0.9469643445904573, "grad_norm": 2.1035165786743164, "learning_rate": 1.0682492581602374e-05, "loss": 1.892, "step": 40210 }, { "epoch": 0.9471998492770006, "grad_norm": 3.0900187492370605, "learning_rate": 1.0635391644293723e-05, "loss": 1.9634, "step": 40220 }, { "epoch": 0.9474353539635438, "grad_norm": 2.7870278358459473, "learning_rate": 1.058829070698507e-05, "loss": 2.0701, "step": 40230 }, { "epoch": 0.9476708586500872, "grad_norm": 2.2766451835632324, "learning_rate": 1.0541189769676416e-05, "loss": 1.9357, "step": 40240 }, { "epoch": 0.9479063633366304, "grad_norm": 1.974714756011963, "learning_rate": 1.0494088832367765e-05, "loss": 1.8973, "step": 40250 }, { "epoch": 0.9481418680231737, "grad_norm": 2.290137767791748, "learning_rate": 1.0446987895059113e-05, "loss": 2.0768, "step": 40260 }, { "epoch": 0.9483773727097169, "grad_norm": 1.5795409679412842, "learning_rate": 
1.039988695775046e-05, "loss": 1.8673, "step": 40270 }, { "epoch": 0.9486128773962602, "grad_norm": 2.04624605178833, "learning_rate": 1.0352786020441807e-05, "loss": 2.0825, "step": 40280 }, { "epoch": 0.9488483820828034, "grad_norm": 2.657115936279297, "learning_rate": 1.0305685083133154e-05, "loss": 1.9653, "step": 40290 }, { "epoch": 0.9490838867693467, "grad_norm": 1.960942029953003, "learning_rate": 1.0258584145824503e-05, "loss": 2.1169, "step": 40300 }, { "epoch": 0.94931939145589, "grad_norm": 2.3039536476135254, "learning_rate": 1.021148320851585e-05, "loss": 1.9415, "step": 40310 }, { "epoch": 0.9495548961424333, "grad_norm": 1.9402930736541748, "learning_rate": 1.0164382271207196e-05, "loss": 1.9241, "step": 40320 }, { "epoch": 0.9497904008289765, "grad_norm": 2.1505539417266846, "learning_rate": 1.0117281333898545e-05, "loss": 1.8727, "step": 40330 }, { "epoch": 0.9500259055155198, "grad_norm": 2.9176759719848633, "learning_rate": 1.0070180396589893e-05, "loss": 1.8201, "step": 40340 }, { "epoch": 0.950261410202063, "grad_norm": 1.9368810653686523, "learning_rate": 1.002307945928124e-05, "loss": 1.9419, "step": 40350 }, { "epoch": 0.9504969148886063, "grad_norm": 2.8279006481170654, "learning_rate": 9.975978521972587e-06, "loss": 1.9023, "step": 40360 }, { "epoch": 0.9507324195751495, "grad_norm": 1.806648850440979, "learning_rate": 9.928877584663936e-06, "loss": 2.0467, "step": 40370 }, { "epoch": 0.9509679242616929, "grad_norm": 2.361128807067871, "learning_rate": 9.881776647355283e-06, "loss": 2.1681, "step": 40380 }, { "epoch": 0.9512034289482361, "grad_norm": 2.5333521366119385, "learning_rate": 9.83467571004663e-06, "loss": 1.8089, "step": 40390 }, { "epoch": 0.9514389336347794, "grad_norm": 2.0021731853485107, "learning_rate": 9.787574772737978e-06, "loss": 2.0067, "step": 40400 }, { "epoch": 0.9516744383213226, "grad_norm": 1.8866750001907349, "learning_rate": 9.740473835429327e-06, "loss": 2.0784, "step": 40410 }, { "epoch": 
0.9519099430078659, "grad_norm": 2.2933566570281982, "learning_rate": 9.693372898120673e-06, "loss": 1.9378, "step": 40420 }, { "epoch": 0.9521454476944091, "grad_norm": 2.6166129112243652, "learning_rate": 9.64627196081202e-06, "loss": 1.978, "step": 40430 }, { "epoch": 0.9523809523809523, "grad_norm": 2.130694627761841, "learning_rate": 9.599171023503369e-06, "loss": 1.9178, "step": 40440 }, { "epoch": 0.9526164570674956, "grad_norm": 2.019354820251465, "learning_rate": 9.552070086194716e-06, "loss": 1.9989, "step": 40450 }, { "epoch": 0.9528519617540389, "grad_norm": 2.3226945400238037, "learning_rate": 9.504969148886062e-06, "loss": 1.8383, "step": 40460 }, { "epoch": 0.9530874664405822, "grad_norm": 3.102097511291504, "learning_rate": 9.457868211577411e-06, "loss": 2.1891, "step": 40470 }, { "epoch": 0.9533229711271254, "grad_norm": 2.6576321125030518, "learning_rate": 9.410767274268758e-06, "loss": 2.1156, "step": 40480 }, { "epoch": 0.9535584758136687, "grad_norm": 2.235243797302246, "learning_rate": 9.363666336960106e-06, "loss": 2.0167, "step": 40490 }, { "epoch": 0.9537939805002119, "grad_norm": 4.053108215332031, "learning_rate": 9.316565399651453e-06, "loss": 1.975, "step": 40500 }, { "epoch": 0.9540294851867552, "grad_norm": 3.1133532524108887, "learning_rate": 9.269464462342802e-06, "loss": 2.1291, "step": 40510 }, { "epoch": 0.9542649898732984, "grad_norm": 1.7396987676620483, "learning_rate": 9.222363525034149e-06, "loss": 2.0171, "step": 40520 }, { "epoch": 0.9545004945598418, "grad_norm": 2.360766649246216, "learning_rate": 9.175262587725496e-06, "loss": 1.8423, "step": 40530 }, { "epoch": 0.954735999246385, "grad_norm": 1.9602024555206299, "learning_rate": 9.128161650416844e-06, "loss": 1.8434, "step": 40540 }, { "epoch": 0.9549715039329283, "grad_norm": 1.9240727424621582, "learning_rate": 9.081060713108191e-06, "loss": 2.0884, "step": 40550 }, { "epoch": 0.9552070086194715, "grad_norm": 1.481034517288208, "learning_rate": 9.03395977579954e-06, 
"loss": 2.115, "step": 40560 }, { "epoch": 0.9554425133060148, "grad_norm": 2.2302894592285156, "learning_rate": 8.986858838490886e-06, "loss": 1.9161, "step": 40570 }, { "epoch": 0.955678017992558, "grad_norm": 2.155421733856201, "learning_rate": 8.939757901182235e-06, "loss": 2.0161, "step": 40580 }, { "epoch": 0.9559135226791013, "grad_norm": 2.0020880699157715, "learning_rate": 8.892656963873582e-06, "loss": 1.7731, "step": 40590 }, { "epoch": 0.9561490273656446, "grad_norm": 2.630326271057129, "learning_rate": 8.845556026564929e-06, "loss": 2.0842, "step": 40600 }, { "epoch": 0.9563845320521879, "grad_norm": 2.0237209796905518, "learning_rate": 8.798455089256277e-06, "loss": 1.8431, "step": 40610 }, { "epoch": 0.9566200367387311, "grad_norm": 2.217899799346924, "learning_rate": 8.751354151947624e-06, "loss": 1.9634, "step": 40620 }, { "epoch": 0.9568555414252744, "grad_norm": 2.581691026687622, "learning_rate": 8.704253214638971e-06, "loss": 2.1512, "step": 40630 }, { "epoch": 0.9570910461118176, "grad_norm": 1.7218579053878784, "learning_rate": 8.65715227733032e-06, "loss": 1.9162, "step": 40640 }, { "epoch": 0.9573265507983609, "grad_norm": 2.563023328781128, "learning_rate": 8.610051340021668e-06, "loss": 2.0915, "step": 40650 }, { "epoch": 0.9575620554849041, "grad_norm": 2.261781692504883, "learning_rate": 8.562950402713015e-06, "loss": 2.2342, "step": 40660 }, { "epoch": 0.9577975601714475, "grad_norm": 2.305806875228882, "learning_rate": 8.515849465404362e-06, "loss": 2.096, "step": 40670 }, { "epoch": 0.9580330648579907, "grad_norm": 2.0696635246276855, "learning_rate": 8.46874852809571e-06, "loss": 2.0407, "step": 40680 }, { "epoch": 0.958268569544534, "grad_norm": 2.0293238162994385, "learning_rate": 8.421647590787057e-06, "loss": 1.9653, "step": 40690 }, { "epoch": 0.9585040742310772, "grad_norm": 1.9397081136703491, "learning_rate": 8.374546653478404e-06, "loss": 2.1373, "step": 40700 }, { "epoch": 0.9587395789176204, "grad_norm": 
2.025054931640625, "learning_rate": 8.32744571616975e-06, "loss": 2.0634, "step": 40710 }, { "epoch": 0.9589750836041637, "grad_norm": 2.3190510272979736, "learning_rate": 8.2803447788611e-06, "loss": 1.8368, "step": 40720 }, { "epoch": 0.9592105882907069, "grad_norm": 2.0201010704040527, "learning_rate": 8.233243841552448e-06, "loss": 1.9479, "step": 40730 }, { "epoch": 0.9594460929772503, "grad_norm": 2.5316407680511475, "learning_rate": 8.186142904243795e-06, "loss": 2.0755, "step": 40740 }, { "epoch": 0.9596815976637935, "grad_norm": 1.6399452686309814, "learning_rate": 8.139041966935143e-06, "loss": 1.9441, "step": 40750 }, { "epoch": 0.9599171023503368, "grad_norm": 2.183414936065674, "learning_rate": 8.09194102962649e-06, "loss": 1.8982, "step": 40760 }, { "epoch": 0.96015260703688, "grad_norm": 2.358241319656372, "learning_rate": 8.044840092317837e-06, "loss": 2.098, "step": 40770 }, { "epoch": 0.9603881117234233, "grad_norm": 2.2123827934265137, "learning_rate": 7.997739155009184e-06, "loss": 1.9308, "step": 40780 }, { "epoch": 0.9606236164099665, "grad_norm": 2.3104357719421387, "learning_rate": 7.950638217700532e-06, "loss": 2.118, "step": 40790 }, { "epoch": 0.9608591210965098, "grad_norm": 2.0428285598754883, "learning_rate": 7.903537280391881e-06, "loss": 2.0509, "step": 40800 }, { "epoch": 0.961094625783053, "grad_norm": 1.9097564220428467, "learning_rate": 7.856436343083228e-06, "loss": 1.8856, "step": 40810 }, { "epoch": 0.9613301304695964, "grad_norm": 2.2115395069122314, "learning_rate": 7.809335405774575e-06, "loss": 1.8819, "step": 40820 }, { "epoch": 0.9615656351561396, "grad_norm": 1.5304468870162964, "learning_rate": 7.762234468465923e-06, "loss": 1.7923, "step": 40830 }, { "epoch": 0.9618011398426829, "grad_norm": 2.6274614334106445, "learning_rate": 7.71513353115727e-06, "loss": 2.2177, "step": 40840 }, { "epoch": 0.9620366445292261, "grad_norm": 2.414781093597412, "learning_rate": 7.668032593848617e-06, "loss": 2.0156, "step": 40850 }, { 
"epoch": 0.9622721492157694, "grad_norm": 2.1377460956573486, "learning_rate": 7.620931656539966e-06, "loss": 2.076, "step": 40860 }, { "epoch": 0.9625076539023126, "grad_norm": 1.8874634504318237, "learning_rate": 7.573830719231313e-06, "loss": 2.1652, "step": 40870 }, { "epoch": 0.962743158588856, "grad_norm": 2.2391018867492676, "learning_rate": 7.52672978192266e-06, "loss": 1.9276, "step": 40880 }, { "epoch": 0.9629786632753992, "grad_norm": 2.3317453861236572, "learning_rate": 7.479628844614008e-06, "loss": 2.1197, "step": 40890 }, { "epoch": 0.9632141679619425, "grad_norm": 2.019249677658081, "learning_rate": 7.432527907305356e-06, "loss": 2.2113, "step": 40900 }, { "epoch": 0.9634496726484857, "grad_norm": 2.417649984359741, "learning_rate": 7.385426969996703e-06, "loss": 1.7919, "step": 40910 }, { "epoch": 0.963685177335029, "grad_norm": 1.9419840574264526, "learning_rate": 7.33832603268805e-06, "loss": 1.8799, "step": 40920 }, { "epoch": 0.9639206820215722, "grad_norm": 2.6572020053863525, "learning_rate": 7.291225095379399e-06, "loss": 2.0273, "step": 40930 }, { "epoch": 0.9641561867081155, "grad_norm": 1.8981895446777344, "learning_rate": 7.244124158070746e-06, "loss": 2.0674, "step": 40940 }, { "epoch": 0.9643916913946587, "grad_norm": 2.1581413745880127, "learning_rate": 7.197023220762093e-06, "loss": 1.9784, "step": 40950 }, { "epoch": 0.9646271960812021, "grad_norm": 1.9728518724441528, "learning_rate": 7.149922283453441e-06, "loss": 1.9786, "step": 40960 }, { "epoch": 0.9648627007677453, "grad_norm": 1.9783124923706055, "learning_rate": 7.102821346144789e-06, "loss": 1.9053, "step": 40970 }, { "epoch": 0.9650982054542886, "grad_norm": 1.9872087240219116, "learning_rate": 7.055720408836136e-06, "loss": 1.867, "step": 40980 }, { "epoch": 0.9653337101408318, "grad_norm": 2.02413010597229, "learning_rate": 7.008619471527483e-06, "loss": 2.0919, "step": 40990 }, { "epoch": 0.965569214827375, "grad_norm": 2.385622978210449, "learning_rate": 
6.961518534218832e-06, "loss": 1.9556, "step": 41000 }, { "epoch": 0.9658047195139183, "grad_norm": 1.7820403575897217, "learning_rate": 6.914417596910179e-06, "loss": 1.8152, "step": 41010 }, { "epoch": 0.9660402242004615, "grad_norm": 2.060683250427246, "learning_rate": 6.867316659601526e-06, "loss": 2.093, "step": 41020 }, { "epoch": 0.9662757288870049, "grad_norm": 4.170253753662109, "learning_rate": 6.820215722292873e-06, "loss": 2.0762, "step": 41030 }, { "epoch": 0.9665112335735481, "grad_norm": 2.2294230461120605, "learning_rate": 6.773114784984222e-06, "loss": 1.8819, "step": 41040 }, { "epoch": 0.9667467382600914, "grad_norm": 1.990818977355957, "learning_rate": 6.726013847675569e-06, "loss": 2.1329, "step": 41050 }, { "epoch": 0.9669822429466346, "grad_norm": 1.956518530845642, "learning_rate": 6.678912910366916e-06, "loss": 2.1007, "step": 41060 }, { "epoch": 0.9672177476331779, "grad_norm": 2.288674831390381, "learning_rate": 6.631811973058265e-06, "loss": 2.0032, "step": 41070 }, { "epoch": 0.9674532523197211, "grad_norm": 2.927011728286743, "learning_rate": 6.584711035749612e-06, "loss": 2.0179, "step": 41080 }, { "epoch": 0.9676887570062644, "grad_norm": 2.0494003295898438, "learning_rate": 6.537610098440959e-06, "loss": 2.1701, "step": 41090 }, { "epoch": 0.9679242616928077, "grad_norm": 1.8264861106872559, "learning_rate": 6.490509161132306e-06, "loss": 2.0592, "step": 41100 }, { "epoch": 0.968159766379351, "grad_norm": 2.0611650943756104, "learning_rate": 6.443408223823655e-06, "loss": 2.1585, "step": 41110 }, { "epoch": 0.9683952710658942, "grad_norm": 2.264493703842163, "learning_rate": 6.396307286515002e-06, "loss": 1.9042, "step": 41120 }, { "epoch": 0.9686307757524375, "grad_norm": 2.1851072311401367, "learning_rate": 6.349206349206349e-06, "loss": 1.8886, "step": 41130 }, { "epoch": 0.9688662804389807, "grad_norm": 2.544363021850586, "learning_rate": 6.302105411897698e-06, "loss": 2.0393, "step": 41140 }, { "epoch": 0.969101785125524, 
"grad_norm": 2.145167827606201, "learning_rate": 6.255004474589045e-06, "loss": 2.1154, "step": 41150 }, { "epoch": 0.9693372898120672, "grad_norm": 2.0689308643341064, "learning_rate": 6.207903537280392e-06, "loss": 2.2185, "step": 41160 }, { "epoch": 0.9695727944986106, "grad_norm": 1.8527134656906128, "learning_rate": 6.16080259997174e-06, "loss": 1.9672, "step": 41170 }, { "epoch": 0.9698082991851538, "grad_norm": 1.9993964433670044, "learning_rate": 6.113701662663087e-06, "loss": 2.0707, "step": 41180 }, { "epoch": 0.9700438038716971, "grad_norm": 2.341604709625244, "learning_rate": 6.066600725354435e-06, "loss": 1.7601, "step": 41190 }, { "epoch": 0.9702793085582403, "grad_norm": 2.557854413986206, "learning_rate": 6.019499788045782e-06, "loss": 2.0317, "step": 41200 }, { "epoch": 0.9705148132447836, "grad_norm": 2.4617807865142822, "learning_rate": 5.97239885073713e-06, "loss": 2.0245, "step": 41210 }, { "epoch": 0.9707503179313268, "grad_norm": 2.309467077255249, "learning_rate": 5.925297913428478e-06, "loss": 2.0089, "step": 41220 }, { "epoch": 0.9709858226178701, "grad_norm": 2.5535097122192383, "learning_rate": 5.878196976119825e-06, "loss": 2.198, "step": 41230 }, { "epoch": 0.9712213273044134, "grad_norm": 2.2563560009002686, "learning_rate": 5.831096038811173e-06, "loss": 2.2526, "step": 41240 }, { "epoch": 0.9714568319909567, "grad_norm": 2.1398732662200928, "learning_rate": 5.78399510150252e-06, "loss": 1.8543, "step": 41250 }, { "epoch": 0.9716923366774999, "grad_norm": 1.8591779470443726, "learning_rate": 5.736894164193868e-06, "loss": 1.8086, "step": 41260 }, { "epoch": 0.9719278413640432, "grad_norm": 1.8352787494659424, "learning_rate": 5.6897932268852146e-06, "loss": 1.9678, "step": 41270 }, { "epoch": 0.9721633460505864, "grad_norm": 2.3946144580841064, "learning_rate": 5.642692289576563e-06, "loss": 2.0764, "step": 41280 }, { "epoch": 0.9723988507371296, "grad_norm": 1.8993937969207764, "learning_rate": 5.595591352267911e-06, "loss": 2.0317, 
"step": 41290 }, { "epoch": 0.9726343554236729, "grad_norm": 1.8595757484436035, "learning_rate": 5.548490414959258e-06, "loss": 2.11, "step": 41300 }, { "epoch": 0.9728698601102161, "grad_norm": 2.409909725189209, "learning_rate": 5.501389477650605e-06, "loss": 2.1661, "step": 41310 }, { "epoch": 0.9731053647967595, "grad_norm": 2.140245199203491, "learning_rate": 5.454288540341953e-06, "loss": 1.9142, "step": 41320 }, { "epoch": 0.9733408694833027, "grad_norm": 1.9340143203735352, "learning_rate": 5.407187603033301e-06, "loss": 2.1344, "step": 41330 }, { "epoch": 0.973576374169846, "grad_norm": 2.362156867980957, "learning_rate": 5.364796759455514e-06, "loss": 1.6951, "step": 41340 }, { "epoch": 0.9738118788563892, "grad_norm": 1.840448260307312, "learning_rate": 5.317695822146861e-06, "loss": 1.9706, "step": 41350 }, { "epoch": 0.9740473835429325, "grad_norm": 1.9519342184066772, "learning_rate": 5.270594884838208e-06, "loss": 2.0591, "step": 41360 }, { "epoch": 0.9742828882294757, "grad_norm": 1.9666930437088013, "learning_rate": 5.223493947529557e-06, "loss": 2.0745, "step": 41370 }, { "epoch": 0.974518392916019, "grad_norm": 2.266784429550171, "learning_rate": 5.176393010220904e-06, "loss": 2.0185, "step": 41380 }, { "epoch": 0.9747538976025623, "grad_norm": 2.675137758255005, "learning_rate": 5.129292072912251e-06, "loss": 1.9687, "step": 41390 }, { "epoch": 0.9749894022891056, "grad_norm": 2.1197988986968994, "learning_rate": 5.082191135603598e-06, "loss": 1.9073, "step": 41400 }, { "epoch": 0.9752249069756488, "grad_norm": 2.488123655319214, "learning_rate": 5.035090198294947e-06, "loss": 1.9533, "step": 41410 }, { "epoch": 0.9754604116621921, "grad_norm": 2.570176362991333, "learning_rate": 4.9879892609862936e-06, "loss": 2.0849, "step": 41420 }, { "epoch": 0.9756959163487353, "grad_norm": 2.56309175491333, "learning_rate": 4.940888323677641e-06, "loss": 2.1509, "step": 41430 }, { "epoch": 0.9759314210352786, "grad_norm": 1.848645806312561, 
"learning_rate": 4.893787386368989e-06, "loss": 2.0104, "step": 41440 }, { "epoch": 0.9761669257218218, "grad_norm": 1.9632664918899536, "learning_rate": 4.846686449060337e-06, "loss": 1.8198, "step": 41450 }, { "epoch": 0.9764024304083652, "grad_norm": 2.0473501682281494, "learning_rate": 4.799585511751684e-06, "loss": 2.0671, "step": 41460 }, { "epoch": 0.9766379350949084, "grad_norm": 2.3164265155792236, "learning_rate": 4.752484574443031e-06, "loss": 2.0143, "step": 41470 }, { "epoch": 0.9768734397814517, "grad_norm": 2.0020151138305664, "learning_rate": 4.705383637134379e-06, "loss": 1.9446, "step": 41480 }, { "epoch": 0.9771089444679949, "grad_norm": 2.8933792114257812, "learning_rate": 4.658282699825727e-06, "loss": 2.1086, "step": 41490 }, { "epoch": 0.9773444491545382, "grad_norm": 1.7804278135299683, "learning_rate": 4.611181762517074e-06, "loss": 1.8856, "step": 41500 }, { "epoch": 0.9775799538410814, "grad_norm": 1.9400500059127808, "learning_rate": 4.564080825208422e-06, "loss": 1.9144, "step": 41510 }, { "epoch": 0.9778154585276247, "grad_norm": 2.1494195461273193, "learning_rate": 4.51697988789977e-06, "loss": 1.944, "step": 41520 }, { "epoch": 0.978050963214168, "grad_norm": 1.9199514389038086, "learning_rate": 4.4698789505911175e-06, "loss": 1.8664, "step": 41530 }, { "epoch": 0.9782864679007113, "grad_norm": 2.4373841285705566, "learning_rate": 4.422778013282464e-06, "loss": 1.9979, "step": 41540 }, { "epoch": 0.9785219725872545, "grad_norm": 1.8263568878173828, "learning_rate": 4.375677075973812e-06, "loss": 2.0598, "step": 41550 }, { "epoch": 0.9787574772737977, "grad_norm": 2.0048863887786865, "learning_rate": 4.32857613866516e-06, "loss": 1.8226, "step": 41560 }, { "epoch": 0.978992981960341, "grad_norm": 2.0698230266571045, "learning_rate": 4.281475201356507e-06, "loss": 1.8908, "step": 41570 }, { "epoch": 0.9792284866468842, "grad_norm": 2.3763415813446045, "learning_rate": 4.234374264047855e-06, "loss": 2.1119, "step": 41580 }, { "epoch": 
0.9794639913334275, "grad_norm": 2.1369435787200928, "learning_rate": 4.187273326739202e-06, "loss": 1.7217, "step": 41590 }, { "epoch": 0.9796994960199708, "grad_norm": 2.2070326805114746, "learning_rate": 4.14017238943055e-06, "loss": 1.8161, "step": 41600 }, { "epoch": 0.9799350007065141, "grad_norm": 2.9065499305725098, "learning_rate": 4.093071452121897e-06, "loss": 2.0499, "step": 41610 }, { "epoch": 0.9801705053930573, "grad_norm": 2.06013560295105, "learning_rate": 4.045970514813245e-06, "loss": 1.8116, "step": 41620 }, { "epoch": 0.9804060100796006, "grad_norm": 1.7519750595092773, "learning_rate": 3.998869577504592e-06, "loss": 1.9417, "step": 41630 }, { "epoch": 0.9806415147661438, "grad_norm": 2.1897356510162354, "learning_rate": 3.9517686401959405e-06, "loss": 1.9161, "step": 41640 }, { "epoch": 0.9808770194526871, "grad_norm": 2.206636428833008, "learning_rate": 3.904667702887287e-06, "loss": 1.8641, "step": 41650 }, { "epoch": 0.9811125241392303, "grad_norm": 2.4233133792877197, "learning_rate": 3.857566765578635e-06, "loss": 1.8659, "step": 41660 }, { "epoch": 0.9813480288257737, "grad_norm": 2.3047232627868652, "learning_rate": 3.810465828269983e-06, "loss": 1.9812, "step": 41670 }, { "epoch": 0.9815835335123169, "grad_norm": 2.138484001159668, "learning_rate": 3.76336489096133e-06, "loss": 1.9185, "step": 41680 }, { "epoch": 0.9818190381988602, "grad_norm": 2.0543298721313477, "learning_rate": 3.716263953652678e-06, "loss": 1.943, "step": 41690 }, { "epoch": 0.9820545428854034, "grad_norm": 2.8174407482147217, "learning_rate": 3.669163016344025e-06, "loss": 2.2036, "step": 41700 }, { "epoch": 0.9822900475719467, "grad_norm": 1.9986740350723267, "learning_rate": 3.622062079035373e-06, "loss": 1.9317, "step": 41710 }, { "epoch": 0.9825255522584899, "grad_norm": 2.6993019580841064, "learning_rate": 3.5749611417267204e-06, "loss": 2.1434, "step": 41720 }, { "epoch": 0.9827610569450332, "grad_norm": 2.0281319618225098, "learning_rate": 
3.527860204418068e-06, "loss": 1.7839, "step": 41730 }, { "epoch": 0.9829965616315764, "grad_norm": 2.107598304748535, "learning_rate": 3.480759267109416e-06, "loss": 2.0329, "step": 41740 }, { "epoch": 0.9832320663181198, "grad_norm": 2.4774956703186035, "learning_rate": 3.433658329800763e-06, "loss": 2.0114, "step": 41750 }, { "epoch": 0.983467571004663, "grad_norm": 2.693948268890381, "learning_rate": 3.386557392492111e-06, "loss": 2.0334, "step": 41760 }, { "epoch": 0.9837030756912063, "grad_norm": 2.2076406478881836, "learning_rate": 3.339456455183458e-06, "loss": 2.1783, "step": 41770 }, { "epoch": 0.9839385803777495, "grad_norm": 2.1578211784362793, "learning_rate": 3.292355517874806e-06, "loss": 1.9784, "step": 41780 }, { "epoch": 0.9841740850642928, "grad_norm": 2.496828079223633, "learning_rate": 3.245254580566153e-06, "loss": 1.8877, "step": 41790 }, { "epoch": 0.984409589750836, "grad_norm": 2.085418701171875, "learning_rate": 3.198153643257501e-06, "loss": 1.8715, "step": 41800 }, { "epoch": 0.9846450944373794, "grad_norm": 2.5021634101867676, "learning_rate": 3.151052705948849e-06, "loss": 1.8963, "step": 41810 }, { "epoch": 0.9848805991239226, "grad_norm": 2.096789836883545, "learning_rate": 3.103951768640196e-06, "loss": 1.9181, "step": 41820 }, { "epoch": 0.9851161038104659, "grad_norm": 2.0791969299316406, "learning_rate": 3.0568508313315435e-06, "loss": 1.8519, "step": 41830 }, { "epoch": 0.9853516084970091, "grad_norm": 2.2031409740448, "learning_rate": 3.009749894022891e-06, "loss": 2.0038, "step": 41840 }, { "epoch": 0.9855871131835523, "grad_norm": 1.7402150630950928, "learning_rate": 2.962648956714239e-06, "loss": 1.968, "step": 41850 }, { "epoch": 0.9858226178700956, "grad_norm": 1.8533766269683838, "learning_rate": 2.9155480194055866e-06, "loss": 2.0505, "step": 41860 }, { "epoch": 0.9860581225566388, "grad_norm": 4.589505672454834, "learning_rate": 2.868447082096934e-06, "loss": 2.0808, "step": 41870 }, { "epoch": 0.9862936272431821, 
"grad_norm": 1.68135404586792, "learning_rate": 2.8213461447882816e-06, "loss": 2.0021, "step": 41880 }, { "epoch": 0.9865291319297254, "grad_norm": 2.261399745941162, "learning_rate": 2.774245207479629e-06, "loss": 1.9306, "step": 41890 }, { "epoch": 0.9867646366162687, "grad_norm": 1.9954190254211426, "learning_rate": 2.7271442701709765e-06, "loss": 2.1601, "step": 41900 }, { "epoch": 0.9870001413028119, "grad_norm": 2.2311384677886963, "learning_rate": 2.680043332862324e-06, "loss": 1.9097, "step": 41910 }, { "epoch": 0.9872356459893552, "grad_norm": 1.8550012111663818, "learning_rate": 2.632942395553672e-06, "loss": 2.1214, "step": 41920 }, { "epoch": 0.9874711506758984, "grad_norm": 2.2542874813079834, "learning_rate": 2.5858414582450192e-06, "loss": 1.8132, "step": 41930 }, { "epoch": 0.9877066553624417, "grad_norm": 2.300264596939087, "learning_rate": 2.538740520936367e-06, "loss": 2.0594, "step": 41940 }, { "epoch": 0.9879421600489849, "grad_norm": 2.213343620300293, "learning_rate": 2.4916395836277142e-06, "loss": 2.2302, "step": 41950 }, { "epoch": 0.9881776647355283, "grad_norm": 2.0097548961639404, "learning_rate": 2.444538646319062e-06, "loss": 2.0405, "step": 41960 }, { "epoch": 0.9884131694220715, "grad_norm": 2.0618903636932373, "learning_rate": 2.397437709010409e-06, "loss": 1.9674, "step": 41970 }, { "epoch": 0.9886486741086148, "grad_norm": 2.110823631286621, "learning_rate": 2.350336771701757e-06, "loss": 2.25, "step": 41980 }, { "epoch": 0.988884178795158, "grad_norm": 2.1684558391571045, "learning_rate": 2.303235834393104e-06, "loss": 1.8892, "step": 41990 }, { "epoch": 0.9891196834817013, "grad_norm": 1.705733060836792, "learning_rate": 2.2561348970844523e-06, "loss": 1.9763, "step": 42000 }, { "epoch": 0.9893551881682445, "grad_norm": 1.6287431716918945, "learning_rate": 2.2090339597757996e-06, "loss": 1.9039, "step": 42010 }, { "epoch": 0.9895906928547878, "grad_norm": 2.455014944076538, "learning_rate": 2.1619330224671473e-06, "loss": 
2.0905, "step": 42020 }, { "epoch": 0.989826197541331, "grad_norm": 1.862353801727295, "learning_rate": 2.1148320851584946e-06, "loss": 1.9751, "step": 42030 }, { "epoch": 0.9900617022278744, "grad_norm": 2.33832049369812, "learning_rate": 2.0677311478498423e-06, "loss": 1.888, "step": 42040 }, { "epoch": 0.9902972069144176, "grad_norm": 2.076282262802124, "learning_rate": 2.02063021054119e-06, "loss": 1.9962, "step": 42050 }, { "epoch": 0.9905327116009609, "grad_norm": 2.276674509048462, "learning_rate": 1.9735292732325373e-06, "loss": 1.9948, "step": 42060 }, { "epoch": 0.9907682162875041, "grad_norm": 2.114805221557617, "learning_rate": 1.926428335923885e-06, "loss": 2.2751, "step": 42070 }, { "epoch": 0.9910037209740474, "grad_norm": 1.974159598350525, "learning_rate": 1.8793273986152327e-06, "loss": 2.0404, "step": 42080 }, { "epoch": 0.9912392256605906, "grad_norm": 2.0039196014404297, "learning_rate": 1.8322264613065802e-06, "loss": 2.0517, "step": 42090 }, { "epoch": 0.991474730347134, "grad_norm": 1.9755150079727173, "learning_rate": 1.7851255239979276e-06, "loss": 2.1042, "step": 42100 }, { "epoch": 0.9917102350336772, "grad_norm": 2.443861722946167, "learning_rate": 1.7380245866892751e-06, "loss": 2.0485, "step": 42110 }, { "epoch": 0.9919457397202205, "grad_norm": 2.066046953201294, "learning_rate": 1.6909236493806226e-06, "loss": 1.917, "step": 42120 }, { "epoch": 0.9921812444067637, "grad_norm": 2.3617396354675293, "learning_rate": 1.6438227120719701e-06, "loss": 1.9634, "step": 42130 }, { "epoch": 0.9924167490933069, "grad_norm": 2.2212107181549072, "learning_rate": 1.5967217747633178e-06, "loss": 2.0678, "step": 42140 }, { "epoch": 0.9926522537798502, "grad_norm": 1.842693567276001, "learning_rate": 1.5496208374546653e-06, "loss": 2.0056, "step": 42150 }, { "epoch": 0.9928877584663934, "grad_norm": 2.0039405822753906, "learning_rate": 1.502519900146013e-06, "loss": 2.053, "step": 42160 }, { "epoch": 0.9931232631529368, "grad_norm": 
2.3391904830932617, "learning_rate": 1.4554189628373605e-06, "loss": 2.02, "step": 42170 }, { "epoch": 0.99335876783948, "grad_norm": 2.3647069931030273, "learning_rate": 1.4083180255287082e-06, "loss": 2.0623, "step": 42180 }, { "epoch": 0.9935942725260233, "grad_norm": 2.896048069000244, "learning_rate": 1.3612170882200557e-06, "loss": 2.1104, "step": 42190 }, { "epoch": 0.9938297772125665, "grad_norm": 1.7982397079467773, "learning_rate": 1.3141161509114032e-06, "loss": 1.8518, "step": 42200 }, { "epoch": 0.9940652818991098, "grad_norm": 2.1359305381774902, "learning_rate": 1.2670152136027509e-06, "loss": 1.9961, "step": 42210 }, { "epoch": 0.994300786585653, "grad_norm": 1.8116016387939453, "learning_rate": 1.2199142762940984e-06, "loss": 2.1378, "step": 42220 }, { "epoch": 0.9945362912721963, "grad_norm": 2.0384104251861572, "learning_rate": 1.1728133389854459e-06, "loss": 2.0067, "step": 42230 }, { "epoch": 0.9947717959587395, "grad_norm": 1.8383530378341675, "learning_rate": 1.1257124016767934e-06, "loss": 2.0092, "step": 42240 }, { "epoch": 0.9950073006452829, "grad_norm": 2.583355665206909, "learning_rate": 1.078611464368141e-06, "loss": 1.9981, "step": 42250 }, { "epoch": 0.9952428053318261, "grad_norm": 1.9402409791946411, "learning_rate": 1.0315105270594886e-06, "loss": 1.9412, "step": 42260 }, { "epoch": 0.9954783100183694, "grad_norm": 2.124499797821045, "learning_rate": 9.84409589750836e-07, "loss": 2.0968, "step": 42270 }, { "epoch": 0.9957138147049126, "grad_norm": 1.9460561275482178, "learning_rate": 9.373086524421838e-07, "loss": 1.9956, "step": 42280 }, { "epoch": 0.9959493193914559, "grad_norm": 2.0415124893188477, "learning_rate": 8.902077151335312e-07, "loss": 1.9489, "step": 42290 }, { "epoch": 0.9961848240779991, "grad_norm": 2.739859104156494, "learning_rate": 8.431067778248787e-07, "loss": 2.151, "step": 42300 }, { "epoch": 0.9964203287645425, "grad_norm": 2.373000144958496, "learning_rate": 7.960058405162262e-07, "loss": 1.7111, "step": 
42310 }, { "epoch": 0.9966558334510857, "grad_norm": 2.3256282806396484, "learning_rate": 7.489049032075738e-07, "loss": 1.9377, "step": 42320 }, { "epoch": 0.996891338137629, "grad_norm": 2.3008763790130615, "learning_rate": 7.018039658989214e-07, "loss": 1.9568, "step": 42330 }, { "epoch": 0.9971268428241722, "grad_norm": 2.1482903957366943, "learning_rate": 6.547030285902689e-07, "loss": 1.9677, "step": 42340 }, { "epoch": 0.9973623475107155, "grad_norm": 2.5959582328796387, "learning_rate": 6.076020912816165e-07, "loss": 2.0476, "step": 42350 }, { "epoch": 0.9975978521972587, "grad_norm": 1.8489488363265991, "learning_rate": 5.605011539729641e-07, "loss": 1.8465, "step": 42360 }, { "epoch": 0.997833356883802, "grad_norm": 2.9984638690948486, "learning_rate": 5.134002166643117e-07, "loss": 2.171, "step": 42370 }, { "epoch": 0.9980688615703452, "grad_norm": 1.8435646295547485, "learning_rate": 4.6629927935565915e-07, "loss": 1.93, "step": 42380 }, { "epoch": 0.9983043662568886, "grad_norm": 2.5411810874938965, "learning_rate": 4.1919834204700674e-07, "loss": 1.9748, "step": 42390 }, { "epoch": 0.9985398709434318, "grad_norm": 2.412177085876465, "learning_rate": 3.7209740473835434e-07, "loss": 1.8883, "step": 42400 }, { "epoch": 0.998775375629975, "grad_norm": 1.8273178339004517, "learning_rate": 3.249964674297019e-07, "loss": 1.9645, "step": 42410 }, { "epoch": 0.9990108803165183, "grad_norm": 2.5287861824035645, "learning_rate": 2.7789553012104943e-07, "loss": 1.88, "step": 42420 }, { "epoch": 0.9992463850030615, "grad_norm": 2.011352300643921, "learning_rate": 2.3079459281239697e-07, "loss": 2.0222, "step": 42430 }, { "epoch": 0.9994818896896048, "grad_norm": 2.3615148067474365, "learning_rate": 1.8369365550374452e-07, "loss": 1.9643, "step": 42440 }, { "epoch": 0.999717394376148, "grad_norm": 2.0360193252563477, "learning_rate": 1.365927181950921e-07, "loss": 1.8166, "step": 42450 }, { "epoch": 0.9999528990626914, "grad_norm": 2.6184275150299072, 
"learning_rate": 8.949178088643965e-08, "loss": 1.9765, "step": 42460 } ], "logging_steps": 10, "max_steps": 42462, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.784270210118451e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }